diff --git a/.gitignore b/.gitignore index df25f8ae5521a..7388644ce9443 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,11 @@ +.cproject +.project +Debug/* +pcre/* +pcre3/* +_32/* +_64/* +bld/* *-t *.a *.ctest diff --git a/CMakeLists.txt b/CMakeLists.txt index b018cc9a818f0..2c14f27250ee5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -341,6 +341,7 @@ SET(CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE ON) ADD_DEFINITIONS(-DHAVE_CONFIG_H) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/include) + # Add bundled or system zlib. MYSQL_CHECK_ZLIB_WITH_COMPRESS() # Add bundled yassl/taocrypt or system openssl. @@ -380,6 +381,7 @@ IF(WITH_UNIT_TESTS) ADD_SUBDIRECTORY(unittest/examples) ADD_SUBDIRECTORY(unittest/mysys) ADD_SUBDIRECTORY(unittest/my_decimal) + ADD_SUBDIRECTORY(unittest/eperi) IF(NOT WITHOUT_SERVER) ADD_SUBDIRECTORY(unittest/sql) ENDIF() diff --git a/dbug/dbug.c b/dbug/dbug.c index dffd7a44cd8f0..93e7957366247 100644 --- a/dbug/dbug.c +++ b/dbug/dbug.c @@ -85,6 +85,7 @@ #undef SAFE_MUTEX #include #include +#include #ifndef DBUG_OFF @@ -2184,6 +2185,51 @@ const char* _db_get_func_(void) return cs->func; } + +void dump_buffer(unsigned n, const unsigned char* buf) { +int on_this_line = 0; +int counter = 0; +int cc =0; +char ch =0; + +FILE* stream = stderr; +fflush(stream); +fprintf(stream, "%06X: ", counter); +while (n-- > 0) { + fprintf(stream, "%02X ", *buf++); + on_this_line += 1; + if (on_this_line == 16 || n == 0) { + int i; + fprintf(stream, " "); + cc = on_this_line; + if (cc != 16) { + + + for (i = on_this_line; i < 16; i++) { + fprintf(stream," " ); + } + } + for (i = on_this_line; i > 0; i--) { + ch =isprint(buf[-i]) ? 
buf[-i] : '.'; + fprintf(stream,"%c",ch); + } + + fprintf(stream,"\n" ); + + on_this_line = 0; + if (n!=0) fprintf(stream, "%06X: ", ++counter); + + + } else { + counter++; + } +} +fprintf( stream, "\n"); +fflush(stream); +} + + + #else /* diff --git a/include/keyfile.h b/include/keyfile.h new file mode 100644 index 0000000000000..d55c79086d47c --- /dev/null +++ b/include/keyfile.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************/ +#ifndef KEYFILE_H +#define KEYFILE_H +#include + +struct keyentry { + int id; + char *iv; + char *key; +}; + +int +parseFile(FILE * fp, struct keyentry **allKeys, const int k_len, const char *secret); + +int +parseLine(const char *line, struct keyentry *entry, const int k_len); + +int +isComment(char *line); + +char* +trim(char *in); +#endif diff --git a/include/my_aes.h b/include/my_aes.h index 58a7891902338..a4868631b7473 100644 --- a/include/my_aes.h +++ b/include/my_aes.h @@ -1,6 +1,13 @@ #ifndef MY_AES_INCLUDED #define MY_AES_INCLUDED +#define AES_OK 0 +#define AES_BAD_DATA -1 +#define AES_BAD_KEYSIZE -5 +#define AES_KEY_CREATION_FAILED -10 + +#define MY_AES_BLOCK_SIZE 16 /* Block size in bytes */ + /* Copyright (c) 2002, 2006 MySQL AB, 2009 Sun Microsystems, Inc. Use is subject to license terms. 
@@ -27,6 +34,57 @@ C_MODE_START #define AES_KEY_LENGTH 128 /* Must be 128 192 or 256 */ + +/** + Crypt buffer with AES encryption algorithm. + + SYNOPSIS + my_aes_encrypt() + @param source [in] Pointer to data for encryption + @param source_length [in] Size of encryption data + @param dest [out] Buffer to place encrypted data (must be large enough) + @param dest_length [out] Pointer to size of encrypted data + @param key [in] Key to be used for encryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set to true, no padding is used, input data size must be a mulitple of the AES block size + + @return + != 0 error + 0 no error +*/ +int my_aes_encrypt_cbc(const char* source, uint32 source_length, + char* dest, uint32 *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + int noPadding); + + +/** + * Calculate key and iv from a given salt and secret as it is handled in openssl encrypted files via console + * + * SYNOPSIS + * my_Bytes_To_Key() + * @param salt [in] the given salt as extracted from the encrypted file + * @param secret [in] the given secret as String, provided by the user + * @param key [out] 32 Bytes of key are written to this pointer + * @param iv [out] 16 Bytes of iv are written to this pointer + */ +void my_bytes_to_key(const unsigned char *salt, + const char *secret, unsigned char *key, + unsigned char *iv); +/** + Decode Hexencoded String to uint8[]. + my_aes_hexToUint() + @param iv [in] Pointer to hexadecimal encoded IV String + @param dest [out] Pointer to output uint8 array. Memory needs to be allocated by caller + @param iv_length [in] Size of destination array. + */ +void my_aes_hexToUint(const char* in, + unsigned char *out, + int dest_length); + /* my_aes_encrypt - Crypt buffer with AES encryption algorithm. 
source - Pointer to data for encryption @@ -41,6 +99,31 @@ C_MODE_START int my_aes_encrypt(const char *source, int source_length, char *dest, const char *key, int key_length); +/** + AES decryption - CBC mode + + SYNOPSIS + my_aes_encrypt() + @param source [in] Pointer to data to decrypt + @param source_length [in] Size of data + @param dest [out] Buffer to place decrypted data (must be large enough) + @param dest_length [out] Pointer to size of decrypted data + @param key [in] Key to be used for decryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set to true, no padding is used, input data size must be a mulitple of the AES block size + + @return + != 0 error + 0 no error +*/ +int my_aes_decrypt_cbc(const char* source, uint32 source_length, + char* dest, uint32 *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + int noPadding); + /* my_aes_decrypt - DeCrypt buffer with AES encryption algorithm. source - Pointer to data for decryption diff --git a/include/my_dbug.h b/include/my_dbug.h index bcf2015466dec..3837e35f1417a 100644 --- a/include/my_dbug.h +++ b/include/my_dbug.h @@ -52,6 +52,9 @@ extern void _db_return_(uint _line_, struct _db_stack_frame_ *_stack_frame_); extern void _db_pargs_(uint _line_,const char *keyword); extern void _db_doprnt_(const char *format,...) ATTRIBUTE_FORMAT(printf, 1, 2); + +extern void dump_buffer(unsigned n, const unsigned char* buf); + extern void _db_dump_(uint _line_,const char *keyword, const unsigned char *memory, size_t length); extern void _db_end_(void); diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 499f3589145ad..07f3c26ba74b8 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -175,6 +175,9 @@ enum enum_mysql_show_type SHOW_always_last }; + + + /* backward compatibility mapping. 
*/ #define SHOW_INT SHOW_UINT #define SHOW_LONG SHOW_ULONG diff --git a/mysql-test/r/enc.result b/mysql-test/r/enc.result new file mode 100644 index 0000000000000..46b53558e23ae --- /dev/null +++ b/mysql-test/r/enc.result @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS t1; +DROP DATABASE IF EXISTS test; +CREATE DATABASE test; +USE test; +set @save_storage_engine= @@storage_engine; +set storage_engine=InnoDB; +CREATE TABLE t1 (id int) +PAGE_ENCRYPTION='abc'; +ERROR HY000: Incorrect value 'abc' for option 'PAGE_ENCRYPTION' +CREATE TABLE t1 (id int) +PAGE_ENCRYPTION=1 +PAGE_ENCRYPTION_KEY='0xFFC'; +ERROR HY000: Incorrect value '0xFFC' for option 'PAGE_ENCRYPTION_KEY' +CREATE TABLE t1 (id int(11)) +PAGE_ENCRYPTION=1 +PAGE_ENCRYPTION_KEY=42; +INSERT INTO t1(id) values(1); +SELECT * FROM t1; +id +1 diff --git a/mysql-test/t/enc.test b/mysql-test/t/enc.test new file mode 100644 index 0000000000000..6e93d4765d816 --- /dev/null +++ b/mysql-test/t/enc.test @@ -0,0 +1,28 @@ +-- source include/have_xtradb.inc + +--disable_warnings +DROP TABLE IF EXISTS t1; +DROP DATABASE IF EXISTS test; +--enable_warnings + +CREATE DATABASE test; +USE test; +set @save_storage_engine= @@storage_engine; +set storage_engine=InnoDB; + +--error ER_BAD_OPTION_VALUE +CREATE TABLE t1 (id int) + PAGE_ENCRYPTION='abc'; + +--error ER_BAD_OPTION_VALUE +CREATE TABLE t1 (id int) + PAGE_ENCRYPTION=1 + PAGE_ENCRYPTION_KEY='0xFFC'; + +CREATE TABLE t1 (id int(11)) + PAGE_ENCRYPTION=1 + PAGE_ENCRYPTION_KEY=42; + +INSERT INTO t1(id) values(1); +SELECT * FROM t1; + diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index d432c22b966d8..f2d671a0ee80c 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -41,7 +41,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c my_atomic.c my_getncpus.c my_safehash.c my_chmod.c my_rnd.c my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c my_rdtsc.c my_context.c psi_noop.c - file_logger.c) + file_logger.c ) IF (WIN32) SET (MYSYS_SOURCES 
${MYSYS_SOURCES} my_winthread.c my_wincond.c my_winerr.c my_winfile.c my_windac.c my_conio.c) @@ -70,7 +70,7 @@ IF(HAVE_MLOCK) ENDIF() ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES}) -TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY} +TARGET_LINK_LIBRARIES(mysys dbug strings mysys_ssl ${ZLIB_LIBRARY} ${LIBNSL} ${LIBM} ${LIBRT} ${LIBSOCKET} ${LIBEXECINFO}) DTRACE_INSTRUMENT(mysys) diff --git a/mysys_ssl/my_aes.cc b/mysys_ssl/my_aes.cc index 9327bc32a3b60..52640a41587d9 100644 --- a/mysys_ssl/my_aes.cc +++ b/mysys_ssl/my_aes.cc @@ -21,9 +21,12 @@ #if defined(HAVE_YASSL) #include "aes.hpp" #include "openssl/ssl.h" +#include "crypto_wrapper.hpp" #elif defined(HAVE_OPENSSL) #include #include +#include +#include // Wrap C struct, to ensure resources are released. struct MyCipherCtx @@ -37,11 +40,10 @@ struct MyCipherCtx enum encrypt_dir { MY_AES_ENCRYPT, MY_AES_DECRYPT }; -#define MY_AES_BLOCK_SIZE 16 /* Block size in bytes */ - /* If bad data discovered during decoding */ #define AES_BAD_DATA -1 + /** This is internal function just keeps joint code of Key generation @@ -101,7 +103,103 @@ static int my_aes_create_key(const char *key, int key_length, uint8 *rkey) return 0; } +/** + Decode Hexencoded String to uint8[]. + my_aes_hexToUint() + @param iv [in] Pointer to hexadecimal encoded IV String + @param dest [out] Pointer to output uint8 array. Memory needs to be allocated by caller + @param iv_length [in] Size of destination array. 
+ */ +void +my_aes_hexToUint(const char* in, unsigned char *out, int dest_length) +{ + const char *pos = in; + int res = 0; + int count = 0; + for(count = 0; count < dest_length; count++) + { + sscanf(pos, "%2hhx", &res); + out[count] = res; + pos += 2 * sizeof(char); + } +} + +/** + * Calculate key and iv from a given salt and secret as it is handled in openssl encrypted files via console + * + * SYNOPSIS + * my_Bytes_To_Key() + * @param salt [in] the given salt as extracted from the encrypted file + * @param secret [in] the given secret as String, provided by the user + * @param key [out] 32 Bytes of key are written to this pointer + * @param iv [out] 16 Bytes of iv are written to this pointer + */ +void +my_bytes_to_key(const unsigned char *salt, const char *secret, unsigned char *key, unsigned char *iv) +{ +#ifdef HAVE_YASSL +#ifndef ___min +#define ___min(a,b) (((a) < (b)) ? (a) : (b)) +#endif + /* + the yassl function has no support for SHA1. + Reason unknown. + */ + int keyLen = 32; + int ivLen = 16; + int EVP_SALT_SZ = 8; + const int SHA_LEN = 20; + yaSSL::SHA myMD; + uint digestSz = myMD.get_digestSize(); + unsigned char digest[SHA_LEN]; // max size + int sz = strlen(secret); + int count = 1; + int keyLeft = keyLen; + int ivLeft = ivLen; + int keyOutput = 0; + + while (keyOutput < (keyLen + ivLen)) { + int digestLeft = digestSz; + // D_(i - 1) + if (keyOutput) // first time D_0 is empty + myMD.update(digest, digestSz); + // data + myMD.update((yaSSL::byte* )secret, sz); + // salt + if (salt) + myMD.update(salt, EVP_SALT_SZ); + myMD.get_digest(digest); + // count + for (int j = 1; j < count; j++) { + myMD.update(digest, digestSz); + myMD.get_digest(digest); + } + + if (keyLeft) { + int store = ___min(keyLeft, static_cast(digestSz)); + memcpy(&key[keyLen - keyLeft], digest, store); + + keyOutput += store; + keyLeft -= store; + digestLeft -= store; + } + + if (ivLeft && digestLeft) { + int store = ___min(ivLeft, digestLeft); + memcpy(&iv[ivLen - ivLeft], 
&digest[digestSz - digestLeft], store); + + keyOutput += store; + ivLeft -= store; + } + } + return; +#elif HAVE_OPENSSL + const EVP_CIPHER *type = EVP_aes_256_cbc(); + const EVP_MD *digest = EVP_sha1(); + EVP_BytesToKey(type, digest, salt, (unsigned char*) secret, strlen(secret), 1, key, iv); +#endif +} /** Crypt buffer with AES encryption algorithm. @@ -110,19 +208,247 @@ static int my_aes_create_key(const char *key, int key_length, uint8 *rkey) @param source [in] Pointer to data for encryption @param source_length [in] Size of encryption data @param dest [out] Buffer to place encrypted data (must be large enough) - @param key [in] Key to be used for encryption - @param key_length [in] Length of the key. Will handle keys of any length + @param dest_length [out] Pointer to size of encrypted data + @param key [in] Key to be used for encryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. 
+ @param noPadding [in] if set to true, no padding is used, input data size must be a mulitple of the AES block size @return - >= 0 Size of encrypted data - < 0 Error + != 0 error + 0 no error */ +int my_aes_encrypt_cbc(const char* source, uint32 source_length, + char* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + int noPadding) +{ + if (noPadding) { + if (source_length % 16 !=0) return AES_BAD_DATA; + } +#ifdef HAVE_YASSL + TaoCrypt::AES_CBC_Encryption enc; + /* 128 bit block used for padding */ + uint8 block[MY_AES_BLOCK_SIZE]; + int num_blocks; /* number of complete blocks */ + int i; + switch(key_length) { + case 16: + break; + case 24: + break; + case 32: + break; + default: + return AES_BAD_KEYSIZE; + } + + enc.SetKey((const TaoCrypt::byte *) key, key_length, (const TaoCrypt::byte *) iv); + + num_blocks = source_length / MY_AES_BLOCK_SIZE; + + for (i = num_blocks; i > 0; i--) /* Encode complete blocks */ + { + enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + source += MY_AES_BLOCK_SIZE; + dest += MY_AES_BLOCK_SIZE; + } + + if (noPadding) { + *dest_length = MY_AES_BLOCK_SIZE * (num_blocks); + return AES_OK; + + } + + /* Encode the rest. 
We always have incomplete block */ + char pad_len = MY_AES_BLOCK_SIZE - (source_length - + MY_AES_BLOCK_SIZE * num_blocks); + memcpy(block, source, 16 - pad_len); + memset(block + MY_AES_BLOCK_SIZE - pad_len, pad_len, pad_len); + + enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) block, + MY_AES_BLOCK_SIZE); + + *dest_length = MY_AES_BLOCK_SIZE * (num_blocks + 1); + return AES_OK; +#elif defined(HAVE_OPENSSL) + MyCipherCtx ctx; + int u_len, f_len; + /* The real key to be used for encryption */ + const EVP_CIPHER* cipher; + switch(key_length) { + case 16: + cipher = EVP_aes_128_cbc(); + break; + case 24: + cipher = EVP_aes_192_cbc(); + break; + case 32: + cipher = EVP_aes_256_cbc(); + break; + default: + return AES_BAD_KEYSIZE; + } + //Initialize Encryption Engine here, default software Engine is default + ENGINE *engine = NULL; + + if (! EVP_EncryptInit_ex(&ctx.ctx, cipher, engine, key, iv)) + return AES_BAD_DATA; /* Error */ + if (noPadding) { + EVP_CIPHER_CTX_set_padding(&ctx.ctx, 0); + } + EVP_CIPHER_CTX_key_length(&ctx.ctx); + OPENSSL_assert(EVP_CIPHER_CTX_key_length(&ctx.ctx) == key_length); + OPENSSL_assert(EVP_CIPHER_CTX_iv_length(&ctx.ctx) == iv_length); + OPENSSL_assert(EVP_CIPHER_CTX_block_size(&ctx.ctx) == 16); + if (! EVP_EncryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len, + (unsigned const char *) source, source_length)) + return AES_BAD_DATA; /* Error */ + if (! 
EVP_EncryptFinal_ex(&ctx.ctx, (unsigned char *) dest + u_len, &f_len)) + return AES_BAD_DATA; /* Error */ + *dest_length = (unsigned long int) (u_len + f_len); + + return AES_OK; +#else + /* currently Open SSL is required */ + return AES_BAD_DATA; +#endif +} + + +/** + AES decryption - CBC mode + + SYNOPSIS + my_aes_encrypt() + @param source [in] Pointer to data to decrypt + @param source_length [in] Size of data + @param dest [out] Buffer to place decrypted data (must be large enough) + @param dest_length [out] Pointer to size of decrypted data + @param key [in] Key to be used for decryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set to true, no padding is used, input data size must be a mulitple of the AES block size + + @return + != 0 error + 0 no error +*/ +int my_aes_decrypt_cbc(const char* source, uint32 source_length, + char* dest, uint32 *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + int noPadding) +{ + if (noPadding) { + if (source_length % 16 !=0) return AES_BAD_DATA; + } +#ifdef HAVE_YASSL + TaoCrypt::AES_CBC_Decryption dec; + /* 128 bit block used for padding */ + uint8 block[MY_AES_BLOCK_SIZE]; + int num_blocks; /* Number of complete blocks */ + int i; + switch(key_length) { + case 16: + break; + case 24: + break; + case 32: + break; + default: + return AES_BAD_KEYSIZE; + } + + dec.SetKey((const TaoCrypt::byte *) key, key_length, iv); + + num_blocks = source_length / MY_AES_BLOCK_SIZE; + + if ((source_length != num_blocks * MY_AES_BLOCK_SIZE) || num_blocks == 0 ) + /* Input size has to be even and at least one block */ + return AES_BAD_DATA; + + /* Decode all but last blocks */ + for (i = num_blocks - 1; i > 0; i--) + { + dec.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + source += MY_AES_BLOCK_SIZE; + dest 
+= MY_AES_BLOCK_SIZE; + } + + dec.Process((TaoCrypt::byte *) block, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + + if (noPadding) { + memcpy(dest, block, MY_AES_BLOCK_SIZE); + *dest_length = MY_AES_BLOCK_SIZE * num_blocks; + return AES_OK; + } + + /* Use last char in the block as size */ + uint pad_len = (uint) (uchar) block[MY_AES_BLOCK_SIZE - 1]; + + if (pad_len > MY_AES_BLOCK_SIZE) + return AES_BAD_DATA; + /* We could also check whole padding but we do not really need this */ + + memcpy(dest, block, MY_AES_BLOCK_SIZE - pad_len); + *dest_length = MY_AES_BLOCK_SIZE * num_blocks - pad_len; + return AES_OK; +#elif defined(HAVE_OPENSSL) + MyCipherCtx ctx; + int u_len, f_len; + + const EVP_CIPHER* cipher; + switch(key_length) { + case 16: + cipher = EVP_aes_128_cbc(); + break; + case 24: + cipher = EVP_aes_192_cbc(); + break; + case 32: + cipher = EVP_aes_256_cbc(); + break; + default: + return AES_BAD_KEYSIZE; + } + //Initialize Encryption Engine here, default software Engine is default + ENGINE *engine = NULL; + + if (! EVP_DecryptInit_ex(&ctx.ctx, cipher, engine, key, iv)) + return AES_BAD_DATA; /* Error */ + if (noPadding) { + EVP_CIPHER_CTX_set_padding(&ctx.ctx, 0); + } + OPENSSL_assert(EVP_CIPHER_CTX_key_length(&ctx.ctx) == key_length); + OPENSSL_assert(EVP_CIPHER_CTX_iv_length(&ctx.ctx) == iv_length); + OPENSSL_assert(EVP_CIPHER_CTX_block_size(&ctx.ctx) == 16); + if (! EVP_DecryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len, + (unsigned char *)source, source_length)) + return AES_BAD_DATA; /* Error */ + if (! 
EVP_DecryptFinal_ex(&ctx.ctx, (unsigned char *) dest + u_len, &f_len)) { + *dest_length = (unsigned long int) u_len; + return AES_BAD_DATA; + } + *dest_length = (unsigned long int) (u_len + f_len); +#endif + return AES_OK; +} + -int my_aes_encrypt(const char* source, int source_length, char* dest, +int +my_aes_encrypt(const char* source, int source_length, char* dest, const char* key, int key_length) { #if defined(HAVE_YASSL) TaoCrypt::AES_ECB_Encryption enc; + /* 128 bit block used for padding */ uint8 block[MY_AES_BLOCK_SIZE]; int num_blocks; /* number of complete blocks */ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index fa4f92b26dd09..bd4781ca50fb3 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3712,6 +3712,8 @@ static bool init_global_datetime_format(timestamp_type format_type, return false; } + + SHOW_VAR com_status_vars[]= { {"admin_commands", (char*) offsetof(STATUS_VAR, com_other), SHOW_LONG_STATUS}, {"alter_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ALTER_DB]), SHOW_LONG_STATUS}, diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h index 6b310865bbaa5..111a6a9e7026c 100644 --- a/sql/sql_plugin.h +++ b/sql/sql_plugin.h @@ -21,10 +21,8 @@ the following #define adds server-only members to enum_mysql_show_type, that is defined in plugin.h */ -#define SHOW_always_last SHOW_KEY_CACHE_LONG, \ - SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, \ - SHOW_HAVE, SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS, \ - SHOW_LONG_NOFLUSH, SHOW_LONGLONG_STATUS, SHOW_LEX_STRING + +#define SHOW_always_last SHOW_KEY_CACHE_LONG,SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS,SHOW_HAVE, SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS,SHOW_LONG_NOFLUSH, SHOW_LONGLONG_STATUS, SHOW_LEX_STRING #include #undef SHOW_always_last diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 093f8f64e20ad..5ae1a97c80603 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -30,6 +30,7 @@ MYSQL_CHECK_BZIP2() # OS tests IF(UNIX) + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") 
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) IF (XTRADB_PREFER_STATIC_LIBAIO) @@ -357,10 +358,13 @@ SET(INNOBASE_SOURCES dict/dict0stats.cc dict/dict0stats_bg.cc dyn/dyn0dyn.cc + enc/EncKeys.cc + enc/KeySingleton.cc eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc fil/fil0pagecompress.cc + fil/fil0pageencryption.cc fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index b27178fa8c808..a5a4e381afc21 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -57,6 +57,9 @@ Created 11/5/1995 Heikki Tuuri #include "trx0trx.h" #include "srv0start.h" +#include "fil0pageencryption.h" + + /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -570,12 +573,13 @@ buf_page_is_corrupted( ulint zip_size) /*!< in: size of compressed page; 0 for uncompressed pages */ { + ulint page_encrypted = fil_page_is_encrypted(read_buf); ulint checksum_field1; ulint checksum_field2; ibool crc32_inited = FALSE; ib_uint32_t crc32 = ULINT32_UNDEFINED; - if (!zip_size + if (!page_encrypted && !zip_size && memcmp(read_buf + FIL_PAGE_LSN + 4, read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { @@ -628,6 +632,9 @@ buf_page_is_corrupted( if (zip_size) { return(!page_zip_verify_checksum(read_buf, zip_size)); } + if (page_encrypted) { + return (FALSE); + } checksum_field1 = mach_read_from_4( read_buf + FIL_PAGE_SPACE_OR_CHKSUM); diff --git a/storage/xtradb/enc/EncKeys.cc b/storage/xtradb/enc/EncKeys.cc new file mode 100644 index 0000000000000..f0934c14b0657 --- /dev/null +++ b/storage/xtradb/enc/EncKeys.cc @@ -0,0 +1,429 @@ +/* Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** + @file EncKeys.cc + A class to keep keys for encryption/decryption. + +How it works... +The location and usage can be configured via the configuration file. +Example + +[mysqld] +... +innodb_data_encryption_providertype = 1 +innodb_data_encryption_providername = keys.enc +innodb_data_encryption_providerurl = /home/mdb/ +innodb_data_encryption_filekey = secret +... + +Currently only provider type 1 is supported, which means that the keys are read from a file. +The filename is set up via the innodb_data_encryption_providername configuration value. +innodb_data_encryption_providerurl is used to configure the path to this file. This is usually +a folder name. +Examples: +innodb_data_encryption_providerurl = \\\\unc (windows share) +innodb_data_encryption_providerurl = e:/tmp/ (windows path) +innodb_data_encryption_providerurl = /tmp (linux path) + +The key file contains AES keys and initialization vectors as hex-encoded strings. +Keys of size 128, 192 or 256 bits are supported. The IV consists of 16 bytes. + +The key file should be encrypted and the key to decrypt the file can be given with the +innodb_data_encryption_filekey parameter. + +The file key can also be read from a file if FILE: is prepended to the value. The remainder is then interpreted +as the absolute path to the file containing the file key. This file can optionally be encrypted, currently with a fixed key. 
+Example: +innodb_data_encryption_filekey = FILE:y:/secret256.enc + +If the key file can not be read at server startup, for example if the file key is not present, +the page_encryption feature is not available and access to page_encryption tables is not possible. + +Example files can be found inside the unittest/eperi folder. + +The OpenSSL command line utility can be used to create an encrypted key file. +Examples: +openssl enc -aes-256-cbc -md sha1 -k secret -in keys.txt -out keys.enc +openssl enc -aes-256-cbc -md sha1 -k <initialPwd> -in secret -out secret.enc + + Created 09/15/2014 + ***********************************************************************/ +#ifdef __WIN__ +#define PCRE_STATIC 1 +#endif + +#include "EncKeys.h" +#include +#include +#include +#include +#include +#include +#include + + + + +const char* EncKeys::strMAGIC = "Salted__"; +const int EncKeys::magicSize = 8;//strlen(strMAGIC); // 8 byte +const char* EncKeys::newLine = "\n"; + +const char* EncKeys::errorNoKeyId = "KeyID = %u not found or with error. 
Check the key and the log file.\n"; +const char* EncKeys::errorInMatches = "Wrong match of the keyID in line %u, see the template.\n"; +const char* EncKeys::errorExceedKeyFileSize = "The size of the key file %s exceeds " + "the maximum allowed of %u bytes.\n"; +const char* EncKeys::errorExceedKeySize = "The key size exceeds the maximum allowed size of %u in line %u.\n"; +const char* EncKeys::errorEqualDoubleKey = "More than one identical key with keyID = %u found" + " in lines %u and %u.\nDelete one of them in the key file.\n"; +const char* EncKeys::errorUnequalDoubleKey = "More than one not identical key with keyID = %u found" + " in lines %u and %u.\nChoose the right one and delete the other in the key file.\n" + "I'll take the key from line %u\n"; +const char* EncKeys::errorNoInitializedKey = "The key could not be initialized.\n"; +const char* EncKeys::errorNotImplemented = "Initializing keys through key server is not" + " yet implemented.\nYou can not read encrypted tables or columns\n\n"; +const char* EncKeys::errorOpenFile = "Could not open %s for reading. You can not read encrypted tables or columns.\n\n"; +const char* EncKeys::errorReadingFile = "Could not read from %s. You can not read encrypted tables or columns\n\n"; +const char* EncKeys::errorFileSize = "Could not get the file size from %s. 
You can not read encrypted tables or columns\n\n"; +const char* EncKeys::errorFalseFileKey = "Wrong encryption / decryption key for keyfile '%s'.\n"; + +/* read this from a secret source in some later version */ +const char* EncKeys::initialPwd = "lg28s9ac5ffa537fd8798875c98e190df289da7e047c05"; + +EncKeys::EncKeys() { + countKeys = keyLineInKeyFile = 0; + for (int ii = 0; ii < MAX_KEYS; ii++) { + keys[ii].id = 0; + keys[ii].iv = keys[ii].key = NULL; + } + oneKey = NULL; +} + +EncKeys::~EncKeys() { + for (int ii = MAX_KEYS - 1; ii >= 0 ; ii--) { + delete[] keys[ii].iv; keys[ii].iv = NULL; + delete[] keys[ii].key; keys[ii].key = NULL; + + } +} + +bool EncKeys::initKeys(const char *name, const char *url, const int initType, const char *filekey) { + if (KEYINITTYPE_FILE == initType) + { + int result = initKeysThroughFile(name, url, filekey); + return ERROR_FALSE_FILE_KEY != result && ERROR_OPEN_FILE != result && ERROR_READING_FILE != result; + } + else if (KEYINITTYPE_SERVER == initType) + { + return NO_ERROR_KEY_FILE_PARSE_OK == initKeysThroughServer(name, url, filekey); + } + return false; +} + +int EncKeys::initKeysThroughFile(const char *name, const char *path, const char *filekey) { + if (path==NULL || name==NULL) return ERROR_OPEN_FILE; + size_t len1 = strlen(path); + size_t len2 = strlen(name); + const char *MAGIC = "FILE:"; + const short MAGIC_LEN = 5; + int ret = NO_ERROR_KEY_FILE_PARSE_OK; + bool isUncPath= (len1>2) ? ((strncmp("\\\\", path, 2)==0) ? TRUE : FALSE) : FALSE; + bool isSlash = ((isUncPath? '\\':'/') == path[len1 - 1]); + char *secret = (char*) malloc(MAX_SECRET_SIZE +1 * sizeof(char)); + char *filename = (char*) malloc((len1 + len2 + (isSlash ? 1 : 2)) * sizeof(char)); + if(filekey != NULL) + { + //If secret starts with FILE: interpret the secret as filename. 
+ if(memcmp(MAGIC, filekey, MAGIC_LEN) == 0) { + int fk_len = strlen(filekey); + char *secretfile = (char*)malloc( (1 + fk_len - MAGIC_LEN)* sizeof(char)); + memcpy(secretfile, filekey+MAGIC_LEN, fk_len - MAGIC_LEN); + secretfile[fk_len-MAGIC_LEN] = '\0'; + parseSecret(secretfile, secret); + free(secretfile); + } else + { + sprintf(secret, "%s", filekey); + } + } + sprintf(filename, "%s%s%s", path, isSlash ? "" : (isUncPath ? "\\":"/"), name); + ret = parseFile((const char *)filename, 254, secret); + free(filename); + free(secret); + return ret; +} + +int EncKeys::initKeysThroughServer( const char *name, const char *path, const char *filekey) +{ + //TODO +#ifdef UNIV_DEBUG + fprintf(stderr, errorNotImplemented); +#endif //UNIV_DEBUG + return ERROR_KEYINITTYPE_SERVER_NOT_IMPLEMENTED; +} + +/* + * secret is limited to MAX_SECRET_SIZE characters + */ +void EncKeys::parseSecret( const char *secretfile, char *secret ) { + int maxSize = (MAX_SECRET_SIZE +16 + magicSize*2) ; + char* buf = (char*)malloc((maxSize) * sizeof(char)); + char* _initPwd = (char*)malloc((strlen(initialPwd)+1) * sizeof(char)); + + FILE *fp = fopen(secretfile, "rb"); + fseek(fp, 0L, SEEK_END); + long file_size = ftell(fp); + rewind(fp); + int bytes_to_read = (maxSize >= file_size)? file_size:(maxSize); + fread(buf, 1, bytes_to_read, fp); + if (memcmp(buf, strMAGIC, magicSize)) { + bytes_to_read = (bytes_to_read>MAX_SECRET_SIZE) ? 
MAX_SECRET_SIZE : bytes_to_read; + memcpy(secret, buf, bytes_to_read); + secret[bytes_to_read] = '\0'; + } else { + unsigned char salt[magicSize]; + unsigned char *key = new unsigned char[keySize32]; + unsigned char *iv = new unsigned char[ivSize16]; + memcpy(&salt, buf + magicSize, magicSize); + memcpy(_initPwd, initialPwd, strlen(initialPwd)); + _initPwd[strlen(initialPwd)]= '\0'; + my_bytes_to_key((unsigned char *) salt, _initPwd, key, iv); + uint32 d_size = 0; + int res = my_aes_decrypt_cbc((const char*)buf + 2 * magicSize, bytes_to_read - 2 * magicSize, + secret, &d_size, key, keySize32, iv, ivSize16, 0); + if (d_size>EncKeys::MAX_SECRET_SIZE) { + d_size = EncKeys::MAX_SECRET_SIZE; + } + delete[] key; + delete[] iv; + secret[d_size] = '\0'; + } + free(buf); + free(_initPwd); + fclose(fp); +} + +/** + * Returns a struct keyentry with the asked 'id' or NULL. + */ +keyentry *EncKeys::getKeys(int id) { + if (KEY_MIN <= id && KEY_MAX >= id && (&keys[id - 1])->iv) + { + return &keys[id - 1]; + } +#ifdef UNIV_DEBUG + else { + + fprintf(stderr, errorNoKeyId, id); + return NULL; + } +#endif //UNIV_DEBUG +} + +/** + * Get the keys from the key file and decrypt it with the key . + * Store the keys with id smaller then in an array of structs keyentry. + * Returns NO_ERROR_PARSE_OK or an appropriate error code. 
+ */ +int EncKeys::parseFile(const char* filename, const ulint maxKeyId, const char *secret) { + int errorCode = 0; + ulint id = 0; + char *buffer = decryptFile(filename, secret, &errorCode); + + if (NO_ERROR_PARSE_OK != errorCode) return errorCode; + else errorCode = NO_ERROR_KEY_FILE_PARSE_OK; + + char *line = strtok(buffer, newLine); + while ( NULL != line) { + keyLineInKeyFile++; + switch (parseLine(line, maxKeyId)) { + case NO_ERROR_PARSE_OK: + id = oneKey->id; + keys[oneKey->id - 1] = *oneKey; + delete(oneKey); + countKeys++; + fprintf(stderr, "Line: %u --> ", keyLineInKeyFile); printKeyEntry(id); + break; + case ERROR_ID_TOO_BIG: + fprintf(stderr, errorExceedKeySize, KEY_MAX, keyLineInKeyFile); + fprintf(stderr, " --> %s\n", line); + errorCode = ERROR_KEY_FILE_EXCEEDS_MAX_NUMBERS_OF_KEYS; + break; + case ERROR_NOINITIALIZEDKEY: + fprintf(stderr, errorNoInitializedKey); + fprintf(stderr, " --> %s\n", line); + errorCode = ERROR_KEY_FILE_PARSE_NULL; + break; + case ERROR_WRONG_NUMBER_OF_MATCHES: + fprintf(stderr, errorInMatches, keyLineInKeyFile); + fprintf(stderr, " --> %s\n", line); + errorCode = ERROR_KEY_FILE_PARSE_NULL; + break; + case NO_ERROR_KEY_GREATER_THAN_ASKED: + fprintf(stderr, "No asked key in line %u: %s\n", keyLineInKeyFile, line); + break; + case NO_ERROR_ISCOMMENT: + fprintf(stderr, "Is comment in line %u: %s\n", keyLineInKeyFile, line); + default: + break; + } + line = strtok(NULL, newLine); + } + + free(line); line = NULL; + delete[] buffer; buffer = NULL; + return errorCode; +} + +int EncKeys::parseLine(const char *line, const ulint maxKeyId) { + int ret = NO_ERROR_PARSE_OK; + if (isComment(line)) + ret = NO_ERROR_ISCOMMENT; + else { + const char *error_p = NULL; + int offset; + pcre *pattern = pcre_compile( + "([0-9]+);([0-9,a-f,A-F]{32});([0-9,a-f,A-F]{64}|[0-9,a-f,A-F]{48}|[0-9,a-f,A-F]{32})", + 0, &error_p, &offset, NULL); + if ( NULL != error_p) + fprintf(stderr, "Error: %s\nOffset: %d\n", error_p, offset); + + int m_len = (int) 
strlen(line), ovector[MAX_OFFSETS_IN_PCRE_PATTERNS]; + int rc = pcre_exec(pattern, NULL, line, m_len, 0, 0, ovector, MAX_OFFSETS_IN_PCRE_PATTERNS); + pcre_free(pattern); + if (4 == rc) { + char lin[MAX_KEY_LINE_SIZE + 1]; + strncpy( lin, line, MAX_KEY_LINE_SIZE); + lin[MAX_KEY_LINE_SIZE] = '\0'; + char *substring_start = lin + ovector[2]; + int substr_length = ovector[3] - ovector[2]; + if (3 < substr_length) + ret = ERROR_ID_TOO_BIG; + else { + char buffer[4]; + sprintf(buffer, "%.*s", substr_length, substring_start); + ulint id = atoi(buffer); + if (0 == id) ret = ERROR_NOINITIALIZEDKEY; + else if (KEY_MAX < id) ret = ERROR_ID_TOO_BIG; + else if (maxKeyId < id) ret = NO_ERROR_KEY_GREATER_THAN_ASKED; + else { + oneKey = new keyentry; + oneKey->id = id; + substring_start = lin + ovector[4]; + substr_length = ovector[5] - ovector[4]; + oneKey->iv = new char[substr_length + 1]; + sprintf(oneKey->iv, "%.*s", substr_length, substring_start); + substring_start = lin + ovector[6]; + substr_length = ovector[7] - ovector[6]; + oneKey->key = new char[substr_length + 1]; + sprintf(oneKey->key, "%.*s", substr_length, substring_start); + } + } + } + else + ret = ERROR_WRONG_NUMBER_OF_MATCHES; + } + return ret; +} + +/** + * Decrypt the key file 'filename' if it is encrypted with the key 'secret'. + * Store the content of the decrypted file in 'buffer'. The buffer has to be freed + * in the calling function. 
+ */ +char* EncKeys::decryptFile(const char* filename, const char *secret, int *errorCode) { + *errorCode = NO_ERROR_PARSE_OK; + fprintf(stderr, "Reading %s\n\n", filename); + FILE *fp = fopen(filename, "rb"); + if (NULL == fp) { + fprintf(stderr, errorOpenFile, filename); + *errorCode = ERROR_OPEN_FILE; + return NULL; + } + + if (fseek(fp, 0L, SEEK_END)) { + *errorCode = ERROR_READING_FILE; + return NULL; + } + long file_size = ftell(fp); // get the file size + if (MAX_KEY_FILE_SIZE < file_size) { + fprintf(stderr, errorExceedKeyFileSize, filename, MAX_KEY_FILE_SIZE); + *errorCode = ERROR_KEY_FILE_TOO_BIG; + fclose(fp); + return NULL; + } + else if (-1L == file_size) { + fprintf(stderr, errorFileSize, filename); + *errorCode = ERROR_READING_FILE; + return NULL; + } + + rewind(fp); + //Read file into buffer + uchar *buffer = new uchar[file_size + 1]; + size_t read_bytes = fread(buffer, 1, file_size, fp); + buffer[file_size] = '\0'; + fclose(fp); + //Check for file encryption + if (0 == memcmp(buffer, strMAGIC, magicSize)) { //If file is encrypted, decrypt it first. 
+ unsigned char salt[magicSize]; + unsigned char *key = new unsigned char[keySize32]; + unsigned char *iv = new unsigned char[ivSize16]; + char *decrypted = new char[file_size]; + memcpy(&salt, buffer + magicSize, magicSize); + my_bytes_to_key((unsigned char *) salt, secret, key, iv); + uint32 d_size = 0; + int res = my_aes_decrypt_cbc((const char*)buffer + 2 * magicSize, file_size - 2 * magicSize, + decrypted, &d_size, key, keySize32, iv, ivSize16, 0); + if(0 != res) { + *errorCode = ERROR_FALSE_FILE_KEY; + delete[] buffer; buffer = NULL; + fprintf(stderr, errorFalseFileKey, filename); + } + else { + memcpy(buffer, decrypted, d_size); + buffer[d_size] = '\0'; + } + + delete[] decrypted; decrypted = NULL; + delete[] key; key = NULL; + delete[] iv; iv = NULL; + } + return (char*) buffer; +} + +bool EncKeys::isComment(const char *line) { + const char *error_p; + int offset, m_len = (int) strlen(line), ovector[MAX_OFFSETS_IN_PCRE_PATTERNS]; + pcre *pattern = pcre_compile("\\s*#.*", 0, &error_p, &offset, NULL); + int rc = pcre_exec( pattern, NULL, line, m_len, 0, 0, ovector, MAX_OFFSETS_IN_PCRE_PATTERNS); + pcre_free(pattern); + if (0 > rc) return false; + else return true; +} + + +void EncKeys::printKeyEntry( ulint id) +{ +#ifdef UNIV_DEBUG + keyentry *entry = getKeys(id); + if( NULL == entry) { + fprintf(stderr, "No such keyID=%u\n",id); + } + else { + fprintf(stderr, "Key: id:%3u \tiv:%d bytes\tkey:%d bytes\n", entry->id, strlen(entry->iv)/2, strlen(entry->key)/2); + } +#endif //UNIV_DEBUG +} diff --git a/storage/xtradb/enc/KeySingleton.cc b/storage/xtradb/enc/KeySingleton.cc new file mode 100644 index 0000000000000..fc633b4f63ceb --- /dev/null +++ b/storage/xtradb/enc/KeySingleton.cc @@ -0,0 +1,65 @@ +/* Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file KeySingleton.cc +Implementation of single pattern to keep keys for encrypting/decrypting pages. + +Created 09/13/2014 +***********************************************************************/ + + +#include "KeySingleton.h" +#include + + +bool KeySingleton::instanceInited = false; +KeySingleton KeySingleton::theInstance; +EncKeys KeySingleton::encKeys; + + + +KeySingleton & KeySingleton::getInstance() { +#ifdef UNIV_DEBUG + if( !instanceInited) { + fprintf(stderr, "Encryption / decryption keys were not initialized. " + "You can not read encrypted tables or columns\n"); + } +#endif UNIV_DEBUG + return theInstance; +} + +KeySingleton & KeySingleton::getInstance(const char *name, const char *url, + const int initType, const char *filekey) { + + if(instanceInited) return theInstance; + instanceInited = encKeys.initKeys(name, url, initType, filekey); + if( !instanceInited) { + fprintf(stderr, "Could not initialize any of the encryption / decryption keys. 
" + "You can not read encrypted tables\n\n"); + fflush(stderr); + } + + return theInstance; +} + +keyentry *KeySingleton::getKeys(int id) { + return encKeys.getKeys(id); +} + +ibool KeySingleton::hasKey(int id) { + return encKeys.getKeys(id) != NULL; +} + diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index d6ed09ed46a57..a6d4f866e7397 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -26,6 +26,7 @@ Created 10/25/1995 Heikki Tuuri #include "fil0fil.h" +#include "KeySingleton.h" #include #include @@ -56,6 +57,10 @@ Created 10/25/1995 Heikki Tuuri static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ #include "fil0pagecompress.h" + +#include "fil0pageencryption.h" +#include "fsp0pageencryption.h" + #include "zlib.h" #ifdef __linux__ #include @@ -816,8 +821,21 @@ fil_node_open_file( success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE, space->flags); + if (fil_page_can_not_decrypt(page)) { + /* if page is (still) encrypted, write an error and return. + * Otherwise the server would crash if decrypting is not possible. + * This may be the case, if the key file could not be opened on server startup. + */ + fprintf(stderr, + "InnoDB: can not decrypt %s\n", + node->name); + return false; + + } + space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); + page_size = fsp_flags_get_page_size(flags); atomic_writes = fsp_flags_get_atomic_writes(flags); @@ -1336,6 +1354,20 @@ fil_space_create( ut_a(fil_system); + if (fsp_flags_is_page_encrypted(flags)) { + if (!KeySingleton::getInstance().isAvailable() || KeySingleton::getInstance().getKeys(fsp_flags_get_page_encryption_key(flags))==NULL) { + /* by returning here it should be avoided that + * the server crashes, if someone tries to access an + * encrypted table and the encryption key is not available. + * The the table is treaded as non-existent. 
+ */ + ib_logf(IB_LOG_LEVEL_WARN, + "Tablespace '%s' can not be opened, because encryption key can not be found (space id: %lu, key %lu)\n" + , name, (ulong) id, fsp_flags_get_page_encryption_key(flags)); + return (FALSE); + } + } + /* Look for a matching tablespace and if found free it. */ do { mutex_enter(&fil_system->mutex); @@ -2080,6 +2112,7 @@ fil_check_first_page( { ulint space_id; ulint flags; + ulint page_is_encrypted = 0; if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { return(NULL); @@ -2087,12 +2120,19 @@ fil_check_first_page( space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page); flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + /* Note: the 1st page is usually not encrypted. If the Key Provider or the encryption key is not available, the + * check for reading the first page should intentionally fail with "can not decrypt" message. */ + page_is_encrypted = fil_page_can_not_decrypt(page); + if ((!KeySingleton::getInstance().isAvailable() || (page_is_encrypted == PAGE_ENCRYPTION_KEY_MISSING)) && page_is_encrypted) { + page_is_encrypted = 1; + } else { + page_is_encrypted = 0; + if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { + fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n", + UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); - if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { - fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n", - UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); - - return("innodb-page-size mismatch"); + return("innodb-page-size mismatch"); + } } if (!space_id && !flags) { @@ -2108,9 +2148,17 @@ fil_check_first_page( } } - if (buf_page_is_corrupted( + if (!page_is_encrypted && buf_page_is_corrupted( false, page, fsp_flags_get_zip_size(flags))) { return("checksum mismatch"); + } else { + if (page_is_encrypted) { + /* this error message is interpreted by the calling method, which is + * executed if the server starts in 
recovery mode. + */ + return("can not decrypt"); + + } } if (page_get_space_id(page) == space_id @@ -4307,6 +4355,7 @@ fil_validate_single_table_tablespace( check_first_page: fsp->success = TRUE; + fsp->encryption_error = 0; if (const char* check_msg = fil_read_first_page( fsp->file, FALSE, &fsp->flags, &fsp->id, &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) { @@ -4314,6 +4363,14 @@ fil_validate_single_table_tablespace( "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); fsp->success = FALSE; + if (strncmp(check_msg, "can not decrypt", strlen(check_msg))==0) { + /* by returning here, it should be avoided, that the server crashes, + * if started in recovery mode and can not decrypt tables, if + * the key file can not be read. + */ + fsp->encryption_error = 1; + return; + } } if (!fsp->success) { @@ -4456,6 +4513,13 @@ fil_load_single_table_tablespace( } if (!def.success && !remote.success) { + + if (def.encryption_error || remote.encryption_error) { + fprintf(stderr, + "InnoDB: Error: could not open single-table" + " tablespace file %s. 
Encryption error!\n", def.filepath); + return; + } /* The following call prints an error message */ os_file_get_last_error(true); fprintf(stderr, @@ -5290,7 +5354,7 @@ fil_extend_space_to_desired_size( success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - node, NULL, space_id, NULL, 0, 0, 0); + node, NULL, space_id, NULL, 0, 0, 0, 0, 0); #endif /* UNIV_HOTBACKUP */ if (success) { os_has_said_disk_full = FALSE; @@ -5683,6 +5747,9 @@ _fil_io( ibool ignore_nonexistent_pages; ibool page_compressed = FALSE; ulint page_compression_level = 0; + ibool page_encrypted = FALSE; + ulint page_encryption_key = 0; + is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5752,6 +5819,11 @@ _fil_io( page_compressed = fsp_flags_is_page_compressed(space->flags); page_compression_level = fsp_flags_get_page_compression_level(space->flags); + + page_encrypted = fsp_flags_is_page_encrypted(space->flags); + page_encryption_key = fsp_flags_get_page_encryption_key(space->flags); + + /* If we are deleting a tablespace we don't allow any read operations on that. However, we do allow write operations. 
*/ if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) { @@ -5896,9 +5968,8 @@ _fil_io( } /* Queue the aio request */ - ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message, space_id, trx, - page_compressed, page_compression_level, write_size); + ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, + offset, len, node, message, space_id, trx, page_compressed, page_compression_level, write_size, page_encrypted, page_encryption_key); #else /* In mysqlbackup do normal i/o, not aio */ @@ -6856,6 +6927,16 @@ fil_space_name( return (space->name); } +/*******************************************************************//** +Return space flags */ +ulint +fil_space_flags( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->flags); +} + /*******************************************************************//** Return page type name */ const char* diff --git a/storage/xtradb/fil/fil0fil.cc.orig b/storage/xtradb/fil/fil0fil.cc.orig new file mode 100644 index 0000000000000..96a80aaab6be0 --- /dev/null +++ b/storage/xtradb/fil/fil0fil.cc.orig @@ -0,0 +1,6885 @@ +/***************************************************************************** + +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fil/fil0fil.cc +The tablespace memory cache + +Created 10/25/1995 Heikki Tuuri +*******************************************************/ + +#include "fil0fil.h" + +#include +#include + +#include "mem0mem.h" +#include "hash0hash.h" +#include "os0file.h" +#include "mach0data.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "log0recv.h" +#include "fsp0fsp.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "dict0dict.h" +#include "page0page.h" +#include "page0zip.h" +#include "trx0sys.h" +#include "row0mysql.h" +#ifndef UNIV_HOTBACKUP +# include "buf0lru.h" +# include "ibuf0ibuf.h" +# include "sync0sync.h" +# include "os0sync.h" +#else /* !UNIV_HOTBACKUP */ +# include "srv0srv.h" +static ulint srv_data_read, srv_data_written; +#endif /* !UNIV_HOTBACKUP */ +#include "fil0pagecompress.h" + +#include "fil0pageencryption.h" +#include "fsp0pageencryption.h" + +#include "zlib.h" +#ifdef __linux__ +#include +#include +#include +#endif +#include "row0mysql.h" + +/* + IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE + ============================================= + +The tablespace cache is responsible for providing fast read/write access to +tablespaces and logs of the database. File creation and deletion is done +in other modules which know more of the logic of the operation, however. + +A tablespace consists of a chain of files. The size of the files does not +have to be divisible by the database block size, because we may just leave +the last incomplete block unused. When a new file is appended to the +tablespace, the maximum size of the file is also specified. 
At the moment, +we think that it is best to extend the file to its maximum size already at +the creation of the file, because then we can avoid dynamically extending +the file when more space is needed for the tablespace. + +A block's position in the tablespace is specified with a 32-bit unsigned +integer. The files in the chain are thought to be catenated, and the block +corresponding to an address n is the nth block in the catenated file (where +the first block is named the 0th block, and the incomplete block fragments +at the end of files are not taken into account). A tablespace can be extended +by appending a new file at the end of the chain. + +Our tablespace concept is similar to the one of Oracle. + +To acquire more speed in disk transfers, a technique called disk striping is +sometimes used. This means that logical block addresses are divided in a +round-robin fashion across several disks. Windows NT supports disk striping, +so there we do not need to support it in the database. Disk striping is +implemented in hardware in RAID disks. We conclude that it is not necessary +to implement it in the database. Oracle 7 does not support disk striping, +either. + +Another trick used at some database sites is replacing tablespace files by +raw disks, that is, the whole physical disk drive, or a partition of it, is +opened as a single file, and it is accessed through byte offsets calculated +from the start of the disk or the partition. This is recommended in some +books on database tuning to achieve more speed in i/o. Using raw disk +certainly prevents the OS from fragmenting disk space, but it is not clear +if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file +system + EIDE Conner disk only a negligible difference in speed when reading +from a file, versus reading from a raw disk. + +To have fast access to a tablespace or a log file, we put the data structures +to a hash table. 
Each tablespace and log file is given an unique 32-bit +identifier. + +Some operating systems do not support many open files at the same time, +though NT seems to tolerate at least 900 open files. Therefore, we put the +open files in an LRU-list. If we need to open another file, we may close the +file at the end of the LRU-list. When an i/o-operation is pending on a file, +the file cannot be closed. We take the file nodes with pending i/o-operations +out of the LRU-list and keep a count of pending operations. When an operation +completes, we decrement the count and return the file node to the LRU-list if +the count drops to zero. */ + +/** When mysqld is run, the default directory "." is the mysqld datadir, +but in the MySQL Embedded Server Library and ibbackup it is not the default +directory, and we must set the base file path explicitly */ +UNIV_INTERN const char* fil_path_to_mysql_datadir = "."; + +/** The number of fsyncs done to the log */ +UNIV_INTERN ulint fil_n_log_flushes = 0; + +/** Number of pending redo log flushes */ +UNIV_INTERN ulint fil_n_pending_log_flushes = 0; +/** Number of pending tablespace flushes */ +UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0; + +/** Number of files currently open */ +UNIV_INTERN ulint fil_n_file_opened = 0; + +/** The null file address */ +UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0}; + +#ifdef UNIV_PFS_MUTEX +/* Key to register fil_system_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + +#ifdef UNIV_PFS_RWLOCK +/* Key to register file space latch with performance schema */ +UNIV_INTERN mysql_pfs_key_t fil_space_latch_key; +#endif /* UNIV_PFS_RWLOCK */ + +/** File node of a tablespace or the log data space */ +struct fil_node_t { + fil_space_t* space; /*!< backpointer to the space where this node + belongs */ + char* name; /*!< path to the file */ + ibool open; /*!< TRUE if file open */ + os_file_t handle; /*!< OS handle to the file, if 
file open */ + os_event_t sync_event;/*!< Condition event to group and + serialize calls to fsync */ + ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw + device or a raw disk partition */ + ulint size; /*!< size of the file in database pages, 0 if + not known yet; the possible last incomplete + megabyte may be ignored if space == 0 */ + ulint n_pending; + /*!< count of pending i/o's on this file; + closing of the file is not allowed if + this is > 0 */ + ulint n_pending_flushes; + /*!< count of pending flushes on this file; + closing of the file is not allowed if + this is > 0 */ + ibool being_extended; + /*!< TRUE if the node is currently + being extended. */ + ib_int64_t modification_counter;/*!< when we write to the file we + increment this by one */ + ib_int64_t flush_counter;/*!< up to what + modification_counter value we have + flushed the modifications to disk */ + UT_LIST_NODE_T(fil_node_t) chain; + /*!< link field for the file chain */ + UT_LIST_NODE_T(fil_node_t) LRU; + /*!< link field for the LRU list */ + ulint magic_n;/*!< FIL_NODE_MAGIC_N */ +}; + +/** Value of fil_node_t::magic_n */ +#define FIL_NODE_MAGIC_N 89389 + +/** Tablespace or log data space: let us call them by a common name space */ +struct fil_space_t { + char* name; /*!< space name = the path to the first file in + it */ + ulint id; /*!< space id */ + ib_int64_t tablespace_version; + /*!< in DISCARD/IMPORT this timestamp + is used to check if we should ignore + an insert buffer merge request for a + page because it actually was for the + previous incarnation of the space */ + ibool mark; /*!< this is set to TRUE at database startup if + the space corresponds to a table in the InnoDB + data dictionary; so we can print a warning of + orphaned tablespaces */ + ibool stop_ios;/*!< TRUE if we want to rename the + .ibd file of tablespace and want to + stop temporarily posting of new i/o + requests on the file */ + ibool stop_new_ops; + /*!< we set this TRUE when we start + deleting a 
single-table tablespace. + When this is set following new ops + are not allowed: + * read IO request + * ibuf merge + * file flush + Note that we can still possibly have + new write operations because we don't + check this flag when doing flush + batches. */ + ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or + FIL_ARCH_LOG */ + UT_LIST_BASE_NODE_T(fil_node_t) chain; + /*!< base node for the file chain */ + ulint size; /*!< space size in pages; 0 if a single-table + tablespace whose size we do not know yet; + last incomplete megabytes in data files may be + ignored if space == 0 */ + ulint flags; /*!< tablespace flags; see + fsp_flags_is_valid(), + fsp_flags_get_zip_size() */ + ulint n_reserved_extents; + /*!< number of reserved free extents for + ongoing operations like B-tree page split */ + ulint n_pending_flushes; /*!< this is positive when flushing + the tablespace to disk; dropping of the + tablespace is forbidden if this is positive */ + ulint n_pending_ops;/*!< this is positive when we + have pending operations against this + tablespace. The pending operations can + be ibuf merges or lock validation code + trying to read a block. 
+ Dropping of the tablespace is forbidden + if this is positive */ + hash_node_t hash; /*!< hash chain node */ + hash_node_t name_hash;/*!< hash chain the name_hash table */ +#ifndef UNIV_HOTBACKUP + prio_rw_lock_t latch; /*!< latch protecting the file space storage + allocation */ +#endif /* !UNIV_HOTBACKUP */ + UT_LIST_NODE_T(fil_space_t) unflushed_spaces; + /*!< list of spaces with at least one unflushed + file we have written to */ + bool is_in_unflushed_spaces; + /*!< true if this space is currently in + unflushed_spaces */ + ibool is_corrupt; + UT_LIST_NODE_T(fil_space_t) space_list; + /*!< list of all spaces */ + ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ +}; + +/** Value of fil_space_t::magic_n */ +#define FIL_SPACE_MAGIC_N 89472 + +/** The tablespace memory cache; also the totality of logs (the log +data space) is stored here; below we talk about tablespaces, but also +the ib_logfiles form a 'space' and it is handled here */ +struct fil_system_t { +#ifndef UNIV_HOTBACKUP + ib_mutex_t mutex; /*!< The mutex protecting the cache */ +#endif /* !UNIV_HOTBACKUP */ + hash_table_t* spaces; /*!< The hash table of spaces in the + system; they are hashed on the space + id */ + hash_table_t* name_hash; /*!< hash table based on the space + name */ + UT_LIST_BASE_NODE_T(fil_node_t) LRU; + /*!< base node for the LRU list of the + most recently used open files with no + pending i/o's; if we start an i/o on + the file, we first remove it from this + list, and return it to the start of + the list when the i/o ends; + log files and the system tablespace are + not put to this list: they are opened + after the startup, and kept open until + shutdown */ + UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces; + /*!< base node for the list of those + tablespaces whose files contain + unflushed writes; those spaces have + at least one file node where + modification_counter > flush_counter */ + ulint n_open; /*!< number of files currently open */ + ulint max_n_open; /*!< n_open is not 
allowed to exceed + this */ + ib_int64_t modification_counter;/*!< when we write to a file we + increment this by one */ + ulint max_assigned_id;/*!< maximum space id in the existing + tables, or assigned during the time + mysqld has been up; at an InnoDB + startup we scan the data dictionary + and set here the maximum of the + space id's of the tables there */ + ib_int64_t tablespace_version; + /*!< a counter which is incremented for + every space object memory creation; + every space mem object gets a + 'timestamp' from this; in DISCARD/ + IMPORT this is used to check if we + should ignore an insert buffer merge + request */ + UT_LIST_BASE_NODE_T(fil_space_t) space_list; + /*!< list of all file spaces */ + ibool space_id_reuse_warned; + /* !< TRUE if fil_space_create() + has issued a warning about + potential space_id reuse */ +}; + +/** The tablespace memory cache. This variable is NULL before the module is +initialized. */ +static fil_system_t* fil_system = NULL; + +/** Determine if (i) is a user tablespace id or not. */ +# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open) + +/** Determine if user has explicitly disabled fsync(). */ +#ifndef __WIN__ +# define fil_buffering_disabled(s) \ + (((s)->purpose == FIL_TABLESPACE \ + && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)\ + || ((s)->purpose == FIL_LOG \ + && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) + +#else /* __WIN__ */ +# define fil_buffering_disabled(s) (0) +#endif /* __WIN__ */ + +#ifdef UNIV_DEBUG +/** Try fil_validate() every this many times */ +# define FIL_VALIDATE_SKIP 17 + +/******************************************************************//** +Checks the consistency of the tablespace cache some of the time. +@return TRUE if ok or the check was skipped */ +static +ibool +fil_validate_skip(void) +/*===================*/ +{ + /** The fil_validate() call skip counter. Use a signed type + because of the race condition below. 
*/ + static int fil_validate_count = FIL_VALIDATE_SKIP; + + /* There is a race condition below, but it does not matter, + because this call is only for heuristic purposes. We want to + reduce the call frequency of the costly fil_validate() check + in debug builds. */ + if (--fil_validate_count > 0) { + return(TRUE); + } + + fil_validate_count = FIL_VALIDATE_SKIP; + return(fil_validate()); +} +#endif /* UNIV_DEBUG */ + +/********************************************************************//** +Determines if a file node belongs to the least-recently-used list. +@return TRUE if the file belongs to fil_system->LRU mutex. */ +UNIV_INLINE +ibool +fil_space_belongs_in_lru( +/*=====================*/ + const fil_space_t* space) /*!< in: file space */ +{ + return(space->purpose == FIL_TABLESPACE + && fil_is_user_tablespace_id(space->id)); +} + +/********************************************************************//** +NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! + +Prepares a file node for i/o. Opens the file if it is closed. Updates the +pending i/o's field in the node and the system appropriately. Takes the node +off the LRU list if it is in the LRU list. The caller must hold the fil_sys +mutex. +@return false if the file can't be opened, otherwise true */ +static +bool +fil_node_prepare_for_io( +/*====================*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + fil_space_t* space); /*!< in: space */ +/********************************************************************//** +Updates the data structures when an i/o operation finishes. Updates the +pending i/o's field in the node appropriately. 
*/ +static +void +fil_node_complete_io( +/*=================*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks + the node as modified if + type == OS_FILE_WRITE */ +/*******************************************************************//** +Frees a space object from the tablespace memory cache. Closes the files in +the chain but does not delete them. There must not be any pending i/o's or +flushes on the files. +@return TRUE on success */ +static +ibool +fil_space_free( +/*===========*/ + ulint id, /* in: space id */ + ibool x_latched); /* in: TRUE if caller has space->latch + in X mode */ +/********************************************************************//** +Reads data from a space to a buffer. Remember that the possible incomplete +blocks at the end of file are ignored: they are not taken into account when +calculating the byte offset within a space. +@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do +i/o on a tablespace which does not exist */ +UNIV_INLINE +dberr_t +fil_read( +/*=====*/ + bool sync, /*!< in: true if synchronous aio is desired */ + ulint space_id, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint block_offset, /*!< in: offset in number of blocks */ + ulint byte_offset, /*!< in: remainder of offset in bytes; in aio + this must be divisible by the OS block size */ + ulint len, /*!< in: how many bytes to read; this must not + cross a file boundary; in aio this must be a + block size multiple */ + void* buf, /*!< in/out: buffer where to store data read; + in aio this must be appropriately aligned */ + void* message, /*!< in: message for aio handler if non-sync + aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + 
actual page size does not decrease. */ +{ + return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset, + byte_offset, len, buf, message, write_size)); +} + +/********************************************************************//** +Writes data to a space from a buffer. Remember that the possible incomplete +blocks at the end of file are ignored: they are not taken into account when +calculating the byte offset within a space. +@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do +i/o on a tablespace which does not exist */ +UNIV_INLINE +dberr_t +fil_write( +/*======*/ + bool sync, /*!< in: true if synchronous aio is desired */ + ulint space_id, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint block_offset, /*!< in: offset in number of blocks */ + ulint byte_offset, /*!< in: remainder of offset in bytes; in aio + this must be divisible by the OS block size */ + ulint len, /*!< in: how many bytes to write; this must + not cross a file boundary; in aio this must + be a block size multiple */ + void* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + void* message, /*!< in: message for aio handler if non-sync + aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ +{ + ut_ad(!srv_read_only_mode); + + return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, + byte_offset, len, buf, message, write_size)); +} + +/*******************************************************************//** +Returns the table space by a given id, NULL if not found. 
*/ +fil_space_t* +fil_space_get_by_id( +/*================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + + ut_ad(mutex_own(&fil_system->mutex)); + + HASH_SEARCH(hash, fil_system->spaces, id, + fil_space_t*, space, + ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), + space->id == id); + + return(space); +} + +/****************************************************************//** +Get space id from fil node */ +ulint +fil_node_get_space_id( +/*==================*/ + fil_node_t* node) /*!< in: Compressed node*/ +{ + ut_ad(node); + ut_ad(node->space); + + return (node->space->id); +} + +/*******************************************************************//** +Returns the table space by a given name, NULL if not found. */ +UNIV_INLINE +fil_space_t* +fil_space_get_by_name( +/*==================*/ + const char* name) /*!< in: space name */ +{ + fil_space_t* space; + ulint fold; + + ut_ad(mutex_own(&fil_system->mutex)); + + fold = ut_fold_string(name); + + HASH_SEARCH(name_hash, fil_system->name_hash, fold, + fil_space_t*, space, + ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), + !strcmp(name, space->name)); + + return(space); +} + +#ifndef UNIV_HOTBACKUP +/*******************************************************************//** +Returns the version number of a tablespace, -1 if not found. +@return version number, -1 if the tablespace does not exist in the +memory cache */ +UNIV_INTERN +ib_int64_t +fil_space_get_version( +/*==================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ib_int64_t version = -1; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + if (space) { + version = space->tablespace_version; + } + + mutex_exit(&fil_system->mutex); + + return(version); +} + +/*******************************************************************//** +Returns the latch of a file space. 
+@return latch protecting storage allocation */ +UNIV_INTERN +prio_rw_lock_t* +fil_space_get_latch( +/*================*/ + ulint id, /*!< in: space id */ + ulint* flags) /*!< out: tablespace flags */ +{ + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + if (flags) { + *flags = space->flags; + } + + mutex_exit(&fil_system->mutex); + + return(&(space->latch)); +} + +/*******************************************************************//** +Returns the type of a file space. +@return FIL_TABLESPACE or FIL_LOG */ +UNIV_INTERN +ulint +fil_space_get_type( +/*===============*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + mutex_exit(&fil_system->mutex); + + return(space->purpose); +} +#endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Checks if all the file nodes in a space are flushed. The caller must hold +the fil_system mutex. +@return true if all are flushed */ +static +bool +fil_space_is_flushed( +/*=================*/ + fil_space_t* space) /*!< in: space */ +{ + fil_node_t* node; + + ut_ad(mutex_own(&fil_system->mutex)); + + node = UT_LIST_GET_FIRST(space->chain); + + while (node) { + if (node->modification_counter > node->flush_counter) { + + ut_ad(!fil_buffering_disabled(space)); + return(false); + } + + node = UT_LIST_GET_NEXT(chain, node); + } + + return(true); +} + +/*******************************************************************//** +Appends a new file to the chain of files of a space. File must be closed. 
+@return pointer to the file name, or NULL on error */ +UNIV_INTERN +char* +fil_node_create( +/*============*/ + const char* name, /*!< in: file name (file must be closed) */ + ulint size, /*!< in: file size in database blocks, rounded + downwards to an integer */ + ulint id, /*!< in: space id where to append */ + ibool is_raw) /*!< in: TRUE if a raw device or + a raw disk partition */ +{ + fil_node_t* node; + fil_space_t* space; + + ut_a(fil_system); + ut_a(name); + + mutex_enter(&fil_system->mutex); + + node = static_cast(mem_zalloc(sizeof(fil_node_t))); + + node->name = mem_strdup(name); + + ut_a(!is_raw || srv_start_raw_disk_in_use); + + node->sync_event = os_event_create(); + node->is_raw_disk = is_raw; + node->size = size; + node->magic_n = FIL_NODE_MAGIC_N; + + space = fil_space_get_by_id(id); + + if (!space) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: Could not find tablespace %lu for\n" + "InnoDB: file ", (ulong) id); + ut_print_filename(stderr, name); + fputs(" in the tablespace memory cache.\n", stderr); + mem_free(node->name); + + mem_free(node); + + mutex_exit(&fil_system->mutex); + + return(NULL); + } + + space->size += size; + + node->space = space; + + UT_LIST_ADD_LAST(chain, space->chain, node); + + if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { + + fil_system->max_assigned_id = id; + } + + mutex_exit(&fil_system->mutex); + + return(node->name); +} + +/********************************************************************//** +Opens a file of a node of a tablespace. The caller must own the fil_system +mutex. 
+@return false if the file can't be opened, otherwise true */ +static +bool +fil_node_open_file( +/*===============*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + fil_space_t* space) /*!< in: space */ +{ + os_offset_t size_bytes; + ibool ret; + ibool success; + byte* buf2; + byte* page; + ulint space_id; + ulint flags=0; + ulint page_size; + ulint atomic_writes=0; + + ut_ad(mutex_own(&(system->mutex))); + ut_a(node->n_pending == 0); + ut_a(node->open == FALSE); + + if (node->size == 0) { + /* It must be a single-table tablespace and we do not know the + size of the file yet. First we open the file in the normal + mode, no async I/O here, for simplicity. Then do some checks, + and close the file again. + NOTE that we could not use the simple file read function + os_file_read() in Windows to read from a file opened for + async I/O! */ + + node->handle = os_file_create_simple_no_error_handling( + innodb_file_data_key, node->name, OS_FILE_OPEN, + OS_FILE_READ_ONLY, &success, 0); + if (!success) { + /* The following call prints an error message */ + os_file_get_last_error(true); + + ut_print_timestamp(stderr); + + ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot " + "open %s\n. 
InnoDB: Have you deleted .ibd " + "files under a running mysqld server?\n", + node->name); + + return(false); + } + + size_bytes = os_file_get_size(node->handle); + ut_a(size_bytes != (os_offset_t) -1); +#ifdef UNIV_HOTBACKUP + if (space->id == 0) { + node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); + os_file_close(node->handle); + goto add_size; + } +#endif /* UNIV_HOTBACKUP */ + ut_a(space->purpose != FIL_LOG); + ut_a(fil_is_user_tablespace_id(space->id)); + + if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + fprintf(stderr, + "InnoDB: Error: the size of single-table" + " tablespace file %s\n" + "InnoDB: is only "UINT64PF"," + " should be at least %lu!\n", + node->name, + size_bytes, + (ulong) (FIL_IBD_FILE_INITIAL_SIZE + * UNIV_PAGE_SIZE)); + + ut_a(0); + } + + /* Read the first page of the tablespace */ + + buf2 = static_cast(ut_malloc(2 * UNIV_PAGE_SIZE)); + /* Align the memory for file i/o if we might have O_DIRECT + set */ + page = static_cast(ut_align(buf2, UNIV_PAGE_SIZE)); + + success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE, + space->flags); + + space_id = fsp_header_get_space_id(page); + flags = fsp_header_get_flags(page); + page_size = fsp_flags_get_page_size(flags); + atomic_writes = fsp_flags_get_atomic_writes(flags); + + ut_free(buf2); + + /* Close the file now that we have read the space id from it */ + + os_file_close(node->handle); + + if (UNIV_UNLIKELY(space_id != space->id)) { + fprintf(stderr, + "InnoDB: Error: tablespace id is %lu" + " in the data dictionary\n" + "InnoDB: but in file %s it is %lu!\n", + space->id, node->name, space_id); + + ut_error; + } + + if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED + || space_id == 0)) { + fprintf(stderr, + "InnoDB: Error: tablespace id %lu" + " in file %s is not sensible\n", + (ulong) space_id, node->name); + + ut_error; + } + + if (UNIV_UNLIKELY(fsp_flags_get_page_size(space->flags) + != page_size)) { + fprintf(stderr, + "InnoDB: Error: tablespace file %s" + " has page 
size 0x%lx\n" + "InnoDB: but the data dictionary" + " expects page size 0x%lx!\n", + node->name, flags, + fsp_flags_get_page_size(space->flags)); + + ut_error; + } + + if (UNIV_UNLIKELY(space->flags != flags)) { + fprintf(stderr, + "InnoDB: Error: table flags are 0x%lx" + " in the data dictionary\n" + "InnoDB: but the flags in file %s are 0x%lx!\n", + space->flags, node->name, flags); + + ut_error; + } + + if (UNIV_UNLIKELY(space->flags != flags)) { + if (!dict_tf_verify_flags(space->flags, flags)) { + fprintf(stderr, + "InnoDB: Error: table flags are 0x%lx" + " in the data dictionary\n" + "InnoDB: but the flags in file %s are 0x%lx!\n", + space->flags, node->name, flags); + ut_error; + } + } + + if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) { + /* Truncate the size to whole extent size. */ + size_bytes = ut_2pow_round(size_bytes, + FSP_EXTENT_SIZE * + UNIV_PAGE_SIZE); + } + + if (!fsp_flags_is_compressed(flags)) { + node->size = (ulint) + (size_bytes + / fsp_flags_get_page_size(flags)); + } else { + node->size = (ulint) + (size_bytes + / fsp_flags_get_zip_size(flags)); + } + +#ifdef UNIV_HOTBACKUP +add_size: +#endif /* UNIV_HOTBACKUP */ + space->size += node->size; + } + + atomic_writes = fsp_flags_get_atomic_writes(space->flags); + + /* printf("Opening file %s\n", node->name); */ + + /* Open the file for reading and writing, in Windows normally in the + unbuffered async I/O mode, though global variables may make + os_file_create() to fall back to the normal file I/O mode. 
*/ + + if (space->purpose == FIL_LOG) { + node->handle = os_file_create(innodb_file_log_key, + node->name, OS_FILE_OPEN, + OS_FILE_AIO, OS_LOG_FILE, + &ret, atomic_writes); + } else if (node->is_raw_disk) { + node->handle = os_file_create(innodb_file_data_key, + node->name, + OS_FILE_OPEN_RAW, + OS_FILE_AIO, OS_DATA_FILE, + &ret, atomic_writes); + } else { + node->handle = os_file_create(innodb_file_data_key, + node->name, OS_FILE_OPEN, + OS_FILE_AIO, OS_DATA_FILE, + &ret, atomic_writes); + } + + ut_a(ret); + + node->open = TRUE; + + system->n_open++; + fil_n_file_opened++; + + if (fil_space_belongs_in_lru(space)) { + + /* Put the node to the LRU list */ + UT_LIST_ADD_FIRST(LRU, system->LRU, node); + } + + return(true); +} + +/**********************************************************************//** +Closes a file. */ +static +void +fil_node_close_file( +/*================*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system) /*!< in: tablespace memory cache */ +{ + ibool ret; + + ut_ad(node && system); + ut_ad(mutex_own(&(system->mutex))); + ut_a(node->open); + ut_a(node->n_pending == 0); + ut_a(node->n_pending_flushes == 0); + ut_a(!node->being_extended); +#ifndef UNIV_HOTBACKUP + ut_a(node->modification_counter == node->flush_counter + || srv_fast_shutdown == 2); +#endif /* !UNIV_HOTBACKUP */ + + ret = os_file_close(node->handle); + ut_a(ret); + + /* printf("Closing file %s\n", node->name); */ + + node->open = FALSE; + ut_a(system->n_open > 0); + system->n_open--; + fil_n_file_opened--; + + if (fil_space_belongs_in_lru(node->space)) { + + ut_a(UT_LIST_GET_LEN(system->LRU) > 0); + + /* The node is in the LRU list, remove it */ + UT_LIST_REMOVE(LRU, system->LRU, node); + } +} + +/********************************************************************//** +Tries to close a file in the LRU list. The caller must hold the fil_sys +mutex. 
+@return TRUE if success, FALSE if should retry later; since i/o's +generally complete in < 100 ms, and as InnoDB writes at most 128 pages +from the buffer pool in a batch, and then immediately flushes the +files, there is a good chance that the next time we find a suitable +node from the LRU list */ +static +ibool +fil_try_to_close_file_in_LRU( +/*=========================*/ + ibool print_info) /*!< in: if TRUE, prints information why it + cannot close a file */ +{ + fil_node_t* node; + + ut_ad(mutex_own(&fil_system->mutex)); + + if (print_info) { + fprintf(stderr, + "InnoDB: fil_sys open file LRU len %lu\n", + (ulong) UT_LIST_GET_LEN(fil_system->LRU)); + } + + for (node = UT_LIST_GET_LAST(fil_system->LRU); + node != NULL; + node = UT_LIST_GET_PREV(LRU, node)) { + + if (node->modification_counter == node->flush_counter + && node->n_pending_flushes == 0 + && !node->being_extended) { + + fil_node_close_file(node, fil_system); + + return(TRUE); + } + + if (!print_info) { + continue; + } + + if (node->n_pending_flushes > 0) { + fputs("InnoDB: cannot close file ", stderr); + ut_print_filename(stderr, node->name); + fprintf(stderr, ", because n_pending_flushes %lu\n", + (ulong) node->n_pending_flushes); + } + + if (node->modification_counter != node->flush_counter) { + fputs("InnoDB: cannot close file ", stderr); + ut_print_filename(stderr, node->name); + fprintf(stderr, + ", because mod_count %ld != fl_count %ld\n", + (long) node->modification_counter, + (long) node->flush_counter); + + } + + if (node->being_extended) { + fputs("InnoDB: cannot close file ", stderr); + ut_print_filename(stderr, node->name); + fprintf(stderr, ", because it is being extended\n"); + } + } + + return(FALSE); +} + +/*******************************************************************//** +Reserves the fil_system mutex and tries to make sure we can open at least one +file while holding it. 
This should be called before calling +fil_node_prepare_for_io(), because that function may need to open a file. */ +static +void +fil_mutex_enter_and_prepare_for_io( +/*===============================*/ + ulint space_id) /*!< in: space id */ +{ + fil_space_t* space; + ibool success; + ibool print_info = FALSE; + ulint count = 0; + ulint count2 = 0; + +retry: + mutex_enter(&fil_system->mutex); + + if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { + /* We keep log files and system tablespace files always open; + this is important in preventing deadlocks in this module, as + a page read completion often performs another read from the + insert buffer. The insert buffer is in tablespace 0, and we + cannot end up waiting in this function. */ + + return; + } + + space = fil_space_get_by_id(space_id); + + if (space != NULL && space->stop_ios) { + /* We are going to do a rename file and want to stop new i/o's + for a while */ + + if (count2 > 20000) { + fputs("InnoDB: Warning: tablespace ", stderr); + ut_print_filename(stderr, space->name); + fprintf(stderr, + " has i/o ops stopped for a long time %lu\n", + (ulong) count2); + } + + mutex_exit(&fil_system->mutex); + +#ifndef UNIV_HOTBACKUP + + /* Wake the i/o-handler threads to make sure pending + i/o's are performed */ + os_aio_simulated_wake_handler_threads(); + + /* The sleep here is just to give IO helper threads a + bit of time to do some work. It is not required that + all IO related to the tablespace being renamed must + be flushed here as we do fil_flush() in + fil_rename_tablespace() as well. 
*/ + os_thread_sleep(20000); + +#endif /* UNIV_HOTBACKUP */ + + /* Flush tablespaces so that we can close modified + files in the LRU list */ + fil_flush_file_spaces(FIL_TABLESPACE); + + os_thread_sleep(20000); + + count2++; + + goto retry; + } + + if (fil_system->n_open < fil_system->max_n_open) { + + return; + } + + /* If the file is already open, no need to do anything; if the space + does not exist, we handle the situation in the function which called + this function */ + + if (!space || UT_LIST_GET_FIRST(space->chain)->open) { + + return; + } + + if (count > 1) { + print_info = TRUE; + } + + /* Too many files are open, try to close some */ +close_more: + success = fil_try_to_close_file_in_LRU(print_info); + + if (success && fil_system->n_open >= fil_system->max_n_open) { + + goto close_more; + } + + if (fil_system->n_open < fil_system->max_n_open) { + /* Ok */ + + return; + } + + if (count >= 2) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: too many (%lu) files stay open" + " while the maximum\n" + "InnoDB: allowed value would be %lu.\n" + "InnoDB: You may need to raise the value of" + " innodb_open_files in\n" + "InnoDB: my.cnf.\n", + (ulong) fil_system->n_open, + (ulong) fil_system->max_n_open); + + return; + } + + mutex_exit(&fil_system->mutex); + +#ifndef UNIV_HOTBACKUP + /* Wake the i/o-handler threads to make sure pending i/o's are + performed */ + os_aio_simulated_wake_handler_threads(); + + os_thread_sleep(20000); +#endif + /* Flush tablespaces so that we can close modified files in the LRU + list */ + + fil_flush_file_spaces(FIL_TABLESPACE); + + count++; + + goto retry; +} + +/*******************************************************************//** +Frees a file node object from a tablespace memory cache. 
*/ +static +void +fil_node_free( +/*==========*/ + fil_node_t* node, /*!< in, own: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + fil_space_t* space) /*!< in: space where the file node is chained */ +{ + ut_ad(node && system && space); + ut_ad(mutex_own(&(system->mutex))); + ut_a(node->magic_n == FIL_NODE_MAGIC_N); + ut_a(node->n_pending == 0); + ut_a(!node->being_extended); + + if (node->open) { + /* We fool the assertion in fil_node_close_file() to think + there are no unflushed modifications in the file */ + + node->modification_counter = node->flush_counter; + os_event_set(node->sync_event); + + if (fil_buffering_disabled(space)) { + + ut_ad(!space->is_in_unflushed_spaces); + ut_ad(fil_space_is_flushed(space)); + + } else if (space->is_in_unflushed_spaces + && fil_space_is_flushed(space)) { + + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE(unflushed_spaces, + system->unflushed_spaces, + space); + } + + fil_node_close_file(node, system); + } + + space->size -= node->size; + + UT_LIST_REMOVE(chain, space->chain, node); + + os_event_free(node->sync_event); + mem_free(node->name); + mem_free(node); +} + +#ifdef UNIV_LOG_ARCHIVE +/****************************************************************//** +Drops files from the start of a file space, so that its size is cut by +the amount given. 
*/ +UNIV_INTERN +void +fil_space_truncate_start( +/*=====================*/ + ulint id, /*!< in: space id */ + ulint trunc_len) /*!< in: truncate by this much; it is an error + if this does not equal to the combined size of + some initial files in the space */ +{ + fil_node_t* node; + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + while (trunc_len > 0) { + node = UT_LIST_GET_FIRST(space->chain); + + ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len); + + trunc_len -= node->size * UNIV_PAGE_SIZE; + + fil_node_free(node, fil_system, space); + } + + mutex_exit(&fil_system->mutex); +} + +/****************************************************************//** +Check is there node in file space with given name. */ +UNIV_INTERN +ibool +fil_space_contains_node( +/*====================*/ + ulint id, /*!< in: space id */ + char* node_name) /*!< in: node name */ +{ + fil_node_t* node; + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + for (node = UT_LIST_GET_FIRST(space->chain); node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + if (ut_strcmp(node->name, node_name) == 0) { + mutex_exit(&fil_system->mutex); + return(TRUE); + } + + } + + mutex_exit(&fil_system->mutex); + return(FALSE); +} + +#endif /* UNIV_LOG_ARCHIVE */ + +/*******************************************************************//** +Creates a space memory object and puts it to the 'fil system' hash table. +If there is an error, prints an error message to the .err log. 
+@return TRUE if success */ +UNIV_INTERN +ibool +fil_space_create( +/*=============*/ + const char* name, /*!< in: space name */ + ulint id, /*!< in: space id */ + ulint flags, /*!< in: tablespace flags */ + ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ +{ + fil_space_t* space; + + DBUG_EXECUTE_IF("fil_space_create_failure", return(false);); + + ut_a(fil_system); + + /* Look for a matching tablespace and if found free it. */ + do { + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_name(name); + + if (space != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Tablespace '%s' exists in the cache " + "with id %lu != %lu", + name, (ulong) space->id, (ulong) id); + + if (id == 0 || purpose != FIL_TABLESPACE) { + + mutex_exit(&fil_system->mutex); + + return(FALSE); + } + + ib_logf(IB_LOG_LEVEL_WARN, + "Freeing existing tablespace '%s' entry " + "from the cache with id %lu", + name, (ulong) id); + + ibool success = fil_space_free(space->id, FALSE); + ut_a(success); + + mutex_exit(&fil_system->mutex); + } + + } while (space != 0); + + space = fil_space_get_by_id(id); + + if (space != 0) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Trying to add tablespace '%s' with id %lu " + "to the tablespace memory cache, but tablespace '%s' " + "with id %lu already exists in the cache!", + name, (ulong) id, space->name, (ulong) space->id); + + mutex_exit(&fil_system->mutex); + + return(FALSE); + } + + space = static_cast(mem_zalloc(sizeof(*space))); + + space->name = mem_strdup(name); + space->id = id; + + fil_system->tablespace_version++; + space->tablespace_version = fil_system->tablespace_version; + space->mark = FALSE; + + if (purpose == FIL_TABLESPACE && !recv_recovery_on + && id > fil_system->max_assigned_id) { + + if (!fil_system->space_id_reuse_warned) { + fil_system->space_id_reuse_warned = TRUE; + + ib_logf(IB_LOG_LEVEL_WARN, + "Allocated tablespace %lu, old maximum " + "was %lu", + (ulong) id, + (ulong) fil_system->max_assigned_id); + } + + fil_system->max_assigned_id = 
id; + } + + space->purpose = purpose; + space->flags = flags; + + space->magic_n = FIL_SPACE_MAGIC_N; + + rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); + + HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space); + + HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(name), space); + space->is_in_unflushed_spaces = false; + + space->is_corrupt = FALSE; + + UT_LIST_ADD_LAST(space_list, fil_system->space_list, space); + + mutex_exit(&fil_system->mutex); + + return(TRUE); +} + +/*******************************************************************//** +Assigns a new space id for a new single-table tablespace. This works simply by +incrementing the global counter. If 4 billion id's is not enough, we may need +to recycle id's. +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id) /*!< in/out: space id */ +{ + ulint id; + ibool success; + + mutex_enter(&fil_system->mutex); + + id = *space_id; + + if (id < fil_system->max_assigned_id) { + id = fil_system->max_assigned_id; + } + + id++; + + if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { + ut_print_timestamp(stderr); + fprintf(stderr, + "InnoDB: Warning: you are running out of new" + " single-table tablespace id's.\n" + "InnoDB: Current counter is %lu and it" + " must not exceed %lu!\n" + "InnoDB: To reset the counter to zero" + " you have to dump all your tables and\n" + "InnoDB: recreate the whole InnoDB installation.\n", + (ulong) id, + (ulong) SRV_LOG_SPACE_FIRST_ID); + } + + success = (id < SRV_LOG_SPACE_FIRST_ID); + + if (success) { + *space_id = fil_system->max_assigned_id = id; + } else { + ut_print_timestamp(stderr); + fprintf(stderr, + "InnoDB: You have run out of single-table" + " tablespace id's!\n" + "InnoDB: Current counter is %lu.\n" + "InnoDB: To reset the counter to zero you" + " have to dump all your tables and\n" + "InnoDB: recreate the whole InnoDB 
installation.\n", + (ulong) id); + *space_id = ULINT_UNDEFINED; + } + + mutex_exit(&fil_system->mutex); + + return(success); +} + +/*******************************************************************//** +Frees a space object from the tablespace memory cache. Closes the files in +the chain but does not delete them. There must not be any pending i/o's or +flushes on the files. +@return TRUE if success */ +static +ibool +fil_space_free( +/*===========*/ + /* out: TRUE if success */ + ulint id, /* in: space id */ + ibool x_latched) /* in: TRUE if caller has space->latch + in X mode */ +{ + fil_space_t* space; + fil_space_t* fnamespace; + + ut_ad(mutex_own(&fil_system->mutex)); + + space = fil_space_get_by_id(id); + + if (!space) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: trying to remove tablespace %lu" + " from the cache but\n" + "InnoDB: it is not there.\n", (ulong) id); + + return(FALSE); + } + + HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space); + + fnamespace = fil_space_get_by_name(space->name); + ut_a(fnamespace); + ut_a(space == fnamespace); + + HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(space->name), space); + + if (space->is_in_unflushed_spaces) { + + ut_ad(!fil_buffering_disabled(space)); + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces, + space); + } + + UT_LIST_REMOVE(space_list, fil_system->space_list, space); + + ut_a(space->magic_n == FIL_SPACE_MAGIC_N); + ut_a(0 == space->n_pending_flushes); + + for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain); + fil_node != NULL; + fil_node = UT_LIST_GET_FIRST(space->chain)) { + + fil_node_free(fil_node, fil_system, space); + } + + ut_a(0 == UT_LIST_GET_LEN(space->chain)); + + if (x_latched) { + rw_lock_x_unlock(&space->latch); + } + + rw_lock_free(&(space->latch)); + + mem_free(space->name); + mem_free(space); + + return(TRUE); +} + 
+/*******************************************************************//** +Returns a pointer to the file_space_t that is in the memory cache +associated with a space id. The caller must lock fil_system->mutex. +@return file_space_t pointer, NULL if space not found */ +UNIV_INLINE +fil_space_t* +fil_space_get_space( +/*================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + fil_node_t* node; + + ut_ad(fil_system); + + space = fil_space_get_by_id(id); + if (space == NULL) { + return(NULL); + } + + if (space->size == 0 && space->purpose == FIL_TABLESPACE) { + ut_a(id != 0); + + mutex_exit(&fil_system->mutex); + + /* It is possible that the space gets evicted at this point + before the fil_mutex_enter_and_prepare_for_io() acquires + the fil_system->mutex. Check for this after completing the + call to fil_mutex_enter_and_prepare_for_io(). */ + fil_mutex_enter_and_prepare_for_io(id); + + /* We are still holding the fil_system->mutex. Check if + the space is still in memory cache. */ + space = fil_space_get_by_id(id); + if (space == NULL) { + return(NULL); + } + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. */ + ut_a(1 == UT_LIST_GET_LEN(space->chain)); + + node = UT_LIST_GET_FIRST(space->chain); + + /* It must be a single-table tablespace and we have not opened + the file yet; the following calls will open it and update the + size fields */ + + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The single-table tablespace can't be opened, + because the ibd file is missing. */ + return(NULL); + } + fil_node_complete_io(node, fil_system, OS_FILE_READ); + } + + return(space); +} + +/*******************************************************************//** +Returns the path from the first fil_node_t found for the space ID sent. +The caller is responsible for freeing the memory allocated here for the +value returned. +@return own: A copy of fil_node_t::path, NULL if space ID is zero +or not found. 
*/ +UNIV_INTERN +char* +fil_space_get_first_path( +/*=====================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + fil_node_t* node; + char* path; + + ut_ad(fil_system); + ut_a(id); + + fil_mutex_enter_and_prepare_for_io(id); + + space = fil_space_get_space(id); + + if (space == NULL) { + mutex_exit(&fil_system->mutex); + + return(NULL); + } + + ut_ad(mutex_own(&fil_system->mutex)); + + node = UT_LIST_GET_FIRST(space->chain); + + path = mem_strdup(node->name); + + mutex_exit(&fil_system->mutex); + + return(path); +} + +/*******************************************************************//** +Returns the size of the space in pages. The tablespace must be cached in the +memory cache. +@return space size, 0 if space not found */ +UNIV_INTERN +ulint +fil_space_get_size( +/*===============*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ulint size; + + ut_ad(fil_system); + mutex_enter(&fil_system->mutex); + + space = fil_space_get_space(id); + + size = space ? space->size : 0; + + mutex_exit(&fil_system->mutex); + + return(size); +} + +/*******************************************************************//** +Returns the flags of the space. The tablespace must be cached +in the memory cache. +@return flags, ULINT_UNDEFINED if space not found */ +UNIV_INTERN +ulint +fil_space_get_flags( +/*================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ulint flags; + + ut_ad(fil_system); + + if (!id) { + return(0); + } + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_space(id); + + if (space == NULL) { + mutex_exit(&fil_system->mutex); + + return(ULINT_UNDEFINED); + } + + flags = space->flags; + + mutex_exit(&fil_system->mutex); + + return(flags); +} + +/*******************************************************************//** +Returns the compressed page size of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. 
+@return compressed page size, ULINT_UNDEFINED if space not found */ +UNIV_INTERN +ulint +fil_space_get_zip_size( +/*===================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_get_zip_size(flags)); + } + + return(flags); +} + +/*******************************************************************//** +Checks if the pair space, page_no refers to an existing page in a tablespace +file space. The tablespace must be cached in the memory cache. +@return TRUE if the address is meaningful */ +UNIV_INTERN +ibool +fil_check_adress_in_tablespace( +/*===========================*/ + ulint id, /*!< in: space id */ + ulint page_no)/*!< in: page number */ +{ + if (fil_space_get_size(id) > page_no) { + + return(TRUE); + } + + return(FALSE); +} + +/****************************************************************//** +Initializes the tablespace memory cache. */ +UNIV_INTERN +void +fil_init( +/*=====*/ + ulint hash_size, /*!< in: hash table size */ + ulint max_n_open) /*!< in: max number of open files */ +{ + ut_a(fil_system == NULL); + + ut_a(hash_size > 0); + ut_a(max_n_open > 0); + + fil_system = static_cast( + mem_zalloc(sizeof(fil_system_t))); + + mutex_create(fil_system_mutex_key, + &fil_system->mutex, SYNC_ANY_LATCH); + + fil_system->spaces = hash_create(hash_size); + fil_system->name_hash = hash_create(hash_size); + + UT_LIST_INIT(fil_system->LRU); + + fil_system->max_n_open = max_n_open; +} + +/*******************************************************************//** +Opens all log files and system tablespace data files. They stay open until the +database server shutdown. This should be called at a server startup after the +space objects for the log and the system tablespace have been created. The +purpose of this operation is to make sure we never run out of file descriptors +if we need to read from the insert buffer or to write to the log. 
*/
UNIV_INTERN
void
fil_open_log_and_system_tablespace_files(void)
/*==========================================*/
{
	mutex_enter(&fil_system->mutex);

	for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
	     space != NULL;
	     space = UT_LIST_GET_NEXT(space_list, space)) {

		/* Spaces that belong in the LRU list are not opened here. */
		if (fil_space_belongs_in_lru(space)) {

			continue;
		}

		for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
		     node != NULL;
		     node = UT_LIST_GET_NEXT(chain, node)) {

			if (!node->open
			    && !fil_node_open_file(node, fil_system, space)) {
				/* This function runs during server startup.
				If a log file or a system tablespace file is
				missing the server cannot start, so this is
				treated as a fatal assertion failure. */
				ut_a(0);
			}

			/* Warn whenever fewer than 10 file slots remain
			below the configured maximum. */
			if (fil_system->n_open + 10 > fil_system->max_n_open) {

				fprintf(stderr,
					"InnoDB: Warning: you must"
					" raise the value of"
					" innodb_open_files in\n"
					"InnoDB: my.cnf! Remember that"
					" InnoDB keeps all log files"
					" and all system\n"
					"InnoDB: tablespace files open"
					" for the whole time mysqld is"
					" running, and\n"
					"InnoDB: needs to open also"
					" some .ibd files if the"
					" file-per-table storage\n"
					"InnoDB: model is used."
					" Current open files %lu,"
					" max allowed"
					" open files %lu.\n",
					(ulong) fil_system->n_open,
					(ulong) fil_system->max_n_open);
			}
		}
	}

	mutex_exit(&fil_system->mutex);
}

/*******************************************************************//**
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files.
*/ +UNIV_INTERN +void +fil_close_all_files(void) +/*=====================*/ +{ + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = UT_LIST_GET_FIRST(fil_system->space_list); + + while (space != NULL) { + fil_node_t* node; + fil_space_t* prev_space = space; + + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + if (node->open) { + fil_node_close_file(node, fil_system); + } + } + + space = UT_LIST_GET_NEXT(space_list, space); + + fil_space_free(prev_space->id, FALSE); + } + + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Closes the redo log files. There must not be any pending i/o's or not +flushed modifications in the files. */ +UNIV_INTERN +void +fil_close_log_files( +/*================*/ + bool free) /*!< in: whether to free the memory object */ +{ + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = UT_LIST_GET_FIRST(fil_system->space_list); + + while (space != NULL) { + fil_node_t* node; + fil_space_t* prev_space = space; + + if (space->purpose != FIL_LOG) { + space = UT_LIST_GET_NEXT(space_list, space); + continue; + } + + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + if (node->open) { + fil_node_close_file(node, fil_system); + } + } + + space = UT_LIST_GET_NEXT(space_list, space); + + if (free) { + fil_space_free(prev_space->id, FALSE); + } + } + + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Sets the max tablespace id counter if the given number is bigger than the +previous value. 
*/ +UNIV_INTERN +void +fil_set_max_space_id_if_bigger( +/*===========================*/ + ulint max_id) /*!< in: maximum known id */ +{ + if (max_id >= SRV_LOG_SPACE_FIRST_ID) { + fprintf(stderr, + "InnoDB: Fatal error: max tablespace id" + " is too high, %lu\n", (ulong) max_id); + ut_error; + } + + mutex_enter(&fil_system->mutex); + + if (fil_system->max_assigned_id < max_id) { + + fil_system->max_assigned_id = max_id; + } + + mutex_exit(&fil_system->mutex); +} + +/****************************************************************//** +Writes the flushed lsn and the latest archived log number to the page header +of the first page of a data file of the system tablespace (space 0), +which is uncompressed. */ +static __attribute__((warn_unused_result)) +dberr_t +fil_write_lsn_and_arch_no_to_file( +/*==============================*/ + ulint space, /*!< in: space to write to */ + ulint sum_of_sizes, /*!< in: combined size of previous files + in space, in database pages */ + lsn_t lsn, /*!< in: lsn to write */ + ulint arch_log_no __attribute__((unused))) + /*!< in: archived log number to write */ +{ + byte* buf1; + byte* buf; + dberr_t err; + + buf1 = static_cast(mem_alloc(2 * UNIV_PAGE_SIZE)); + buf = static_cast(ut_align(buf1, UNIV_PAGE_SIZE)); + + err = fil_read(TRUE, space, 0, sum_of_sizes, 0, + UNIV_PAGE_SIZE, buf, NULL, 0); + if (err == DB_SUCCESS) { + mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); + + err = fil_write(TRUE, space, 0, sum_of_sizes, 0, + UNIV_PAGE_SIZE, buf, NULL, 0); + } + + mem_free(buf1); + + return(err); +} + +/****************************************************************//** +Writes the flushed lsn and the latest archived log number to the page +header of the first page of each data file in the system tablespace. 
+@return DB_SUCCESS or error number */ +UNIV_INTERN +dberr_t +fil_write_flushed_lsn_to_data_files( +/*================================*/ + lsn_t lsn, /*!< in: lsn to write */ + ulint arch_log_no) /*!< in: latest archived log file number */ +{ + fil_space_t* space; + fil_node_t* node; + dberr_t err; + + mutex_enter(&fil_system->mutex); + + for (space = UT_LIST_GET_FIRST(fil_system->space_list); + space != NULL; + space = UT_LIST_GET_NEXT(space_list, space)) { + + /* We only write the lsn to all existing data files which have + been open during the lifetime of the mysqld process; they are + represented by the space objects in the tablespace memory + cache. Note that all data files in the system tablespace 0 + and the UNDO log tablespaces (if separate) are always open. */ + + if (space->purpose == FIL_TABLESPACE + && !fil_is_user_tablespace_id(space->id)) { + ulint sum_of_sizes = 0; + + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + mutex_exit(&fil_system->mutex); + + err = fil_write_lsn_and_arch_no_to_file( + space->id, sum_of_sizes, lsn, + arch_log_no); + + if (err != DB_SUCCESS) { + + return(err); + } + + mutex_enter(&fil_system->mutex); + + sum_of_sizes += node->size; + } + } + } + + mutex_exit(&fil_system->mutex); + + return(DB_SUCCESS); +} + +/*******************************************************************//** +Checks the consistency of the first data page of a tablespace +at database startup. 
+@retval NULL on success, or if innodb_force_recovery is set +@return pointer to an error message string */ +static __attribute__((warn_unused_result)) +const char* +fil_check_first_page( +/*=================*/ + const page_t* page) /*!< in: data page */ +{ + ulint space_id; + ulint flags; + + if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { + return(NULL); + } + + space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page); + flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + + if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { + fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n", + UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); + + return("innodb-page-size mismatch"); + } + + if (!space_id && !flags) { + ulint nonzero_bytes = UNIV_PAGE_SIZE; + const byte* b = page; + + while (!*b && --nonzero_bytes) { + b++; + } + + if (!nonzero_bytes) { + return("space header page consists of zero bytes"); + } + } + + if (buf_page_is_corrupted( + false, page, fsp_flags_get_zip_size(flags))) { + return("checksum mismatch"); + } + + if (page_get_space_id(page) == space_id + && page_get_page_no(page) == 0) { + return(NULL); + } + + return("inconsistent data in space header"); +} + +/*******************************************************************//** +Reads the flushed lsn, arch no, and tablespace flag fields from a data +file at database startup. 
+@retval NULL on success, or if innodb_force_recovery is set +@return pointer to an error message string */ +UNIV_INTERN +const char* +fil_read_first_page( +/*================*/ + os_file_t data_file, /*!< in: open data file */ + ibool one_read_already, /*!< in: TRUE if min and max + parameters below already + contain sensible data */ + ulint* flags, /*!< out: tablespace flags */ + ulint* space_id, /*!< out: tablespace ID */ + lsn_t* min_flushed_lsn, /*!< out: min of flushed + lsn values in data files */ + lsn_t* max_flushed_lsn, /*!< out: max of flushed + lsn values in data files */ + ulint orig_space_id) /*!< in: original file space + id */ +{ + byte* buf; + byte* page; + lsn_t flushed_lsn; + const char* check_msg = NULL; + + buf = static_cast(ut_malloc(2 * UNIV_PAGE_SIZE)); + + /* Align the memory for a possible read from a raw device */ + + page = static_cast(ut_align(buf, UNIV_PAGE_SIZE)); + + os_file_read(data_file, page, 0, UNIV_PAGE_SIZE, + orig_space_id != ULINT_UNDEFINED ? + fil_space_is_page_compressed(orig_space_id) : + FALSE); + + *flags = fsp_header_get_flags(page); + + /* Page is page compressed page, need to decompress, before + continue. 
*/ + if (fsp_flags_is_page_compressed(*flags)) { + ulint write_size=0; + fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, &write_size); + } + + *space_id = fsp_header_get_space_id(page); + + flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + + if (!one_read_already) { + check_msg = fil_check_first_page(page); + } + + ut_free(buf); + + if (check_msg) { + return(check_msg); + } + + if (!one_read_already) { + *min_flushed_lsn = flushed_lsn; + *max_flushed_lsn = flushed_lsn; + + return(NULL); + } + + if (*min_flushed_lsn > flushed_lsn) { + *min_flushed_lsn = flushed_lsn; + } + if (*max_flushed_lsn < flushed_lsn) { + *max_flushed_lsn = flushed_lsn; + } + + return(NULL); +} + +/*================ SINGLE-TABLE TABLESPACES ==========================*/ + +#ifndef UNIV_HOTBACKUP +/*******************************************************************//** +Increments the count of pending operation, if space is not being deleted. +@return TRUE if being deleted, and operation should be skipped */ +UNIV_INTERN +ibool +fil_inc_pending_ops( +/*================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + if (space == NULL) { + fprintf(stderr, + "InnoDB: Error: trying to do an operation on a" + " dropped tablespace %lu\n", + (ulong) id); + } + + if (space == NULL || space->stop_new_ops) { + mutex_exit(&fil_system->mutex); + + return(TRUE); + } + + space->n_pending_ops++; + + mutex_exit(&fil_system->mutex); + + return(FALSE); +} + +/*******************************************************************//** +Decrements the count of pending operations. 
*/ +UNIV_INTERN +void +fil_decr_pending_ops( +/*=================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + if (space == NULL) { + fprintf(stderr, + "InnoDB: Error: decrementing pending operation" + " of a dropped tablespace %lu\n", + (ulong) id); + } + + if (space != NULL) { + space->n_pending_ops--; + } + + mutex_exit(&fil_system->mutex); +} +#endif /* !UNIV_HOTBACKUP */ + +/********************************************************//** +Creates the database directory for a table if it does not exist yet. */ +static +void +fil_create_directory_for_tablename( +/*===============================*/ + const char* name) /*!< in: name in the standard + 'databasename/tablename' format */ +{ + const char* namend; + char* path; + ulint len; + + len = strlen(fil_path_to_mysql_datadir); + namend = strchr(name, '/'); + ut_a(namend); + path = static_cast(mem_alloc(len + (namend - name) + 2)); + + memcpy(path, fil_path_to_mysql_datadir, len); + path[len] = '/'; + memcpy(path + len + 1, name, namend - name); + path[len + (namend - name) + 1] = 0; + + srv_normalize_path_for_win(path); + + ut_a(os_file_create_directory(path, FALSE)); + mem_free(path); +} + +#ifndef UNIV_HOTBACKUP +/********************************************************//** +Writes a log record about an .ibd file create/rename/delete. 
*/ +static +void +fil_op_write_log( +/*=============*/ + ulint type, /*!< in: MLOG_FILE_CREATE, + MLOG_FILE_CREATE2, + MLOG_FILE_DELETE, or + MLOG_FILE_RENAME */ + ulint space_id, /*!< in: space id */ + ulint log_flags, /*!< in: redo log flags (stored + in the page number field) */ + ulint flags, /*!< in: compressed page size + and file format + if type==MLOG_FILE_CREATE2, or 0 */ + const char* name, /*!< in: table name in the familiar + 'databasename/tablename' format, or + the file path in the case of + MLOG_FILE_DELETE */ + const char* new_name, /*!< in: if type is MLOG_FILE_RENAME, + the new table name in the + 'databasename/tablename' format */ + mtr_t* mtr) /*!< in: mini-transaction handle */ +{ + byte* log_ptr; + ulint len; + + log_ptr = mlog_open(mtr, 11 + 2 + 1); + + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } + + log_ptr = mlog_write_initial_log_record_for_file_op( + type, space_id, log_flags, log_ptr, mtr); + if (type == MLOG_FILE_CREATE2) { + mach_write_to_4(log_ptr, flags); + log_ptr += 4; + } + /* Let us store the strings as null-terminated for easier readability + and handling */ + + len = strlen(name) + 1; + + mach_write_to_2(log_ptr, len); + log_ptr += 2; + mlog_close(mtr, log_ptr); + + mlog_catenate_string(mtr, (byte*) name, len); + + if (type == MLOG_FILE_RENAME) { + len = strlen(new_name) + 1; + log_ptr = mlog_open(mtr, 2 + len); + ut_a(log_ptr); + mach_write_to_2(log_ptr, len); + log_ptr += 2; + mlog_close(mtr, log_ptr); + + mlog_catenate_string(mtr, (byte*) new_name, len); + } +} +#endif + +/*******************************************************************//** +Parses the body of a log record written about an .ibd file operation. That is, +the log record part after the standard (type, space id, page no) header of the +log record. + +If desired, also replays the delete or rename operation if the .ibd file +exists and the space id in it matches. 
Replays the create operation if a file +at that path does not exist yet. If the database directory for the file to be +created does not exist, then we create the directory, too. + +Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the +datadir that we should use in replaying the file operations. + +InnoDB recovery does not replay these fully since it always sets the space id +to zero. But ibbackup does replay them. TODO: If remote tablespaces are used, +ibbackup will only create tables in the default directory since MLOG_FILE_CREATE +and MLOG_FILE_CREATE2 only know the tablename, not the path. + +@return end of log record, or NULL if the record was not completely +contained between ptr and end_ptr */ +UNIV_INTERN +byte* +fil_op_log_parse_or_replay( +/*=======================*/ + byte* ptr, /*!< in: buffer containing the log record body, + or an initial segment of it, if the record does + not fir completely between ptr and end_ptr */ + byte* end_ptr, /*!< in: buffer end */ + ulint type, /*!< in: the type of this log record */ + ulint space_id, /*!< in: the space id of the tablespace in + question, or 0 if the log record should + only be parsed but not replayed */ + ulint log_flags) /*!< in: redo log flags + (stored in the page number parameter) */ +{ + ulint name_len; + ulint new_name_len; + const char* name; + const char* new_name = NULL; + ulint flags = 0; + + if (type == MLOG_FILE_CREATE2) { + if (end_ptr < ptr + 4) { + + return(NULL); + } + + flags = mach_read_from_4(ptr); + ptr += 4; + } + + if (end_ptr < ptr + 2) { + + return(NULL); + } + + name_len = mach_read_from_2(ptr); + + ptr += 2; + + if (end_ptr < ptr + name_len) { + + return(NULL); + } + + name = (const char*) ptr; + + ptr += name_len; + + if (type == MLOG_FILE_RENAME) { + if (end_ptr < ptr + 2) { + + return(NULL); + } + + new_name_len = mach_read_from_2(ptr); + + ptr += 2; + + if (end_ptr < ptr + new_name_len) { + + return(NULL); + } + + new_name = (const char*) ptr; + + ptr 
+= new_name_len; + } + + /* We managed to parse a full log record body */ + /* + printf("Parsed log rec of type %lu space %lu\n" + "name %s\n", type, space_id, name); + + if (type == MLOG_FILE_RENAME) { + printf("new name %s\n", new_name); + } + */ + if (!space_id) { + return(ptr); + } else { + /* Only replay file ops during recovery. This is a + release-build assert to minimize any data loss risk by a + misapplied file operation. */ + ut_a(recv_recovery_is_on()); + } + + /* Let us try to perform the file operation, if sensible. Note that + ibbackup has at this stage already read in all space id info to the + fil0fil.cc data structures. + + NOTE that our algorithm is not guaranteed to work correctly if there + were renames of tables during the backup. See ibbackup code for more + on the problem. */ + + switch (type) { + case MLOG_FILE_DELETE: + if (fil_tablespace_exists_in_mem(space_id)) { + dberr_t err = fil_delete_tablespace( + space_id, BUF_REMOVE_FLUSH_NO_WRITE); + ut_a(err == DB_SUCCESS); + } + + break; + + case MLOG_FILE_RENAME: + /* In order to replay the rename, the following must hold: + * The new name is not already used. + * A tablespace is open in memory with the old name. + * The space ID for that tablepace matches this log entry. + This will prevent unintended renames during recovery. 
*/ + + if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED + && space_id == fil_get_space_id_for_table(name)) { + /* Create the database directory for the new name, if + it does not exist yet */ + fil_create_directory_for_tablename(new_name); + + if (!fil_rename_tablespace(name, space_id, + new_name, NULL)) { + ut_error; + } + } + + break; + + case MLOG_FILE_CREATE: + case MLOG_FILE_CREATE2: + if (fil_tablespace_exists_in_mem(space_id)) { + /* Do nothing */ + } else if (fil_get_space_id_for_table(name) + != ULINT_UNDEFINED) { + /* Do nothing */ + } else if (log_flags & MLOG_FILE_FLAG_TEMP) { + /* Temporary table, do nothing */ + } else { + const char* path = NULL; + + /* Create the database directory for name, if it does + not exist yet */ + fil_create_directory_for_tablename(name); + + if (fil_create_new_single_table_tablespace( + space_id, name, path, flags, + DICT_TF2_USE_TABLESPACE, + FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { + ut_error; + } + } + + break; + + default: + ut_error; + } + + return(ptr); +} + +/*******************************************************************//** +Allocates a file name for the EXPORT/IMPORT config file name. The +string must be freed by caller with mem_free(). +@return own: file name */ +static +char* +fil_make_cfg_name( +/*==============*/ + const char* filepath) /*!< in: .ibd file name */ +{ + char* cfg_name; + + /* Create a temporary file path by replacing the .ibd suffix + with .cfg. */ + + ut_ad(strlen(filepath) > 4); + + cfg_name = mem_strdup(filepath); + ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg"); + return(cfg_name); +} + +/*******************************************************************//** +Check for change buffer merges. +@return 0 if no merges else count + 1. 
*/ +static +ulint +fil_ibuf_check_pending_ops( +/*=======================*/ + fil_space_t* space, /*!< in/out: Tablespace to check */ + ulint count) /*!< in: number of attempts so far */ +{ + ut_ad(mutex_own(&fil_system->mutex)); + + if (space != 0 && space->n_pending_ops != 0) { + + if (count > 5000) { + ib_logf(IB_LOG_LEVEL_WARN, + "Trying to close/delete tablespace " + "'%s' but there are %lu pending change " + "buffer merges on it.", + space->name, + (ulong) space->n_pending_ops); + } + + return(count + 1); + } + + return(0); +} + +/*******************************************************************//** +Check for pending IO. +@return 0 if no pending else count + 1. */ +static +ulint +fil_check_pending_io( +/*=================*/ + fil_space_t* space, /*!< in/out: Tablespace to check */ + fil_node_t** node, /*!< out: Node in space list */ + ulint count) /*!< in: number of attempts so far */ +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_a(space->n_pending_ops == 0); + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. */ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + + *node = UT_LIST_GET_FIRST(space->chain); + + if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) { + + ut_a(!(*node)->being_extended); + + if (count > 1000) { + ib_logf(IB_LOG_LEVEL_WARN, + "Trying to close/delete tablespace '%s' " + "but there are %lu flushes " + " and %lu pending i/o's on it.", + space->name, + (ulong) space->n_pending_flushes, + (ulong) (*node)->n_pending); + } + + return(count + 1); + } + + return(0); +} + +/*******************************************************************//** +Check pending operations on a tablespace. +@return DB_SUCCESS or error failure. 
*/
static
dberr_t
fil_check_pending_operations(
/*=========================*/
	ulint		id,	/*!< in: space id */
	fil_space_t**	space,	/*!< out: tablespace instance in memory */
	char**		path)	/*!< out/own: tablespace path */
{
	ulint		attempts = 0;
	fil_space_t*	sp;

	ut_a(id != TRX_SYS_SPACE);
	ut_ad(space);

	*space = 0;

	/* Step 1: flag the space so that no new operations are started. */
	mutex_enter(&fil_system->mutex);
	sp = fil_space_get_by_id(id);
	if (sp) {
		sp->stop_new_ops = TRUE;
	}
	mutex_exit(&fil_system->mutex);

	/* Step 2: wait for pending change buffer merges, polling with a
	sleep between attempts and never sleeping under the mutex. */
	for (;;) {
		mutex_enter(&fil_system->mutex);

		sp = fil_space_get_by_id(id);

		attempts = fil_ibuf_check_pending_ops(sp, attempts);

		mutex_exit(&fil_system->mutex);

		if (attempts == 0) {
			break;
		}

		os_thread_sleep(20000);
	}

	/* Step 3: wait for pending IO in the same polling fashion. */

	*path = 0;

	for (;;) {
		mutex_enter(&fil_system->mutex);

		sp = fil_space_get_by_id(id);

		if (sp == NULL) {
			mutex_exit(&fil_system->mutex);
			return(DB_TABLESPACE_NOT_FOUND);
		}

		fil_node_t*	node;

		attempts = fil_check_pending_io(sp, &node, attempts);

		if (attempts == 0) {
			/* Quiescent: copy the file name while the mutex is
			still held. */
			*path = mem_strdup(node->name);
		}

		mutex_exit(&fil_system->mutex);

		if (attempts == 0) {
			break;
		}

		os_thread_sleep(20000);
	}

	ut_ad(sp);

	*space = sp;
	return(DB_SUCCESS);
}

/*******************************************************************//**
Closes a single-table tablespace. The tablespace must be cached in the
memory cache. Free all pages used by the tablespace.
+@return DB_SUCCESS or error */ +UNIV_INTERN +dberr_t +fil_close_tablespace( +/*=================*/ + trx_t* trx, /*!< in/out: Transaction covering the close */ + ulint id) /*!< in: space id */ +{ + char* path = 0; + fil_space_t* space = 0; + + ut_a(id != TRX_SYS_SPACE); + + dberr_t err = fil_check_pending_operations(id, &space, &path); + + if (err != DB_SUCCESS) { + return(err); + } + + ut_a(space); + ut_a(path != 0); + + rw_lock_x_lock(&space->latch); + +#ifndef UNIV_HOTBACKUP + /* Invalidate in the buffer pool all pages belonging to the + tablespace. Since we have set space->stop_new_ops = TRUE, readahead + or ibuf merge can no longer read more pages of this tablespace to the + buffer pool. Thus we can clean the tablespace out of the buffer pool + completely and permanently. The flag stop_new_ops also prevents + fil_flush() from being applied to this tablespace. */ + + buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx); +#endif + mutex_enter(&fil_system->mutex); + + /* If the free is successful, the X lock will be released before + the space memory data structure is freed. */ + + if (!fil_space_free(id, TRUE)) { + rw_lock_x_unlock(&space->latch); + err = DB_TABLESPACE_NOT_FOUND; + } else { + err = DB_SUCCESS; + } + + mutex_exit(&fil_system->mutex); + + /* If it is a delete then also delete any generated files, otherwise + when we drop the database the remove directory will fail. */ + + char* cfg_name = fil_make_cfg_name(path); + + os_file_delete_if_exists(innodb_file_data_key, cfg_name); + + mem_free(path); + mem_free(cfg_name); + + return(err); +} + +/*******************************************************************//** +Deletes a single-table tablespace. The tablespace must be cached in the +memory cache. 
+@return DB_SUCCESS or error */ +UNIV_INTERN +dberr_t +fil_delete_tablespace( +/*==================*/ + ulint id, /*!< in: space id */ + buf_remove_t buf_remove) /*!< in: specify the action to take + on the tables pages in the buffer + pool */ +{ + char* path = 0; + fil_space_t* space = 0; + + ut_a(id != TRX_SYS_SPACE); + + dberr_t err = fil_check_pending_operations(id, &space, &path); + + if (err != DB_SUCCESS) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot delete tablespace %lu because it is not " + "found in the tablespace memory cache.", + (ulong) id); + + return(err); + } + + ut_a(space); + ut_a(path != 0); + + /* Important: We rely on the data dictionary mutex to ensure + that a race is not possible here. It should serialize the tablespace + drop/free. We acquire an X latch only to avoid a race condition + when accessing the tablespace instance via: + + fsp_get_available_space_in_free_extents(). + + There our main motivation is to reduce the contention on the + dictionary mutex. */ + + rw_lock_x_lock(&space->latch); + +#ifndef UNIV_HOTBACKUP + /* IMPORTANT: Because we have set space::stop_new_ops there + can't be any new ibuf merges, reads or flushes. We are here + because node::n_pending was zero above. However, it is still + possible to have pending read and write requests: + + A read request can happen because the reader thread has + gone through the ::stop_new_ops check in buf_page_init_for_read() + before the flag was set and has not yet incremented ::n_pending + when we checked it above. + + A write request can be issued any time because we don't check + the ::stop_new_ops flag when queueing a block for write. + + We deal with pending write requests in the following function + where we'd minimally evict all dirty pages belonging to this + space from the flush_list. Not that if a block is IO-fixed + we'll wait for IO to complete. 
+ + To deal with potential read requests by checking the + ::stop_new_ops flag in fil_io() */ + + buf_LRU_flush_or_remove_pages(id, buf_remove, 0); + +#endif /* !UNIV_HOTBACKUP */ + + /* If it is a delete then also delete any generated files, otherwise + when we drop the database the remove directory will fail. */ + { + char* cfg_name = fil_make_cfg_name(path); + os_file_delete_if_exists(innodb_file_data_key, cfg_name); + mem_free(cfg_name); + } + + /* Delete the link file pointing to the ibd file we are deleting. */ + if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) { + fil_delete_link_file(space->name); + } + + mutex_enter(&fil_system->mutex); + + /* Double check the sanity of pending ops after reacquiring + the fil_system::mutex. */ + if (fil_space_get_by_id(id)) { + ut_a(space->n_pending_ops == 0); + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + ut_a(node->n_pending == 0); + } + + if (!fil_space_free(id, TRUE)) { + err = DB_TABLESPACE_NOT_FOUND; + } + + mutex_exit(&fil_system->mutex); + + if (err != DB_SUCCESS) { + rw_lock_x_unlock(&space->latch); + } else if (!os_file_delete(innodb_file_data_key, path) + && !os_file_delete_if_exists(innodb_file_data_key, path)) { + + /* Note: This is because we have removed the + tablespace instance from the cache. */ + + err = DB_IO_ERROR; + } + + if (err == DB_SUCCESS) { +#ifndef UNIV_HOTBACKUP + /* Write a log record about the deletion of the .ibd + file, so that ibbackup can replay it in the + --apply-log phase. We use a dummy mtr and the familiar + log write mechanism. 
*/ + mtr_t mtr; + + /* When replaying the operation in ibbackup, do not try + to write any log record */ + mtr_start(&mtr); + + fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr); + mtr_commit(&mtr); +#endif + err = DB_SUCCESS; + } + + mem_free(path); + + return(err); +} + +/*******************************************************************//** +Returns TRUE if a single-table tablespace is being deleted. +@return TRUE if being deleted */ +UNIV_INTERN +ibool +fil_tablespace_is_being_deleted( +/*============================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ibool is_being_deleted; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space != NULL); + + is_being_deleted = space->stop_new_ops; + + mutex_exit(&fil_system->mutex); + + return(is_being_deleted); +} + +#ifndef UNIV_HOTBACKUP +/*******************************************************************//** +Discards a single-table tablespace. The tablespace must be cached in the +memory cache. Discarding is like deleting a tablespace, but + + 1. We do not drop the table from the data dictionary; + + 2. We remove all insert buffer entries for the tablespace immediately; + in DROP TABLE they are only removed gradually in the background; + + 3. Free all the pages in use by the tablespace. +@return DB_SUCCESS or error */ +UNIV_INTERN +dberr_t +fil_discard_tablespace( +/*===================*/ + ulint id) /*!< in: space id */ +{ + dberr_t err; + + switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) { + case DB_SUCCESS: + break; + + case DB_IO_ERROR: + ib_logf(IB_LOG_LEVEL_WARN, + "While deleting tablespace %lu in DISCARD TABLESPACE." + " File rename/delete failed: %s", + (ulong) id, ut_strerr(err)); + break; + + case DB_TABLESPACE_NOT_FOUND: + ib_logf(IB_LOG_LEVEL_WARN, + "Cannot delete tablespace %lu in DISCARD " + "TABLESPACE. 
%s", + (ulong) id, ut_strerr(err)); + break; + + default: + ut_error; + } + + /* Remove all insert buffer entries for the tablespace */ + + ibuf_delete_for_discarded_space(id); + + return(err); +} +#endif /* !UNIV_HOTBACKUP */ + +/*******************************************************************//** +Renames the memory cache structures of a single-table tablespace. +@return TRUE if success */ +static +ibool +fil_rename_tablespace_in_mem( +/*=========================*/ + fil_space_t* space, /*!< in: tablespace memory object */ + fil_node_t* node, /*!< in: file node of that tablespace */ + const char* new_name, /*!< in: new name */ + const char* new_path) /*!< in: new file path */ +{ + fil_space_t* space2; + const char* old_name = space->name; + + ut_ad(mutex_own(&fil_system->mutex)); + + space2 = fil_space_get_by_name(old_name); + if (space != space2) { + fputs("InnoDB: Error: cannot find ", stderr); + ut_print_filename(stderr, old_name); + fputs(" in tablespace memory cache\n", stderr); + + return(FALSE); + } + + space2 = fil_space_get_by_name(new_name); + if (space2 != NULL) { + fputs("InnoDB: Error: ", stderr); + ut_print_filename(stderr, new_name); + fputs(" is already in tablespace memory cache\n", stderr); + + return(FALSE); + } + + HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(space->name), space); + mem_free(space->name); + mem_free(node->name); + + space->name = mem_strdup(new_name); + node->name = mem_strdup(new_path); + + HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(new_name), space); + return(TRUE); +} + +/*******************************************************************//** +Allocates a file name for a single-table tablespace. The string must be freed +by caller with mem_free(). 
+@return own: file name */ +UNIV_INTERN +char* +fil_make_ibd_name( +/*==============*/ + const char* name, /*!< in: table name or a dir path */ + bool is_full_path) /*!< in: TRUE if it is a dir path */ +{ + char* filename; + ulint namelen = strlen(name); + ulint dirlen = strlen(fil_path_to_mysql_datadir); + ulint pathlen = dirlen + namelen + sizeof "/.ibd"; + + filename = static_cast(mem_alloc(pathlen)); + + if (is_full_path) { + memcpy(filename, name, namelen); + memcpy(filename + namelen, ".ibd", sizeof ".ibd"); + } else { + ut_snprintf(filename, pathlen, "%s/%s.ibd", + fil_path_to_mysql_datadir, name); + + } + + srv_normalize_path_for_win(filename); + + return(filename); +} + +/*******************************************************************//** +Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link). +The string must be freed by caller with mem_free(). +@return own: file name */ +UNIV_INTERN +char* +fil_make_isl_name( +/*==============*/ + const char* name) /*!< in: table name */ +{ + char* filename; + ulint namelen = strlen(name); + ulint dirlen = strlen(fil_path_to_mysql_datadir); + ulint pathlen = dirlen + namelen + sizeof "/.isl"; + + filename = static_cast(mem_alloc(pathlen)); + + ut_snprintf(filename, pathlen, "%s/%s.isl", + fil_path_to_mysql_datadir, name); + + srv_normalize_path_for_win(filename); + + return(filename); +} + +/*******************************************************************//** +Renames a single-table tablespace. The tablespace must be cached in the +tablespace memory cache. 
+@return TRUE if success */ +UNIV_INTERN +ibool +fil_rename_tablespace( +/*==================*/ + const char* old_name_in, /*!< in: old table name in the + standard databasename/tablename + format of InnoDB, or NULL if we + do the rename based on the space + id only */ + ulint id, /*!< in: space id */ + const char* new_name, /*!< in: new table name in the + standard databasename/tablename + format of InnoDB */ + const char* new_path_in) /*!< in: new full datafile path + if the tablespace is remotely + located, or NULL if it is located + in the normal data directory. */ +{ + ibool success; + fil_space_t* space; + fil_node_t* node; + ulint count = 0; + char* new_path; + char* old_name; + char* old_path; + const char* not_given = "(name not specified)"; + + ut_a(id != 0); + +retry: + count++; + + if (!(count % 1000)) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Warning: problems renaming ", stderr); + ut_print_filename(stderr, + old_name_in ? old_name_in : not_given); + fputs(" to ", stderr); + ut_print_filename(stderr, new_name); + fprintf(stderr, ", %lu iterations\n", (ulong) count); + } + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; ); + + if (space == NULL) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot find space id %lu in the tablespace " + "memory cache, though the table '%s' in a " + "rename operation should have that id.", + (ulong) id, old_name_in ? old_name_in : not_given); + mutex_exit(&fil_system->mutex); + + return(FALSE); + } + + if (count > 25000) { + space->stop_ios = FALSE; + mutex_exit(&fil_system->mutex); + + return(FALSE); + } + + /* We temporarily close the .ibd file because we do not trust that + operating systems can rename an open file. For the closing we have to + wait until there are no pending i/o's or flushes on the file. 
*/ + + space->stop_ios = TRUE; + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. */ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + node = UT_LIST_GET_FIRST(space->chain); + + if (node->n_pending > 0 + || node->n_pending_flushes > 0 + || node->being_extended) { + /* There are pending i/o's or flushes or the file is + currently being extended, sleep for a while and + retry */ + + mutex_exit(&fil_system->mutex); + + os_thread_sleep(20000); + + goto retry; + + } else if (node->modification_counter > node->flush_counter) { + /* Flush the space */ + + mutex_exit(&fil_system->mutex); + + os_thread_sleep(20000); + + fil_flush(id); + + goto retry; + + } else if (node->open) { + /* Close the file */ + + fil_node_close_file(node, fil_system); + } + + /* Check that the old name in the space is right */ + + if (old_name_in) { + old_name = mem_strdup(old_name_in); + ut_a(strcmp(space->name, old_name) == 0); + } else { + old_name = mem_strdup(space->name); + } + old_path = mem_strdup(node->name); + + /* Rename the tablespace and the node in the memory cache */ + new_path = new_path_in ? 
mem_strdup(new_path_in) + : fil_make_ibd_name(new_name, false); + + success = fil_rename_tablespace_in_mem( + space, node, new_name, new_path); + + if (success) { + + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", + goto skip_second_rename; ); + + success = os_file_rename( + innodb_file_data_key, old_path, new_path); + + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", +skip_second_rename: + success = FALSE; ); + + if (!success) { + /* We have to revert the changes we made + to the tablespace memory cache */ + + ut_a(fil_rename_tablespace_in_mem( + space, node, old_name, old_path)); + } + } + + space->stop_ios = FALSE; + + mutex_exit(&fil_system->mutex); + +#ifndef UNIV_HOTBACKUP + if (success && !recv_recovery_on) { + mtr_t mtr; + + mtr_start(&mtr); + + fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name, + &mtr); + mtr_commit(&mtr); + } +#endif /* !UNIV_HOTBACKUP */ + + mem_free(new_path); + mem_free(old_path); + mem_free(old_name); + + return(success); +} + +/*******************************************************************//** +Creates a new InnoDB Symbolic Link (ISL) file. It is always created +under the 'datadir' of MySQL. The datadir is the directory of a +running mysqld program. We can refer to it by simply using the path '.'. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +fil_create_link_file( +/*=================*/ + const char* tablename, /*!< in: tablename */ + const char* filepath) /*!< in: pathname of tablespace */ +{ + os_file_t file; + ibool success; + dberr_t err = DB_SUCCESS; + char* link_filepath; + char* prev_filepath = fil_read_link_file(tablename); + + ut_ad(!srv_read_only_mode); + + if (prev_filepath) { + /* Truncate will call this with an existing + link file which contains the same filepath. 
*/ + if (0 == strcmp(prev_filepath, filepath)) { + mem_free(prev_filepath); + return(DB_SUCCESS); + } + mem_free(prev_filepath); + } + + link_filepath = fil_make_isl_name(tablename); + + file = os_file_create_simple_no_error_handling( + innodb_file_data_key, link_filepath, + OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, 0); + + if (!success) { + /* The following call will print an error message */ + ulint error = os_file_get_last_error(true); + + ut_print_timestamp(stderr); + fputs(" InnoDB: Cannot create file ", stderr); + ut_print_filename(stderr, link_filepath); + fputs(".\n", stderr); + + if (error == OS_FILE_ALREADY_EXISTS) { + fputs("InnoDB: The link file: ", stderr); + ut_print_filename(stderr, filepath); + fputs(" already exists.\n", stderr); + err = DB_TABLESPACE_EXISTS; + + } else if (error == OS_FILE_DISK_FULL) { + err = DB_OUT_OF_FILE_SPACE; + + } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; + } else { + err = DB_ERROR; + } + + /* file is not open, no need to close it. */ + mem_free(link_filepath); + return(err); + } + + if (!os_file_write(link_filepath, file, filepath, 0, + strlen(filepath))) { + err = DB_ERROR; + } + + /* Close the file, we only need it at startup */ + os_file_close(file); + + mem_free(link_filepath); + + return(err); +} + +/*******************************************************************//** +Deletes an InnoDB Symbolic Link (ISL) file. */ +UNIV_INTERN +void +fil_delete_link_file( +/*=================*/ + const char* tablename) /*!< in: name of table */ +{ + char* link_filepath = fil_make_isl_name(tablename); + + os_file_delete_if_exists(innodb_file_data_key, link_filepath); + + mem_free(link_filepath); +} + +/*******************************************************************//** +Reads an InnoDB Symbolic Link (ISL) file. +It is always created under the 'datadir' of MySQL. The name is of the +form {databasename}/{tablename}. 
and the isl file is expected to be in a +'{databasename}' directory called '{tablename}.isl'. The caller must free +the memory of the null-terminated path returned if it is not null. +@return own: filepath found in link file, NULL if not found. */ +UNIV_INTERN +char* +fil_read_link_file( +/*===============*/ + const char* name) /*!< in: tablespace name */ +{ + char* filepath = NULL; + char* link_filepath; + FILE* file = NULL; + + /* The .isl file is in the 'normal' tablespace location. */ + link_filepath = fil_make_isl_name(name); + + file = fopen(link_filepath, "r+b"); + + mem_free(link_filepath); + + if (file) { + filepath = static_cast(mem_alloc(OS_FILE_MAX_PATH)); + + os_file_read_string(file, filepath, OS_FILE_MAX_PATH); + fclose(file); + + if (strlen(filepath)) { + /* Trim whitespace from end of filepath */ + ulint lastch = strlen(filepath) - 1; + while (lastch > 4 && filepath[lastch] <= 0x20) { + filepath[lastch--] = 0x00; + } + srv_normalize_path_for_win(filepath); + } + } + + return(filepath); +} + +/*******************************************************************//** +Opens a handle to the file linked to in an InnoDB Symbolic Link file. +@return TRUE if remote linked tablespace file is found and opened. */ +UNIV_INTERN +ibool +fil_open_linked_file( +/*===============*/ + const char* tablename, /*!< in: database/tablename */ + char** remote_filepath,/*!< out: remote filepath */ + os_file_t* remote_file, /*!< out: remote file handle */ + ulint atomic_writes) /*!< in: atomic writes table option + value */ +{ + ibool success; + + *remote_filepath = fil_read_link_file(tablename); + if (*remote_filepath == NULL) { + return(FALSE); + } + + /* The filepath provided is different from what was + found in the link file. 
*/ + *remote_file = os_file_create_simple_no_error_handling( + innodb_file_data_key, *remote_filepath, + OS_FILE_OPEN, OS_FILE_READ_ONLY, + &success, atomic_writes); + + if (!success) { + char* link_filepath = fil_make_isl_name(tablename); + + /* The following call prints an error message */ + os_file_get_last_error(true); + + ib_logf(IB_LOG_LEVEL_ERROR, + "A link file was found named '%s' " + "but the linked tablespace '%s' " + "could not be opened.", + link_filepath, *remote_filepath); + + mem_free(link_filepath); + mem_free(*remote_filepath); + *remote_filepath = NULL; + } + + return(success); +} + +/*******************************************************************//** +Creates a new single-table tablespace to a database directory of MySQL. +Database directories are under the 'datadir' of MySQL. The datadir is the +directory of a running mysqld program. We can refer to it by simply the +path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp +dir of the mysqld server. 
+ +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +fil_create_new_single_table_tablespace( +/*===================================*/ + ulint space_id, /*!< in: space id */ + const char* tablename, /*!< in: the table name in the usual + databasename/tablename format + of InnoDB */ + const char* dir_path, /*!< in: NULL or a dir path */ + ulint flags, /*!< in: tablespace flags */ + ulint flags2, /*!< in: table flags2 */ + ulint size) /*!< in: the initial size of the + tablespace file in pages, + must be >= FIL_IBD_FILE_INITIAL_SIZE */ +{ + os_file_t file; + ibool ret; + dberr_t err; + byte* buf2; + byte* page; + char* path; + ibool success; + /* TRUE if a table is created with CREATE TEMPORARY TABLE */ + bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); + bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); + + ut_a(space_id > 0); + ut_ad(!srv_read_only_mode); + ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); + ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); + ut_a(fsp_flags_is_valid(flags)); + + if (is_temp) { + /* Temporary table filepath */ + ut_ad(dir_path); + path = fil_make_ibd_name(dir_path, true); + } else if (has_data_dir) { + ut_ad(dir_path); + path = os_file_make_remote_pathname(dir_path, tablename, "ibd"); + + /* Since this tablespace file will be created in a + remote directory, let's create the subdirectories + in the path, if they are not there already. 
*/ + success = os_file_create_subdirs_if_needed(path); + if (!success) { + err = DB_ERROR; + goto error_exit_3; + } + } else { + path = fil_make_ibd_name(tablename, false); + } + + file = os_file_create( + innodb_file_data_key, path, + OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, + OS_FILE_NORMAL, + OS_DATA_FILE, + &ret, + atomic_writes); + + if (ret == FALSE) { + /* The following call will print an error message */ + ulint error = os_file_get_last_error(true); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot create file '%s'\n", path); + + if (error == OS_FILE_ALREADY_EXISTS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "The file '%s' already exists though the " + "corresponding table did not exist " + "in the InnoDB data dictionary. " + "Have you moved InnoDB .ibd files " + "around without using the SQL commands " + "DISCARD TABLESPACE and IMPORT TABLESPACE, " + "or did mysqld crash in the middle of " + "CREATE TABLE? " + "You can resolve the problem by removing " + "the file '%s' under the 'datadir' of MySQL.", + path, path); + + err = DB_TABLESPACE_EXISTS; + goto error_exit_3; + } + + if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; + goto error_exit_3; + } + + if (error == OS_FILE_DISK_FULL) { + err = DB_OUT_OF_FILE_SPACE; + goto error_exit_3; + } + + err = DB_ERROR; + goto error_exit_3; + } + + ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE); + + if (!ret) { + err = DB_OUT_OF_FILE_SPACE; + goto error_exit_2; + } + + /* printf("Creating tablespace %s id %lu\n", path, space_id); */ + + /* We have to write the space id to the file immediately and flush the + file to disk. This is because in crash recovery we must be aware what + tablespaces exist and what are their space id's, so that we can apply + the log records to the right file. It may take quite a while until + buffer pool flush algorithms write anything to the file and flush it to + disk. 
If we would not write here anything, the file would be filled + with zeros from the call of os_file_set_size(), until a buffer pool + flush would write to it. */ + + buf2 = static_cast(ut_malloc(3 * UNIV_PAGE_SIZE)); + /* Align the memory for file i/o if we might have O_DIRECT set */ + page = static_cast(ut_align(buf2, UNIV_PAGE_SIZE)); + + memset(page, '\0', UNIV_PAGE_SIZE); + + /* Add the UNIV_PAGE_SIZE to the table flags and write them to the + tablespace header. */ + flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); + fsp_header_init_fields(page, space_id, flags); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + ut_ad(fsp_flags_is_valid(flags)); + + if (!(fsp_flags_is_compressed(flags))) { + buf_flush_init_for_writing(page, NULL, 0); + ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE); + } else { + page_zip_des_t page_zip; + ulint zip_size; + + zip_size = fsp_flags_get_zip_size(flags); + + page_zip_set_size(&page_zip, zip_size); + page_zip.data = page + UNIV_PAGE_SIZE; +#ifdef UNIV_DEBUG + page_zip.m_start = +#endif /* UNIV_DEBUG */ + page_zip.m_end = page_zip.m_nonempty = + page_zip.n_blobs = 0; + buf_flush_init_for_writing(page, &page_zip, 0); + ret = os_file_write(path, file, page_zip.data, 0, zip_size); + } + + ut_free(buf2); + + if (!ret) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Could not write the first page to tablespace " + "'%s'", path); + + err = DB_ERROR; + goto error_exit_2; + } + + ret = os_file_flush(file); + + if (!ret) { + ib_logf(IB_LOG_LEVEL_ERROR, + "File flush of tablespace '%s' failed", path); + err = DB_ERROR; + goto error_exit_2; + } + + if (has_data_dir) { + /* Now that the IBD file is created, make the ISL file. 
*/ + err = fil_create_link_file(tablename, path); + if (err != DB_SUCCESS) { + goto error_exit_2; + } + } + + success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE); + if (!success || !fil_node_create(path, size, space_id, FALSE)) { + err = DB_ERROR; + goto error_exit_1; + } + +#ifndef UNIV_HOTBACKUP + { + mtr_t mtr; + ulint mlog_file_flag = 0; + + if (is_temp) { + mlog_file_flag |= MLOG_FILE_FLAG_TEMP; + } + + mtr_start(&mtr); + + fil_op_write_log(flags + ? MLOG_FILE_CREATE2 + : MLOG_FILE_CREATE, + space_id, mlog_file_flag, flags, + tablename, NULL, &mtr); + + mtr_commit(&mtr); + } +#endif + err = DB_SUCCESS; + + /* Error code is set. Cleanup the various variables used. + These labels reflect the order in which variables are assigned or + actions are done. */ +error_exit_1: + if (has_data_dir && err != DB_SUCCESS) { + fil_delete_link_file(tablename); + } +error_exit_2: + os_file_close(file); + if (err != DB_SUCCESS) { + os_file_delete(innodb_file_data_key, path); + } +error_exit_3: + mem_free(path); + + return(err); +} + +#ifndef UNIV_HOTBACKUP +/********************************************************************//** +Report information about a bad tablespace. */ +static +void +fil_report_bad_tablespace( +/*======================*/ + const char* filepath, /*!< in: filepath */ + const char* check_msg, /*!< in: fil_check_first_page() */ + ulint found_id, /*!< in: found space ID */ + ulint found_flags, /*!< in: found flags */ + ulint expected_id, /*!< in: expected space id */ + ulint expected_flags) /*!< in: expected flags */ +{ + if (check_msg) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Error %s in file '%s'," + "tablespace id=%lu, flags=%lu. 
" + "Please refer to " + REFMAN "innodb-troubleshooting-datadict.html " + "for how to resolve the issue.", + check_msg, filepath, + (ulong) expected_id, (ulong) expected_flags); + return; + } + + ib_logf(IB_LOG_LEVEL_ERROR, + "In file '%s', tablespace id and flags are %lu and %lu, " + "but in the InnoDB data dictionary they are %lu and %lu. " + "Have you moved InnoDB .ibd files around without using the " + "commands DISCARD TABLESPACE and IMPORT TABLESPACE? " + "Please refer to " + REFMAN "innodb-troubleshooting-datadict.html " + "for how to resolve the issue.", + filepath, (ulong) found_id, (ulong) found_flags, + (ulong) expected_id, (ulong) expected_flags); +} + +/********************************************************************//** +Tries to open a single-table tablespace and optionally checks that the +space id in it is correct. If this does not succeed, print an error message +to the .err log. This function is used to open a tablespace when we start +mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE. + +NOTE that we assume this operation is used either at the database startup +or under the protection of the dictionary mutex, so that two users cannot +race here. This operation does not leave the file associated with the +tablespace open, but closes it after we have looked at the space id in it. + +If the validate boolean is set, we read the first page of the file and +check that the space id in the file is what we expect. We assume that +this function runs much faster if no check is made, since accessing the +file inode probably is much faster (the OS caches them) than accessing +the first page of the file. This boolean may be initially FALSE, but if +a remote tablespace is found it will be changed to true. + +If the fix_dict boolean is set, then it is safe to use an internal SQL +statement to update the dictionary tables if they are incorrect. 
+ +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +fil_open_single_table_tablespace( +/*=============================*/ + bool validate, /*!< in: Do we validate tablespace? */ + bool fix_dict, /*!< in: Can we fix the dictionary? */ + ulint id, /*!< in: space id */ + ulint flags, /*!< in: tablespace flags */ + const char* tablename, /*!< in: table name in the + databasename/tablename format */ + const char* path_in) /*!< in: tablespace filepath */ +{ + dberr_t err = DB_SUCCESS; + bool dict_filepath_same_as_default = false; + bool link_file_found = false; + bool link_file_is_bad = false; + fsp_open_info def; + fsp_open_info dict; + fsp_open_info remote; + ulint tablespaces_found = 0; + ulint valid_tablespaces_found = 0; + ulint atomic_writes = 0; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex))); + + /* Table flags can be ULINT_UNDEFINED if + dict_tf_to_fsp_flags_failure is set. */ + if (flags != ULINT_UNDEFINED) { + if (!fsp_flags_is_valid(flags)) { + return(DB_CORRUPTION); + } + } else { + return(DB_CORRUPTION); + } + + atomic_writes = fsp_flags_get_atomic_writes(flags); + + /* If the tablespace was relocated, we do not + compare the DATA_DIR flag */ + ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR; + + memset(&def, 0, sizeof(def)); + memset(&dict, 0, sizeof(dict)); + memset(&remote, 0, sizeof(remote)); + + /* Discover the correct filepath. We will always look for an ibd + in the default location. If it is remote, it should not be here. */ + def.filepath = fil_make_ibd_name(tablename, false); + + /* The path_in was read from SYS_DATAFILES. */ + if (path_in) { + if (strcmp(def.filepath, path_in)) { + dict.filepath = mem_strdup(path_in); + /* possibility of multiple files. 
*/ + validate = true; + } else { + dict_filepath_same_as_default = true; + } + } + + link_file_found = fil_open_linked_file( + tablename, &remote.filepath, &remote.file, atomic_writes); + remote.success = link_file_found; + if (remote.success) { + /* possibility of multiple files. */ + validate = true; + tablespaces_found++; + + /* A link file was found. MySQL does not allow a DATA + DIRECTORY to be be the same as the default filepath. */ + ut_a(strcmp(def.filepath, remote.filepath)); + + /* If there was a filepath found in SYS_DATAFILES, + we hope it was the same as this remote.filepath found + in the ISL file. */ + if (dict.filepath + && (0 == strcmp(dict.filepath, remote.filepath))) { + remote.success = FALSE; + os_file_close(remote.file); + mem_free(remote.filepath); + remote.filepath = NULL; + tablespaces_found--; + } + } + + /* Attempt to open the tablespace at other possible filepaths. */ + if (dict.filepath) { + dict.file = os_file_create_simple_no_error_handling( + innodb_file_data_key, dict.filepath, OS_FILE_OPEN, + OS_FILE_READ_ONLY, &dict.success, atomic_writes); + if (dict.success) { + /* possibility of multiple files. */ + validate = true; + tablespaces_found++; + } + } + + /* Always look for a file at the default location. */ + ut_a(def.filepath); + def.file = os_file_create_simple_no_error_handling( + innodb_file_data_key, def.filepath, OS_FILE_OPEN, + OS_FILE_READ_ONLY, &def.success, atomic_writes); + if (def.success) { + tablespaces_found++; + } + + /* We have now checked all possible tablespace locations and + have a count of how many we found. If things are normal, we + only found 1. */ + if (!validate && tablespaces_found == 1) { + goto skip_validate; + } + + /* Read the first page of the datadir tablespace, if found. 
*/ + if (def.success) { + def.check_msg = fil_read_first_page( + def.file, FALSE, &def.flags, &def.id, + &def.lsn, &def.lsn, id); + def.valid = !def.check_msg; + + /* Validate this single-table-tablespace with SYS_TABLES, + but do not compare the DATA_DIR flag, in case the + tablespace was relocated. */ + if (def.valid && def.id == id + && (def.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) { + valid_tablespaces_found++; + } else { + def.valid = false; + /* Do not use this tablespace. */ + fil_report_bad_tablespace( + def.filepath, def.check_msg, def.id, + def.flags, id, flags); + } + } + + /* Read the first page of the remote tablespace */ + if (remote.success) { + remote.check_msg = fil_read_first_page( + remote.file, FALSE, &remote.flags, &remote.id, + &remote.lsn, &remote.lsn, id); + remote.valid = !remote.check_msg; + + /* Validate this single-table-tablespace with SYS_TABLES, + but do not compare the DATA_DIR flag, in case the + tablespace was relocated. */ + if (remote.valid && remote.id == id + && (remote.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) { + valid_tablespaces_found++; + } else { + remote.valid = false; + /* Do not use this linked tablespace. */ + fil_report_bad_tablespace( + remote.filepath, remote.check_msg, remote.id, + remote.flags, id, flags); + link_file_is_bad = true; + } + } + + /* Read the first page of the datadir tablespace, if found. */ + if (dict.success) { + dict.check_msg = fil_read_first_page( + dict.file, FALSE, &dict.flags, &dict.id, + &dict.lsn, &dict.lsn, id); + dict.valid = !dict.check_msg; + + /* Validate this single-table-tablespace with SYS_TABLES, + but do not compare the DATA_DIR flag, in case the + tablespace was relocated. */ + if (dict.valid && dict.id == id + && (dict.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) { + valid_tablespaces_found++; + } else { + dict.valid = false; + /* Do not use this tablespace. 
*/ + fil_report_bad_tablespace( + dict.filepath, dict.check_msg, dict.id, + dict.flags, id, flags); + } + } + + /* Make sense of these three possible locations. + First, bail out if no tablespace files were found. */ + if (valid_tablespaces_found == 0) { + /* The following call prints an error message */ + os_file_get_last_error(true); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Could not find a valid tablespace file for '%s'. " + "See " REFMAN "innodb-troubleshooting-datadict.html " + "for how to resolve the issue.", + tablename); + + err = DB_CORRUPTION; + + goto cleanup_and_exit; + } + + /* Do not open any tablespaces if more than one tablespace with + the correct space ID and flags were found. */ + if (tablespaces_found > 1) { + ib_logf(IB_LOG_LEVEL_ERROR, + "A tablespace for %s has been found in " + "multiple places;", tablename); + if (def.success) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Default location; %s, LSN=" LSN_PF + ", Space ID=%lu, Flags=%lu", + def.filepath, def.lsn, + (ulong) def.id, (ulong) def.flags); + } + if (remote.success) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Remote location; %s, LSN=" LSN_PF + ", Space ID=%lu, Flags=%lu", + remote.filepath, remote.lsn, + (ulong) remote.id, (ulong) remote.flags); + } + if (dict.success) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Dictionary location; %s, LSN=" LSN_PF + ", Space ID=%lu, Flags=%lu", + dict.filepath, dict.lsn, + (ulong) dict.id, (ulong) dict.flags); + } + + /* Force-recovery will allow some tablespaces to be + skipped by REDO if there was more than one file found. + Unlike during the REDO phase of recovery, we now know + if the tablespace is valid according to the dictionary, + which was not available then. So if we did not force + recovery and there is only one good tablespace, ignore + any bad tablespaces. 
*/ + if (valid_tablespaces_found > 1 || srv_force_recovery > 0) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Will not open the tablespace for '%s'", + tablename); + + if (def.success != def.valid + || dict.success != dict.valid + || remote.success != remote.valid) { + err = DB_CORRUPTION; + } else { + err = DB_ERROR; + } + goto cleanup_and_exit; + } + + /* There is only one valid tablespace found and we did + not use srv_force_recovery during REDO. Use this one + tablespace and clean up invalid tablespace pointers */ + if (def.success && !def.valid) { + def.success = false; + os_file_close(def.file); + tablespaces_found--; + } + if (dict.success && !dict.valid) { + dict.success = false; + os_file_close(dict.file); + /* Leave dict.filepath so that SYS_DATAFILES + can be corrected below. */ + tablespaces_found--; + } + if (remote.success && !remote.valid) { + remote.success = false; + os_file_close(remote.file); + mem_free(remote.filepath); + remote.filepath = NULL; + tablespaces_found--; + } + } + + /* At this point, there should be only one filepath. */ + ut_a(tablespaces_found == 1); + ut_a(valid_tablespaces_found == 1); + + /* Only fix the dictionary at startup when there is only one thread. + Calls to dict_load_table() can be done while holding other latches. */ + if (!fix_dict) { + goto skip_validate; + } + + /* We may need to change what is stored in SYS_DATAFILES or + SYS_TABLESPACES or adjust the link file. + Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does + not prevent opening and using the single_table_tablespace either + this time or the next, we do not check the return code or fail + to open the tablespace. But dict_update_filepath() will issue a + warning to the log. 
*/ + if (dict.filepath) { + if (remote.success) { + dict_update_filepath(id, remote.filepath); + } else if (def.success) { + dict_update_filepath(id, def.filepath); + if (link_file_is_bad) { + fil_delete_link_file(tablename); + } + } else if (!link_file_found || link_file_is_bad) { + ut_ad(dict.success); + /* Fix the link file if we got our filepath + from the dictionary but a link file did not + exist or it did not point to a valid file. */ + fil_delete_link_file(tablename); + fil_create_link_file(tablename, dict.filepath); + } + + } else if (remote.success && dict_filepath_same_as_default) { + dict_update_filepath(id, remote.filepath); + + } else if (remote.success && path_in == NULL) { + /* SYS_DATAFILES record for this space ID was not found. */ + dict_insert_tablespace_and_filepath( + id, tablename, remote.filepath, flags); + } + +skip_validate: + if (err != DB_SUCCESS) { + ; // Don't load the tablespace into the cache + } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) { + err = DB_ERROR; + } else { + /* We do not measure the size of the file, that is why + we pass the 0 below */ + + if (!fil_node_create(remote.success ? remote.filepath : + dict.success ? dict.filepath : + def.filepath, 0, id, FALSE)) { + err = DB_ERROR; + } + } + +cleanup_and_exit: + if (remote.success) { + os_file_close(remote.file); + } + if (remote.filepath) { + mem_free(remote.filepath); + } + if (dict.success) { + os_file_close(dict.file); + } + if (dict.filepath) { + mem_free(dict.filepath); + } + if (def.success) { + os_file_close(def.file); + } + mem_free(def.filepath); + + return(err); +} +#endif /* !UNIV_HOTBACKUP */ + +#ifdef UNIV_HOTBACKUP +/*******************************************************************//** +Allocates a file name for an old version of a single-table tablespace. +The string must be freed by caller with mem_free()! 
+@return own: file name */ +static +char* +fil_make_ibbackup_old_name( +/*=======================*/ + const char* name) /*!< in: original file name */ +{ + static const char suffix[] = "_ibbackup_old_vers_"; + char* path; + ulint len = strlen(name); + + path = static_cast(mem_alloc(len + (15 + sizeof suffix))); + + memcpy(path, name, len); + memcpy(path + len, suffix, (sizeof suffix) - 1); + ut_sprintf_timestamp_without_extra_chars( + path + len + ((sizeof suffix) - 1)); + return(path); +} +#endif /* UNIV_HOTBACKUP */ + + +/*******************************************************************//** +Determine the space id of the given file descriptor by reading a few +pages from the beginning of the .ibd file. +@return true if space id was successfully identified, or false. */ +static +bool +fil_user_tablespace_find_space_id( +/*==============================*/ + fsp_open_info* fsp) /* in/out: contains file descriptor, which is + used as input. contains space_id, which is + the output */ +{ + bool st; + os_offset_t file_size; + + file_size = os_file_get_size(fsp->file); + + if (file_size == (os_offset_t) -1) { + ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s", + fsp->filepath); + return(false); + } + + /* Assuming a page size, read the space_id from each page and store it + in a map. Find out which space_id is agreed on by majority of the + pages. Choose that space_id. 
*/ + for (ulint page_size = UNIV_ZIP_SIZE_MIN; + page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) { + + /* map[space_id] = count of pages */ + std::map verify; + + ulint page_count = 64; + ulint valid_pages = 0; + + /* Adjust the number of pages to analyze based on file size */ + while ((page_count * page_size) > file_size) { + --page_count; + } + + ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:" + "%lu", page_size, page_count); + + byte* buf = static_cast(ut_malloc(2*page_size)); + byte* page = static_cast(ut_align(buf, page_size)); + + for (ulint j = 0; j < page_count; ++j) { + + st = os_file_read(fsp->file, page, (j* page_size), page_size, + fsp_flags_is_page_compressed(fsp->flags)); + + if (!st) { + ib_logf(IB_LOG_LEVEL_INFO, + "READ FAIL: page_no:%lu", j); + continue; + } + + bool uncompressed_ok = false; + + /* For uncompressed pages, the page size must be equal + to UNIV_PAGE_SIZE. */ + if (page_size == UNIV_PAGE_SIZE) { + uncompressed_ok = !buf_page_is_corrupted( + false, page, 0); + } + + bool compressed_ok = !buf_page_is_corrupted( + false, page, page_size); + + if (uncompressed_ok || compressed_ok) { + + ulint space_id = mach_read_from_4(page + + FIL_PAGE_SPACE_ID); + + if (space_id > 0) { + ib_logf(IB_LOG_LEVEL_INFO, + "VALID: space:%lu " + "page_no:%lu page_size:%lu", + space_id, j, page_size); + verify[space_id]++; + ++valid_pages; + } + } + } + + ut_free(buf); + + ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id " + "count:%lu", page_size, (ulint) verify.size()); + + const ulint pages_corrupted = 3; + for (ulint missed = 0; missed <= pages_corrupted; ++missed) { + + for (std::map::iterator + m = verify.begin(); m != verify.end(); ++m ) { + + ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, " + "Number of pages matched: %lu/%lu " + "(%lu)", m->first, m->second, + valid_pages, page_size); + + if (m->second == (valid_pages - missed)) { + + ib_logf(IB_LOG_LEVEL_INFO, + "Chosen space:%lu\n", m->first); + + fsp->id = m->first; + 
return(true); + } + } + + } + } + + return(false); +} + +/*******************************************************************//** +Finds the given page_no of the given space id from the double write buffer, +and copies it to the corresponding .ibd file. +@return true if copy was successful, or false. */ +bool +fil_user_tablespace_restore_page( +/*==============================*/ + fsp_open_info* fsp, /* in: contains space id and .ibd + file information */ + ulint page_no) /* in: page_no to obtain from double + write buffer */ +{ + bool err; + ulint flags; + ulint zip_size; + ulint page_size; + ulint buflen; + byte* page; + + ib_logf(IB_LOG_LEVEL_INFO, "Restoring page %lu of tablespace %lu", + page_no, fsp->id); + + // find if double write buffer has page_no of given space id + page = recv_sys->dblwr.find_page(fsp->id, page_no); + + if (!page) { + ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have " + "page_no=%lu of space: %lu", page_no, fsp->id); + err = false; + goto out; + } + + flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + zip_size = fsp_flags_get_zip_size(flags); + page_size = fsp_flags_get_page_size(flags); + + ut_ad(page_no == page_get_page_no(page)); + + buflen = zip_size ? zip_size: page_size; + + ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s", + buflen, fsp->filepath); + + err = os_file_write(fsp->filepath, fsp->file, page, + (zip_size ? zip_size : page_size) * page_no, + buflen); + + os_file_flush(fsp->file); +out: + return(err); +} + +/********************************************************************//** +Opens an .ibd file and adds the associated single-table tablespace to the +InnoDB fil0fil.cc data structures. +Set fsp->success to TRUE if tablespace is valid, FALSE if not. 
*/
static
void
fil_validate_single_table_tablespace(
/*=================================*/
	const char*	tablename,	/*!< in: database/tablename */
	fsp_open_info*	fsp)		/*!< in/out: tablespace info */
{
	/* The first page is read at most twice: once as found on disk and,
	if that fails, once more after an attempt to restore page 0 from
	the doublewrite buffer. */
	bool	tried_restore = false;

	for (;;) {
		fsp->success = TRUE;

		const char*	check_msg = fil_read_first_page(
			fsp->file, FALSE, &fsp->flags, &fsp->id,
			&fsp->lsn, &fsp->lsn, ULINT_UNDEFINED);

		if (check_msg == NULL) {
			/* First page read without complaint. */
			break;
		}

		ib_logf(IB_LOG_LEVEL_ERROR,
			"%s in tablespace %s (table %s)",
			check_msg, fsp->filepath, tablename);
		fsp->success = FALSE;

		if (tried_restore) {
			/* Still unreadable after the restore: give up. */
			return;
		}

		if (!fil_user_tablespace_find_space_id(fsp)) {
			return;
		}

		tried_restore = true;

		if (fsp->id > 0
		    && !fil_user_tablespace_restore_page(fsp, 0)) {
			return;
		}
	}

	if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
		ib_logf(IB_LOG_LEVEL_ERROR,
			"Tablespace is not sensible;"
			" Table: %s Space ID: %lu Filepath: %s\n",
			tablename, (ulong) fsp->id, fsp->filepath);
		fsp->success = FALSE;
		return;
	}

	/* Refuse a space id that is already present in the cache. */
	mutex_enter(&fil_system->mutex);
	fil_space_t*	known_space = fil_space_get_by_id(fsp->id);
	mutex_exit(&fil_system->mutex);

	if (known_space == NULL) {
		fsp->success = TRUE;
		return;
	}

	char*	prev_filepath = fil_space_get_first_path(fsp->id);

	ib_logf(IB_LOG_LEVEL_ERROR,
		"Attempted to open a previously opened tablespace. "
		"Previous tablespace %s uses space ID: %lu at "
		"filepath: %s. Cannot open tablespace %s which uses "
		"space ID: %lu at filepath: %s",
		known_space->name, (ulong) known_space->id, prev_filepath,
		tablename, (ulong) fsp->id, fsp->filepath);

	mem_free(prev_filepath);
	fsp->success = FALSE;
}


/********************************************************************//**
Opens an .ibd file and adds the associated single-table tablespace to the
InnoDB fil0fil.cc data structures.
*/ +static +void +fil_load_single_table_tablespace( +/*=============================*/ + const char* dbname, /*!< in: database name */ + const char* filename) /*!< in: file name (not a path), + including the .ibd or .isl extension */ +{ + char* tablename; + ulint tablename_len; + ulint dbname_len = strlen(dbname); + ulint filename_len = strlen(filename); + fsp_open_info def; + fsp_open_info remote; + os_offset_t size; + fil_space_t* space; + + memset(&def, 0, sizeof(def)); + memset(&remote, 0, sizeof(remote)); + + /* The caller assured that the extension is ".ibd" or ".isl". */ + ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4) + || 0 == memcmp(filename + filename_len - 4, ".isl", 4)); + + /* Build up the tablename in the standard form database/table. */ + tablename = static_cast( + mem_alloc(dbname_len + filename_len + 2)); + sprintf(tablename, "%s/%s", dbname, filename); + tablename_len = strlen(tablename) - strlen(".ibd"); + tablename[tablename_len] = '\0'; + + /* There may be both .ibd and .isl file in the directory. + And it is possible that the .isl file refers to a different + .ibd file. If so, we open and compare them the first time + one of them is sent to this function. So if this table has + already been loaded, there is nothing to do.*/ + mutex_enter(&fil_system->mutex); + space = fil_space_get_by_name(tablename); + if (space) { + mem_free(tablename); + mutex_exit(&fil_system->mutex); + return; + } + mutex_exit(&fil_system->mutex); + + /* Build up the filepath of the .ibd tablespace in the datadir. + This must be freed independent of def.success. */ + def.filepath = fil_make_ibd_name(tablename, false); + +#ifdef __WIN__ +# ifndef UNIV_HOTBACKUP + /* If lower_case_table_names is 0 or 2, then MySQL allows database + directory names with upper case letters. On Windows, all table and + database names in InnoDB are internally always in lower case. Put the + file path to lower case, so that we are consistent with InnoDB's + internal data dictionary. 
*/ + + dict_casedn_str(def.filepath); +# endif /* !UNIV_HOTBACKUP */ +#endif + + /* Check for a link file which locates a remote tablespace. */ + remote.success = fil_open_linked_file( + tablename, &remote.filepath, &remote.file, FALSE); + + /* Read the first page of the remote tablespace */ + if (remote.success) { + fil_validate_single_table_tablespace(tablename, &remote); + if (!remote.success) { + os_file_close(remote.file); + mem_free(remote.filepath); + } + } + + + /* Try to open the tablespace in the datadir. */ + def.file = os_file_create_simple_no_error_handling( + innodb_file_data_key, def.filepath, OS_FILE_OPEN, + OS_FILE_READ_WRITE, &def.success, FALSE); + + /* Read the first page of the remote tablespace */ + if (def.success) { + fil_validate_single_table_tablespace(tablename, &def); + if (!def.success) { + os_file_close(def.file); + } + } + + if (!def.success && !remote.success) { + /* The following call prints an error message */ + os_file_get_last_error(true); + fprintf(stderr, + "InnoDB: Error: could not open single-table" + " tablespace file %s\n", def.filepath); + + if (!strncmp(filename, + tmp_file_prefix, tmp_file_prefix_length)) { + /* Ignore errors for #sql tablespaces. 
*/ + mem_free(tablename); + if (remote.filepath) { + mem_free(remote.filepath); + } + if (def.filepath) { + mem_free(def.filepath); + } + return; + } +no_good_file: + fprintf(stderr, + "InnoDB: We do not continue the crash recovery," + " because the table may become\n" + "InnoDB: corrupt if we cannot apply the log" + " records in the InnoDB log to it.\n" + "InnoDB: To fix the problem and start mysqld:\n" + "InnoDB: 1) If there is a permission problem" + " in the file and mysqld cannot\n" + "InnoDB: open the file, you should" + " modify the permissions.\n" + "InnoDB: 2) If the table is not needed, or you" + " can restore it from a backup,\n" + "InnoDB: then you can remove the .ibd file," + " and InnoDB will do a normal\n" + "InnoDB: crash recovery and ignore that table.\n" + "InnoDB: 3) If the file system or the" + " disk is broken, and you cannot remove\n" + "InnoDB: the .ibd file, you can set" + " innodb_force_recovery > 0 in my.cnf\n" + "InnoDB: and force InnoDB to continue crash" + " recovery here.\n"); +will_not_choose: + mem_free(tablename); + if (remote.filepath) { + mem_free(remote.filepath); + } + if (def.filepath) { + mem_free(def.filepath); + } + + if (srv_force_recovery > 0) { + ib_logf(IB_LOG_LEVEL_INFO, + "innodb_force_recovery was set to %lu. 
" + "Continuing crash recovery even though we " + "cannot access the .ibd file of this table.", + srv_force_recovery); + return; + } + + exit(1); + } + + if (def.success && remote.success) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Tablespaces for %s have been found in two places;\n" + "Location 1: SpaceID: %lu LSN: %lu File: %s\n" + "Location 2: SpaceID: %lu LSN: %lu File: %s\n" + "You must delete one of them.", + tablename, (ulong) def.id, (ulong) def.lsn, + def.filepath, (ulong) remote.id, (ulong) remote.lsn, + remote.filepath); + + def.success = FALSE; + os_file_close(def.file); + os_file_close(remote.file); + goto will_not_choose; + } + + /* At this point, only one tablespace is open */ + ut_a(def.success == !remote.success); + + fsp_open_info* fsp = def.success ? &def : &remote; + + /* Get and test the file size. */ + size = os_file_get_size(fsp->file); + + if (size == (os_offset_t) -1) { + /* The following call prints an error message */ + os_file_get_last_error(true); + + ib_logf(IB_LOG_LEVEL_ERROR, + "could not measure the size of single-table " + "tablespace file %s", fsp->filepath); + + os_file_close(fsp->file); + goto no_good_file; + } + + /* Every .ibd file is created >= 4 pages in size. Smaller files + cannot be ok. 
*/ + ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE; + if (size < minimum_size) { +#ifndef UNIV_HOTBACKUP + ib_logf(IB_LOG_LEVEL_ERROR, + "The size of single-table tablespace file %s " + "is only " UINT64PF ", should be at least %lu!", + fsp->filepath, size, minimum_size); + os_file_close(fsp->file); + goto no_good_file; +#else + fsp->id = ULINT_UNDEFINED; + fsp->flags = 0; +#endif /* !UNIV_HOTBACKUP */ + } + +#ifdef UNIV_HOTBACKUP + if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) { + char* new_path; + + fprintf(stderr, + "InnoDB: Renaming tablespace %s of id %lu,\n" + "InnoDB: to %s_ibbackup_old_vers_\n" + "InnoDB: because its size %" PRId64 " is too small" + " (< 4 pages 16 kB each),\n" + "InnoDB: or the space id in the file header" + " is not sensible.\n" + "InnoDB: This can happen in an ibbackup run," + " and is not dangerous.\n", + fsp->filepath, fsp->id, fsp->filepath, size); + os_file_close(fsp->file); + + new_path = fil_make_ibbackup_old_name(fsp->filepath); + + bool success = os_file_rename( + innodb_file_data_key, fsp->filepath, new_path); + + ut_a(success); + + mem_free(new_path); + + goto func_exit_after_close; + } + + /* A backup may contain the same space several times, if the space got + renamed at a sensitive time. Since it is enough to have one version of + the space, we rename the file if a space with the same space id + already exists in the tablespace memory cache. We rather rename the + file than delete it, because if there is a bug, we do not want to + destroy valuable data. */ + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(fsp->id); + + if (space) { + char* new_path; + + fprintf(stderr, + "InnoDB: Renaming tablespace %s of id %lu,\n" + "InnoDB: to %s_ibbackup_old_vers_\n" + "InnoDB: because space %s with the same id\n" + "InnoDB: was scanned earlier. 
This can happen" + " if you have renamed tables\n" + "InnoDB: during an ibbackup run.\n", + fsp->filepath, fsp->id, fsp->filepath, + space->name); + os_file_close(fsp->file); + + new_path = fil_make_ibbackup_old_name(fsp->filepath); + + mutex_exit(&fil_system->mutex); + + bool success = os_file_rename( + innodb_file_data_key, fsp->filepath, new_path); + + ut_a(success); + + mem_free(new_path); + + goto func_exit_after_close; + } + mutex_exit(&fil_system->mutex); +#endif /* UNIV_HOTBACKUP */ + ibool file_space_create_success = fil_space_create( + tablename, fsp->id, fsp->flags, FIL_TABLESPACE); + + if (!file_space_create_success) { + if (srv_force_recovery > 0) { + fprintf(stderr, + "InnoDB: innodb_force_recovery was set" + " to %lu. Continuing crash recovery\n" + "InnoDB: even though the tablespace" + " creation of this table failed.\n", + srv_force_recovery); + goto func_exit; + } + + /* Exit here with a core dump, stack, etc. */ + ut_a(file_space_create_success); + } + + /* We do not use the size information we have about the file, because + the rounding formula for extents and pages is somewhat complex; we + let fil_node_open() do that task. */ + + if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) { + ut_error; + } + +func_exit: + os_file_close(fsp->file); + +#ifdef UNIV_HOTBACKUP +func_exit_after_close: +#else + ut_ad(!mutex_own(&fil_system->mutex)); +#endif + mem_free(tablename); + if (remote.success) { + mem_free(remote.filepath); + } + mem_free(def.filepath); +} + +/***********************************************************************//** +A fault-tolerant function that tries to read the next file name in the +directory. We retry 100 times if os_file_readdir_next_file() returns -1. The +idea is to read as much good data as we can and jump over bad data. 
+@return 0 if ok, -1 if error even after the retries, 1 if at the end +of the directory */ +static +int +fil_file_readdir_next_file( +/*=======================*/ + dberr_t* err, /*!< out: this is set to DB_ERROR if an error + was encountered, otherwise not changed */ + const char* dirname,/*!< in: directory name or path */ + os_file_dir_t dir, /*!< in: directory stream */ + os_file_stat_t* info) /*!< in/out: buffer where the + info is returned */ +{ + for (ulint i = 0; i < 100; i++) { + int ret = os_file_readdir_next_file(dirname, dir, info); + + if (ret != -1) { + + return(ret); + } + + ib_logf(IB_LOG_LEVEL_ERROR, + "os_file_readdir_next_file() returned -1 in " + "directory %s, crash recovery may have failed " + "for some .ibd files!", dirname); + + *err = DB_ERROR; + } + + return(-1); +} + +#define CHECK_TIME_EVERY_N_FILES 10 +/********************************************************************//** +At the server startup, if we need crash recovery, scans the database +directories under the MySQL datadir, looking for .ibd files. Those files are +single-table tablespaces. We need to know the space id in each of them so that +we know into which file we should look to check the contents of a page stored +in the doublewrite buffer, also to know where to apply log records where the +space id is != 0. 
+@return DB_SUCCESS or error number */ +UNIV_INTERN +dberr_t +fil_load_single_table_tablespaces(void) +/*===================================*/ +{ + int ret; + char* dbpath = NULL; + ulint dbpath_len = 100; + ulint files_read = 0; + ulint files_read_at_last_check = 0; + ib_time_t prev_report_time = ut_time(); + os_file_dir_t dir; + os_file_dir_t dbdir; + os_file_stat_t dbinfo; + os_file_stat_t fileinfo; + dberr_t err = DB_SUCCESS; + + /* The datadir of MySQL is always the default directory of mysqld */ + + dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE); + + if (dir == NULL) { + + return(DB_ERROR); + } + + dbpath = static_cast(mem_alloc(dbpath_len)); + + /* Scan all directories under the datadir. They are the database + directories of MySQL. */ + + ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir, + &dbinfo); + while (ret == 0) { + ulint len; + /* printf("Looking at %s in datadir\n", dbinfo.name); */ + + if (dbinfo.type == OS_FILE_TYPE_FILE + || dbinfo.type == OS_FILE_TYPE_UNKNOWN) { + + goto next_datadir_item; + } + + /* We found a symlink or a directory; try opening it to see + if a symlink is a directory */ + + len = strlen(fil_path_to_mysql_datadir) + + strlen (dbinfo.name) + 2; + if (len > dbpath_len) { + dbpath_len = len; + + if (dbpath) { + mem_free(dbpath); + } + + dbpath = static_cast(mem_alloc(dbpath_len)); + } + ut_snprintf(dbpath, dbpath_len, + "%s/%s", fil_path_to_mysql_datadir, dbinfo.name); + srv_normalize_path_for_win(dbpath); + + dbdir = os_file_opendir(dbpath, FALSE); + + if (dbdir != NULL) { + + /* We found a database directory; loop through it, + looking for possible .ibd files in it */ + + ret = fil_file_readdir_next_file(&err, dbpath, dbdir, + &fileinfo); + while (ret == 0) { + + if (fileinfo.type == OS_FILE_TYPE_DIR) { + + goto next_file_item; + } + + /* We found a symlink or a file */ + if (strlen(fileinfo.name) > 4 + && (0 == strcmp(fileinfo.name + + strlen(fileinfo.name) - 4, + ".ibd") + || 0 == 
strcmp(fileinfo.name + + strlen(fileinfo.name) - 4, + ".isl"))) { + /* The name ends in .ibd or .isl; + try opening the file */ + fil_load_single_table_tablespace( + dbinfo.name, fileinfo.name); + files_read++; + if (files_read - files_read_at_last_check > + CHECK_TIME_EVERY_N_FILES) { + ib_time_t cur_time= ut_time(); + files_read_at_last_check= files_read; + double time_elapsed= ut_difftime(cur_time, + prev_report_time); + if (time_elapsed > 15) { + ib_logf(IB_LOG_LEVEL_INFO, + "Processed %ld .ibd/.isl files", + files_read); + prev_report_time= cur_time; + } + } + } +next_file_item: + ret = fil_file_readdir_next_file(&err, + dbpath, dbdir, + &fileinfo); + } + + if (0 != os_file_closedir(dbdir)) { + fputs("InnoDB: Warning: could not" + " close database directory ", stderr); + ut_print_filename(stderr, dbpath); + putc('\n', stderr); + + err = DB_ERROR; + } + } + +next_datadir_item: + ret = fil_file_readdir_next_file(&err, + fil_path_to_mysql_datadir, + dir, &dbinfo); + } + + mem_free(dbpath); + + if (0 != os_file_closedir(dir)) { + fprintf(stderr, + "InnoDB: Error: could not close MySQL datadir\n"); + + return(DB_ERROR); + } + + return(err); +} + +/*******************************************************************//** +Returns TRUE if a single-table tablespace does not exist in the memory cache, +or is being deleted there. 
+@return TRUE if does not exist or is being deleted */ +UNIV_INTERN +ibool +fil_tablespace_deleted_or_being_deleted_in_mem( +/*===========================================*/ + ulint id, /*!< in: space id */ + ib_int64_t version)/*!< in: tablespace_version should be this; if + you pass -1 as the value of this, then this + parameter is ignored */ +{ + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + if (space == NULL || space->stop_new_ops) { + mutex_exit(&fil_system->mutex); + + return(TRUE); + } + + if (version != ((ib_int64_t)-1) + && space->tablespace_version != version) { + mutex_exit(&fil_system->mutex); + + return(TRUE); + } + + mutex_exit(&fil_system->mutex); + + return(FALSE); +} + +/*******************************************************************//** +Returns TRUE if a single-table tablespace exists in the memory cache. +@return TRUE if exists */ +UNIV_INTERN +ibool +fil_tablespace_exists_in_mem( +/*=========================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + mutex_exit(&fil_system->mutex); + + return(space != NULL); +} + +/*******************************************************************//** +Report that a tablespace for a table was not found. */ +static +void +fil_report_missing_tablespace( +/*===========================*/ + const char* name, /*!< in: table name */ + ulint space_id) /*!< in: table's space id */ +{ + char index_name[MAX_FULL_NAME_LEN + 1]; + + innobase_format_name(index_name, sizeof(index_name), name, TRUE); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Table %s in the InnoDB data dictionary has tablespace id %lu, " + "but tablespace with that id or name does not exist. Have " + "you deleted or moved .ibd files? 
This may also be a table " + "created with CREATE TEMPORARY TABLE whose .ibd and .frm " + "files MySQL automatically removed, but the table still " + "exists in the InnoDB internal data dictionary.", + name, space_id); +} + +/*******************************************************************//** +Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory +cache. Note that if we have not done a crash recovery at the database startup, +there may be many tablespaces which are not yet in the memory cache. +@return TRUE if a matching tablespace exists in the memory cache */ +UNIV_INTERN +ibool +fil_space_for_table_exists_in_mem( +/*==============================*/ + ulint id, /*!< in: space id */ + const char* name, /*!< in: table name used in + fil_space_create(). Either the + standard 'dbname/tablename' format + or table->dir_path_of_temp_table */ + ibool mark_space, /*!< in: in crash recovery, at database + startup we mark all spaces which have + an associated table in the InnoDB + data dictionary, so that + we can print a warning about orphaned + tablespaces */ + ibool print_error_if_does_not_exist, + /*!< in: print detailed error + information to the .err log if a + matching tablespace is not found from + memory */ + bool adjust_space, /*!< in: whether to adjust space id + when find table space mismatch */ + mem_heap_t* heap, /*!< in: heap memory */ + table_id_t table_id) /*!< in: table id */ +{ + fil_space_t* fnamespace; + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + /* Look if there is a space with the same id */ + + space = fil_space_get_by_id(id); + + /* Look if there is a space with the same name; the name is the + directory path from the datadir to the file */ + + fnamespace = fil_space_get_by_name(name); + if (space && space == fnamespace) { + /* Found */ + + if (mark_space) { + space->mark = TRUE; + } + + mutex_exit(&fil_system->mutex); + + return(TRUE); + } + + /* Info from "fnamespace" comes from 
the ibd file itself, it can + be different from data obtained from System tables since it is + not transactional. If adjust_space is set, and the mismatching + space are between a user table and its temp table, we shall + adjust the ibd file name according to system table info */ + if (adjust_space + && space != NULL + && row_is_mysql_tmp_table_name(space->name) + && !row_is_mysql_tmp_table_name(name)) { + + mutex_exit(&fil_system->mutex); + + DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space", + DBUG_SUICIDE();); + + if (fnamespace) { + char* tmp_name; + + tmp_name = dict_mem_create_temporary_tablename( + heap, name, table_id); + + fil_rename_tablespace(fnamespace->name, fnamespace->id, + tmp_name, NULL); + } + + DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space", + DBUG_SUICIDE();); + + fil_rename_tablespace(space->name, id, name, NULL); + + DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space", + DBUG_SUICIDE();); + + mutex_enter(&fil_system->mutex); + fnamespace = fil_space_get_by_name(name); + ut_ad(space == fnamespace); + mutex_exit(&fil_system->mutex); + + return(TRUE); + } + + if (!print_error_if_does_not_exist) { + + mutex_exit(&fil_system->mutex); + + return(FALSE); + } + + if (space == NULL) { + if (fnamespace == NULL) { + if (print_error_if_does_not_exist) { + fil_report_missing_tablespace(name, id); + } + } else { + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: table ", stderr); + ut_print_filename(stderr, name); + fprintf(stderr, "\n" + "InnoDB: in InnoDB data dictionary has" + " tablespace id %lu,\n" + "InnoDB: but a tablespace with that id" + " does not exist. There is\n" + "InnoDB: a tablespace of name %s and id %lu," + " though. 
Have\n" + "InnoDB: you deleted or moved .ibd files?\n", + (ulong) id, fnamespace->name, + (ulong) fnamespace->id); + } +error_exit: + fputs("InnoDB: Please refer to\n" + "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n" + "InnoDB: for how to resolve the issue.\n", stderr); + + mutex_exit(&fil_system->mutex); + + return(FALSE); + } + + if (0 != strcmp(space->name, name)) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: table ", stderr); + ut_print_filename(stderr, name); + fprintf(stderr, "\n" + "InnoDB: in InnoDB data dictionary has" + " tablespace id %lu,\n" + "InnoDB: but the tablespace with that id" + " has name %s.\n" + "InnoDB: Have you deleted or moved .ibd files?\n", + (ulong) id, space->name); + + if (fnamespace != NULL) { + fputs("InnoDB: There is a tablespace" + " with the right name\n" + "InnoDB: ", stderr); + ut_print_filename(stderr, fnamespace->name); + fprintf(stderr, ", but its id is %lu.\n", + (ulong) fnamespace->id); + } + + goto error_exit; + } + + mutex_exit(&fil_system->mutex); + + return(FALSE); +} + +/*******************************************************************//** +Checks if a single-table tablespace for a given table name exists in the +tablespace memory cache. +@return space id, ULINT_UNDEFINED if not found */ +UNIV_INTERN +ulint +fil_get_space_id_for_table( +/*=======================*/ + const char* tablename) /*!< in: table name in the standard + 'databasename/tablename' format */ +{ + fil_space_t* fnamespace; + ulint id = ULINT_UNDEFINED; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + /* Look if there is a space with the same name. */ + + fnamespace = fil_space_get_by_name(tablename); + + if (fnamespace) { + id = fnamespace->id; + } + + mutex_exit(&fil_system->mutex); + + return(id); +} + +/**********************************************************************//** +Tries to extend a data file so that it would accommodate the number of pages +given. 
The tablespace must be cached in the memory cache. If the space is big +enough already, does nothing. +@return TRUE if success */ +UNIV_INTERN +ibool +fil_extend_space_to_desired_size( +/*=============================*/ + ulint* actual_size, /*!< out: size of the space after extension; + if we ran out of disk space this may be lower + than the desired size */ + ulint space_id, /*!< in: space id */ + ulint size_after_extend)/*!< in: desired size in pages after the + extension; if the current space size is bigger + than this already, the function does nothing */ +{ + fil_node_t* node; + fil_space_t* space; + byte* buf2; + byte* buf; + ulint buf_size; + ulint start_page_no; + ulint file_start_page_no; + ulint page_size; + ulint pages_added; + ibool success; + + ut_ad(!srv_read_only_mode); + +retry: + pages_added = 0; + success = TRUE; + + fil_mutex_enter_and_prepare_for_io(space_id); + + space = fil_space_get_by_id(space_id); + ut_a(space); + + if (space->size >= size_after_extend) { + /* Space already big enough */ + + *actual_size = space->size; + + mutex_exit(&fil_system->mutex); + + return(TRUE); + } + + page_size = fsp_flags_get_zip_size(space->flags); + if (!page_size) { + page_size = UNIV_PAGE_SIZE; + } + + node = UT_LIST_GET_LAST(space->chain); + + if (!node->being_extended) { + /* Mark this node as undergoing extension. This flag + is used by other threads to wait for the extension + opereation to finish. */ + node->being_extended = TRUE; + } else { + /* Another thread is currently extending the file. Wait + for it to finish. + It'd have been better to use event driven mechanism but + the entire module is peppered with polling stuff. 
*/ + mutex_exit(&fil_system->mutex); + os_thread_sleep(100000); + goto retry; + } + + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The tablespace data file, such as .ibd file, is missing */ + node->being_extended = false; + mutex_exit(&fil_system->mutex); + + return(false); + } + + /* At this point it is safe to release fil_system mutex. No + other thread can rename, delete or close the file because + we have set the node->being_extended flag. */ + mutex_exit(&fil_system->mutex); + + start_page_no = space->size; + file_start_page_no = space->size - node->size; + +#ifdef HAVE_POSIX_FALLOCATE + if (srv_use_posix_fallocate) { + os_offset_t start_offset = start_page_no * page_size; + os_offset_t n_pages = (size_after_extend - start_page_no); + os_offset_t len = n_pages * page_size; + + if (posix_fallocate(node->handle, start_offset, len) == -1) { + ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " + "space for file \'%s\' failed. Current size " + INT64PF ", desired size " INT64PF "\n", + node->name, start_offset, len+start_offset); + os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); + success = FALSE; + } else { + success = TRUE; + } + + mutex_enter(&fil_system->mutex); + + if (success) { + node->size += n_pages; + space->size += n_pages; + os_has_said_disk_full = FALSE; + } + + /* If posix_fallocate was used to extent the file space + we need to complete the io. Because no actual writes were + dispatched read operation is enough here. Without this + there will be assertion at shutdown indicating that + all IO is not completed. 
*/ + fil_node_complete_io(node, fil_system, OS_FILE_READ); + goto file_extended; + } +#endif + + /* Extend at most 64 pages at a time */ + buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; + buf2 = static_cast(mem_alloc(buf_size + page_size)); + buf = static_cast(ut_align(buf2, page_size)); + + memset(buf, 0, buf_size); + + while (start_page_no < size_after_extend) { + ulint n_pages + = ut_min(buf_size / page_size, + size_after_extend - start_page_no); + + os_offset_t offset + = ((os_offset_t) (start_page_no - file_start_page_no)) + * page_size; +#ifdef UNIV_HOTBACKUP + success = os_file_write(node->name, node->handle, buf, + offset, page_size * n_pages); +#else + success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, + node->name, node->handle, buf, + offset, page_size * n_pages, + NULL, NULL, space_id, NULL, 0, 0, 0, 0, 0); +#endif /* UNIV_HOTBACKUP */ + if (success) { + os_has_said_disk_full = FALSE; + } else { + /* Let us measure the size of the file to determine + how much we were able to extend it */ + os_offset_t size; + + size = os_file_get_size(node->handle); + ut_a(size != (os_offset_t) -1); + + n_pages = ((ulint) (size / page_size)) + - node->size - pages_added; + + pages_added += n_pages; + break; + } + + start_page_no += n_pages; + pages_added += n_pages; + } + + mem_free(buf2); + + mutex_enter(&fil_system->mutex); + + ut_a(node->being_extended); + + space->size += pages_added; + node->size += pages_added; + + fil_node_complete_io(node, fil_system, OS_FILE_WRITE); + + /* At this point file has been extended */ +file_extended: + + node->being_extended = FALSE; + *actual_size = space->size; + +#ifndef UNIV_HOTBACKUP + if (space_id == 0) { + ulint pages_per_mb = (1024 * 1024) / page_size; + + /* Keep the last data file size info up to date, rounded to + full megabytes */ + + srv_data_file_sizes[srv_n_data_files - 1] + = (node->size / pages_per_mb) * pages_per_mb; + } +#endif /* !UNIV_HOTBACKUP */ + + /* + printf("Extended %s to %lu, actual size 
%lu pages\n", space->name, + size_after_extend, *actual_size); */ + mutex_exit(&fil_system->mutex); + + fil_flush(space_id); + + return(success); +} + +#ifdef UNIV_HOTBACKUP +/********************************************************************//** +Extends all tablespaces to the size stored in the space header. During the +ibbackup --apply-log phase we extended the spaces on-demand so that log records +could be applied, but that may have left spaces still too small compared to +the size stored in the space header. */ +UNIV_INTERN +void +fil_extend_tablespaces_to_stored_len(void) +/*======================================*/ +{ + fil_space_t* space; + byte* buf; + ulint actual_size; + ulint size_in_header; + dberr_t error; + ibool success; + + buf = mem_alloc(UNIV_PAGE_SIZE); + + mutex_enter(&fil_system->mutex); + + space = UT_LIST_GET_FIRST(fil_system->space_list); + + while (space) { + ut_a(space->purpose == FIL_TABLESPACE); + + mutex_exit(&fil_system->mutex); /* no need to protect with a + mutex, because this is a + single-threaded operation */ + error = fil_read(TRUE, space->id, + fsp_flags_get_zip_size(space->flags), + 0, 0, UNIV_PAGE_SIZE, buf, NULL); + ut_a(error == DB_SUCCESS); + + size_in_header = fsp_get_size_low(buf); + + success = fil_extend_space_to_desired_size( + &actual_size, space->id, size_in_header); + if (!success) { + fprintf(stderr, + "InnoDB: Error: could not extend the" + " tablespace of %s\n" + "InnoDB: to the size stored in header," + " %lu pages;\n" + "InnoDB: size after extension %lu pages\n" + "InnoDB: Check that you have free disk space" + " and retry!\n", + space->name, size_in_header, actual_size); + ut_a(success); + } + + mutex_enter(&fil_system->mutex); + + space = UT_LIST_GET_NEXT(space_list, space); + } + + mutex_exit(&fil_system->mutex); + + mem_free(buf); +} +#endif + +/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/ + +/*******************************************************************//** +Tries to 
reserve free extents in a file space. +@return TRUE if succeed */ +UNIV_INTERN +ibool +fil_space_reserve_free_extents( +/*===========================*/ + ulint id, /*!< in: space id */ + ulint n_free_now, /*!< in: number of free extents now */ + ulint n_to_reserve) /*!< in: how many one wants to reserve */ +{ + fil_space_t* space; + ibool success; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + if (space->n_reserved_extents + n_to_reserve > n_free_now) { + success = FALSE; + } else { + space->n_reserved_extents += n_to_reserve; + success = TRUE; + } + + mutex_exit(&fil_system->mutex); + + return(success); +} + +/*******************************************************************//** +Releases free extents in a file space. */ +UNIV_INTERN +void +fil_space_release_free_extents( +/*===========================*/ + ulint id, /*!< in: space id */ + ulint n_reserved) /*!< in: how many one reserved */ +{ + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + ut_a(space->n_reserved_extents >= n_reserved); + + space->n_reserved_extents -= n_reserved; + + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Gets the number of reserved extents. If the database is silent, this number +should be zero. 
*/ +UNIV_INTERN +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ulint n; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + n = space->n_reserved_extents; + + mutex_exit(&fil_system->mutex); + + return(n); +} + +/*============================ FILE I/O ================================*/ + +/********************************************************************//** +NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! + +Prepares a file node for i/o. Opens the file if it is closed. Updates the +pending i/o's field in the node and the system appropriately. Takes the node +off the LRU list if it is in the LRU list. The caller must hold the fil_sys +mutex. +@return false if the file can't be opened, otherwise true */ +static +bool +fil_node_prepare_for_io( +/*====================*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + fil_space_t* space) /*!< in: space */ +{ + ut_ad(node && system && space); + ut_ad(mutex_own(&(system->mutex))); + + if (system->n_open > system->max_n_open + 5) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: open files %lu" + " exceeds the limit %lu\n", + (ulong) system->n_open, + (ulong) system->max_n_open); + } + + if (node->open == FALSE) { + /* File is closed: open it */ + ut_a(node->n_pending == 0); + + if (!fil_node_open_file(node, system, space)) { + return(false); + } + } + + if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) { + /* The node is in the LRU list, remove it */ + + ut_a(UT_LIST_GET_LEN(system->LRU) > 0); + + UT_LIST_REMOVE(LRU, system->LRU, node); + } + + node->n_pending++; + + return(true); +} + +/********************************************************************//** +Updates the data structures when an i/o operation finishes. 
Updates the +pending i/o's field in the node appropriately. */ +static +void +fil_node_complete_io( +/*=================*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks + the node as modified if + type == OS_FILE_WRITE */ +{ + ut_ad(node); + ut_ad(system); + ut_ad(mutex_own(&(system->mutex))); + + ut_a(node->n_pending > 0); + + node->n_pending--; + + if (type == OS_FILE_WRITE) { + ut_ad(!srv_read_only_mode); + system->modification_counter++; + node->modification_counter = system->modification_counter; + + if (fil_buffering_disabled(node->space)) { + + /* We don't need to keep track of unflushed + changes as user has explicitly disabled + buffering. */ + ut_ad(!node->space->is_in_unflushed_spaces); + node->flush_counter = node->modification_counter; + + } else if (!node->space->is_in_unflushed_spaces) { + + node->space->is_in_unflushed_spaces = true; + UT_LIST_ADD_FIRST(unflushed_spaces, + system->unflushed_spaces, + node->space); + } + } + + if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { + + /* The node must be put back to the LRU list */ + UT_LIST_ADD_FIRST(LRU, system->LRU, node); + } +} + +/********************************************************************//** +Report information about an invalid page access. 
*/ +static +void +fil_report_invalid_page_access( +/*===========================*/ + ulint block_offset, /*!< in: block offset */ + ulint space_id, /*!< in: space id */ + const char* space_name, /*!< in: space name */ + ulint byte_offset, /*!< in: byte offset */ + ulint len, /*!< in: I/O length */ + ulint type) /*!< in: I/O type */ +{ + fprintf(stderr, + "InnoDB: Error: trying to access page number %lu" + " in space %lu,\n" + "InnoDB: space name %s,\n" + "InnoDB: which is outside the tablespace bounds.\n" + "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n" + "InnoDB: If you get this error at mysqld startup," + " please check that\n" + "InnoDB: your my.cnf matches the ibdata files" + " that you have in the\n" + "InnoDB: MySQL server.\n", + (ulong) block_offset, (ulong) space_id, space_name, + (ulong) byte_offset, (ulong) len, (ulong) type); +} + +/********************************************************************//** +Reads or writes data. This operation is asynchronous (aio). +@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do +i/o on a tablespace which does not exist */ +UNIV_INTERN +dberr_t +_fil_io( +/*===*/ + ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, + ORed to OS_FILE_LOG, if a log i/o + and ORed to OS_AIO_SIMULATED_WAKE_LATER + if simulated aio and we want to post a + batch of i/os; NOTE that a simulated batch + may introduce hidden chances of deadlocks, + because i/os are not actually handled until + all have been posted: use with great + caution! 
*/ + bool sync, /*!< in: true if synchronous aio is desired */ + ulint space_id, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint block_offset, /*!< in: offset in number of blocks */ + ulint byte_offset, /*!< in: remainder of offset in bytes; in + aio this must be divisible by the OS block + size */ + ulint len, /*!< in: how many bytes to read or write; this + must not cross a file boundary; in aio this + must be a block size multiple */ + void* buf, /*!< in/out: buffer where to store read data + or from where to write; in aio this must be + appropriately aligned */ + void* message, /*!< in: message for aio handler if non-sync + aio used, else ignored */ + ulint* write_size, /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + trx_t* trx) +{ + ulint mode; + fil_space_t* space; + fil_node_t* node; + ibool ret; + ulint is_log; + ulint wake_later; + os_offset_t offset; + ibool ignore_nonexistent_pages; + ibool page_compressed = FALSE; + ulint page_compression_level = 0; + ibool page_encrypted = FALSE; + ulint page_encryption_key = 0; + + + is_log = type & OS_FILE_LOG; + type = type & ~OS_FILE_LOG; + + wake_later = type & OS_AIO_SIMULATED_WAKE_LATER; + type = type & ~OS_AIO_SIMULATED_WAKE_LATER; + + ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES; + type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES; + + ut_ad(byte_offset < UNIV_PAGE_SIZE); + ut_ad(!zip_size || !byte_offset); + ut_ad(ut_is_2pow(zip_size)); + ut_ad(buf); + ut_ad(len > 0); + ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT)); +#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX +# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX" +#endif +#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN +# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN" +#endif 
+ ut_ad(fil_validate_skip()); +#ifndef UNIV_HOTBACKUP +# ifndef UNIV_LOG_DEBUG + /* ibuf bitmap pages must be read in the sync aio mode: */ + ut_ad(recv_no_ibuf_operations + || type == OS_FILE_WRITE + || !ibuf_bitmap_page(zip_size, block_offset) + || sync + || is_log); +# endif /* UNIV_LOG_DEBUG */ + if (sync) { + mode = OS_AIO_SYNC; + } else if (is_log) { + mode = OS_AIO_LOG; + } else if (type == OS_FILE_READ + && !recv_no_ibuf_operations + && ibuf_page(space_id, zip_size, block_offset, NULL)) { + mode = OS_AIO_IBUF; + } else { + mode = OS_AIO_NORMAL; + } +#else /* !UNIV_HOTBACKUP */ + ut_a(sync); + mode = OS_AIO_SYNC; +#endif /* !UNIV_HOTBACKUP */ + + if (type == OS_FILE_READ) { + srv_stats.data_read.add(len); + } else if (type == OS_FILE_WRITE) { + ut_ad(!srv_read_only_mode); + srv_stats.data_written.add(len); + if (fil_page_is_index_page((byte *)buf)) { + srv_stats.index_pages_written.inc(); + } else { + srv_stats.non_index_pages_written.inc(); + } + } + + /* Reserve the fil_system mutex and make sure that we can open at + least one file while holding it, if the file is not already open */ + + fil_mutex_enter_and_prepare_for_io(space_id); + + space = fil_space_get_by_id(space_id); + + page_compressed = fsp_flags_is_page_compressed(space->flags); + page_compression_level = fsp_flags_get_page_compression_level(space->flags); + + page_encrypted = fsp_flags_is_page_encrypted(space->flags); + page_encryption_key = fsp_flags_get_page_encryption_key(space->flags); + + + /* If we are deleting a tablespace we don't allow any read + operations on that. However, we do allow write operations. */ + if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) { + mutex_exit(&fil_system->mutex); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Trying to do i/o to a tablespace which does " + "not exist. i/o type %lu, space id %lu, " + "page no. 
%lu, i/o length %lu bytes", + (ulong) type, (ulong) space_id, (ulong) block_offset, + (ulong) len); + + return(DB_TABLESPACE_DELETED); + } + + ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE); + + node = UT_LIST_GET_FIRST(space->chain); + + for (;;) { + if (node == NULL) { + if (ignore_nonexistent_pages) { + mutex_exit(&fil_system->mutex); + return(DB_ERROR); + } + + fil_report_invalid_page_access( + block_offset, space_id, space->name, + byte_offset, len, type); + + ut_error; + + } else if (fil_is_user_tablespace_id(space->id) + && node->size == 0) { + + /* We do not know the size of a single-table tablespace + before we open the file */ + break; + } else if (node->size > block_offset) { + /* Found! */ + break; + } else { + block_offset -= node->size; + node = UT_LIST_GET_NEXT(chain, node); + } + } + + /* Open file if closed */ + if (!fil_node_prepare_for_io(node, fil_system, space)) { + if (space->purpose == FIL_TABLESPACE + && fil_is_user_tablespace_id(space->id)) { + mutex_exit(&fil_system->mutex); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Trying to do i/o to a tablespace which " + "exists without .ibd data file. " + "i/o type %lu, space id %lu, page no %lu, " + "i/o length %lu bytes", + (ulong) type, (ulong) space_id, + (ulong) block_offset, (ulong) len); + + return(DB_TABLESPACE_DELETED); + } + + /* The tablespace is for log. Currently, we just assert here + to prevent handling errors along the way fil_io returns. + Also, if the log files are missing, it would be hard to + promise the server can continue running. */ + ut_a(0); + } + + /* Check that at least the start offset is within the bounds of a + single-table tablespace, including rollback tablespaces. 
*/ + if (UNIV_UNLIKELY(node->size <= block_offset) + && space->id != 0 && space->purpose == FIL_TABLESPACE) { + + fil_report_invalid_page_access( + block_offset, space_id, space->name, byte_offset, + len, type); + + ut_error; + } + + /* Now we have made the changes in the data structures of fil_system */ + mutex_exit(&fil_system->mutex); + + /* Calculate the low 32 bits and the high 32 bits of the file offset */ + + if (!zip_size) { + offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT) + + byte_offset; + + ut_a(node->size - block_offset + >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1)) + / UNIV_PAGE_SIZE)); + } else { + ulint zip_size_shift; + switch (zip_size) { + case 1024: zip_size_shift = 10; break; + case 2048: zip_size_shift = 11; break; + case 4096: zip_size_shift = 12; break; + case 8192: zip_size_shift = 13; break; + case 16384: zip_size_shift = 14; break; + default: ut_error; + } + offset = ((os_offset_t) block_offset << zip_size_shift) + + byte_offset; + ut_a(node->size - block_offset + >= (len + (zip_size - 1)) / zip_size); + } + + /* Do aio */ + + ut_a(byte_offset % OS_MIN_LOG_BLOCK_SIZE == 0); + ut_a((len % OS_MIN_LOG_BLOCK_SIZE) == 0); + +#ifndef UNIV_HOTBACKUP + if (UNIV_UNLIKELY(space->is_corrupt && srv_pass_corrupt_table)) { + + /* should ignore i/o for the crashed space */ + if (srv_pass_corrupt_table == 1 || + type == OS_FILE_WRITE) { + + mutex_enter(&fil_system->mutex); + fil_node_complete_io(node, fil_system, type); + mutex_exit(&fil_system->mutex); + if (mode == OS_AIO_NORMAL) { + ut_a(space->purpose == FIL_TABLESPACE); + buf_page_io_complete(static_cast + (message)); + } + } + + if (srv_pass_corrupt_table == 1 && type == OS_FILE_READ) { + + return(DB_TABLESPACE_DELETED); + + } else if (type == OS_FILE_WRITE) { + + return(DB_SUCCESS); + } + } + + /* Queue the aio request */ + ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, + offset, len, node, message, space_id, trx, page_compressed, page_compression_level, 
write_size, page_encrypted, page_encryption_key); + +#else + /* In ibbackup do normal i/o, not aio */ + if (type == OS_FILE_READ) { + ret = os_file_read(node->handle, buf, offset, len); + } else { + ut_ad(!srv_read_only_mode); + ret = os_file_write(node->name, node->handle, buf, + offset, len); + } +#endif /* !UNIV_HOTBACKUP */ + ut_a(ret); + + if (mode == OS_AIO_SYNC) { + /* The i/o operation is already completed when we return from + os_aio: */ + + mutex_enter(&fil_system->mutex); + + fil_node_complete_io(node, fil_system, type); + + mutex_exit(&fil_system->mutex); + + ut_ad(fil_validate_skip()); + } + + return(DB_SUCCESS); +} + +#ifndef UNIV_HOTBACKUP +/**********************************************************************//** +Waits for an aio operation to complete. This function is used to write the +handler for completed requests. The aio array of pending requests is divided +into segments (see os0file.cc for more info). The thread specifies which +segment it wants to wait for. */ +UNIV_INTERN +void +fil_aio_wait( +/*=========*/ + ulint segment) /*!< in: the number of the segment in the aio + array to wait for */ +{ + ibool ret; + fil_node_t* fil_node; + void* message; + ulint type; + ulint space_id = 0; + + ut_ad(fil_validate_skip()); + + if (srv_use_native_aio) { + srv_set_io_thread_op_info(segment, "native aio handle"); +#ifdef WIN_ASYNC_IO + ret = os_aio_windows_handle( + segment, 0, &fil_node, &message, &type, &space_id); +#elif defined(LINUX_NATIVE_AIO) + ret = os_aio_linux_handle( + segment, &fil_node, &message, &type, &space_id); +#else + ut_error; + ret = 0; /* Eliminate compiler warning */ +#endif /* WIN_ASYNC_IO */ + } else { + srv_set_io_thread_op_info(segment, "simulated aio handle"); + + ret = os_aio_simulated_handle( + segment, &fil_node, &message, &type, &space_id); + } + + ut_a(ret); + if (fil_node == NULL) { + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); + return; + } + + srv_set_io_thread_op_info(segment, "complete io for fil 
node"); + + mutex_enter(&fil_system->mutex); + + fil_node_complete_io(fil_node, fil_system, type); + + mutex_exit(&fil_system->mutex); + + ut_ad(fil_validate_skip()); + + /* Do the i/o handling */ + /* IMPORTANT: since i/o handling for reads will read also the insert + buffer in tablespace 0, you have to be very careful not to introduce + deadlocks in the i/o system. We keep tablespace 0 data files always + open, and use a special i/o thread to serve insert buffer requests. */ + + if (fil_node->space->purpose == FIL_TABLESPACE) { + srv_set_io_thread_op_info(segment, "complete io for buf page"); + buf_page_io_complete(static_cast(message)); + } else { + srv_set_io_thread_op_info(segment, "complete io for log"); + log_io_complete(static_cast(message)); + } +} +#endif /* UNIV_HOTBACKUP */ + +/**********************************************************************//** +Flushes to disk possible writes cached by the OS. If the space does not exist +or is being dropped, does not do anything. */ +UNIV_INTERN +void +fil_flush( +/*======*/ + ulint space_id) /*!< in: file space id (this can be a group of + log files or a tablespace of the database) */ +{ + fil_space_t* space; + fil_node_t* node; + os_file_t file; + + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(space_id); + + if (!space || space->stop_new_ops) { + mutex_exit(&fil_system->mutex); + + return; + } + + if (fil_buffering_disabled(space)) { + + /* No need to flush. User has explicitly disabled + buffering. 
*/ + ut_ad(!space->is_in_unflushed_spaces); + ut_ad(fil_space_is_flushed(space)); + ut_ad(space->n_pending_flushes == 0); + +#ifdef UNIV_DEBUG + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + ut_ad(node->modification_counter + == node->flush_counter); + ut_ad(node->n_pending_flushes == 0); + } +#endif /* UNIV_DEBUG */ + + mutex_exit(&fil_system->mutex); + return; + } + + space->n_pending_flushes++; /*!< prevent dropping of the space while + we are flushing */ + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + ib_int64_t old_mod_counter = node->modification_counter;; + + if (old_mod_counter <= node->flush_counter) { + continue; + } + + ut_a(node->open); + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes++; + } else { + fil_n_pending_log_flushes++; + fil_n_log_flushes++; + } +#ifdef __WIN__ + if (node->is_raw_disk) { + + goto skip_flush; + } +#endif /* __WIN__ */ +retry: + if (node->n_pending_flushes > 0) { + /* We want to avoid calling os_file_flush() on + the file twice at the same time, because we do + not know what bugs OS's may contain in file + i/o */ + + ib_int64_t sig_count = + os_event_reset(node->sync_event); + + mutex_exit(&fil_system->mutex); + + os_event_wait_low(node->sync_event, sig_count); + + mutex_enter(&fil_system->mutex); + + if (node->flush_counter >= old_mod_counter) { + + goto skip_flush; + } + + goto retry; + } + + ut_a(node->open); + file = node->handle; + node->n_pending_flushes++; + + mutex_exit(&fil_system->mutex); + + os_file_flush(file); + + mutex_enter(&fil_system->mutex); + + os_event_set(node->sync_event); + + node->n_pending_flushes--; +skip_flush: + if (node->flush_counter < old_mod_counter) { + node->flush_counter = old_mod_counter; + + if (space->is_in_unflushed_spaces + && fil_space_is_flushed(space)) { + + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE( + unflushed_spaces, + 
fil_system->unflushed_spaces, + space); + } + } + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes--; + } else { + fil_n_pending_log_flushes--; + } + } + + space->n_pending_flushes--; + + mutex_exit(&fil_system->mutex); +} + +/**********************************************************************//** +Flushes to disk the writes in file spaces of the given type possibly cached by +the OS. */ +UNIV_INTERN +void +fil_flush_file_spaces( +/*==================*/ + ulint purpose) /*!< in: FIL_TABLESPACE, FIL_LOG */ +{ + fil_space_t* space; + ulint* space_ids; + ulint n_space_ids; + ulint i; + + mutex_enter(&fil_system->mutex); + + n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces); + if (n_space_ids == 0) { + + mutex_exit(&fil_system->mutex); + return; + } + + /* Assemble a list of space ids to flush. Previously, we + traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT() + on a space that was just removed from the list by fil_flush(). + Thus, the space could be dropped and the memory overwritten. */ + space_ids = static_cast( + mem_alloc(n_space_ids * sizeof *space_ids)); + + n_space_ids = 0; + + for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces); + space; + space = UT_LIST_GET_NEXT(unflushed_spaces, space)) { + + if (space->purpose == purpose && !space->stop_new_ops) { + + space_ids[n_space_ids++] = space->id; + } + } + + mutex_exit(&fil_system->mutex); + + /* Flush the spaces. It will not hurt to call fil_flush() on + a non-existing space id. */ + for (i = 0; i < n_space_ids; i++) { + + fil_flush(space_ids[i]); + } + + mem_free(space_ids); +} + +/** Functor to validate the space list. */ +struct Check { + void operator()(const fil_node_t* elem) + { + ut_a(elem->open || !elem->n_pending); + } +}; + +/******************************************************************//** +Checks the consistency of the tablespace cache. 
+@return TRUE if ok */ +UNIV_INTERN +ibool +fil_validate(void) +/*==============*/ +{ + fil_space_t* space; + fil_node_t* fil_node; + ulint n_open = 0; + ulint i; + + mutex_enter(&fil_system->mutex); + + /* Look for spaces in the hash table */ + + for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) { + + for (space = static_cast( + HASH_GET_FIRST(fil_system->spaces, i)); + space != 0; + space = static_cast( + HASH_GET_NEXT(hash, space))) { + + UT_LIST_VALIDATE( + chain, fil_node_t, space->chain, Check()); + + for (fil_node = UT_LIST_GET_FIRST(space->chain); + fil_node != 0; + fil_node = UT_LIST_GET_NEXT(chain, fil_node)) { + + if (fil_node->n_pending > 0) { + ut_a(fil_node->open); + } + + if (fil_node->open) { + n_open++; + } + } + } + } + + ut_a(fil_system->n_open == n_open); + + UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU); + + for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU); + fil_node != 0; + fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) { + + ut_a(fil_node->n_pending == 0); + ut_a(!fil_node->being_extended); + ut_a(fil_node->open); + ut_a(fil_space_belongs_in_lru(fil_node->space)); + } + + mutex_exit(&fil_system->mutex); + + return(TRUE); +} + +/********************************************************************//** +Returns TRUE if file address is undefined. +@return TRUE if undefined */ +UNIV_INTERN +ibool +fil_addr_is_null( +/*=============*/ + fil_addr_t addr) /*!< in: address */ +{ + return(addr.page == FIL_NULL); +} + +/********************************************************************//** +Get the predecessor of a file page. +@return FIL_PAGE_PREV */ +UNIV_INTERN +ulint +fil_page_get_prev( +/*==============*/ + const byte* page) /*!< in: file page */ +{ + return(mach_read_from_4(page + FIL_PAGE_PREV)); +} + +/********************************************************************//** +Get the successor of a file page. 
+@return FIL_PAGE_NEXT */ +UNIV_INTERN +ulint +fil_page_get_next( +/*==============*/ + const byte* page) /*!< in: file page */ +{ + return(mach_read_from_4(page + FIL_PAGE_NEXT)); +} + +/*********************************************************************//** +Sets the file page type. */ +UNIV_INTERN +void +fil_page_set_type( +/*==============*/ + byte* page, /*!< in/out: file page */ + ulint type) /*!< in: type */ +{ + ut_ad(page); + + mach_write_to_2(page + FIL_PAGE_TYPE, type); +} + +/*********************************************************************//** +Gets the file page type. +@return type; NOTE that if the type has not been written to page, the +return value not defined */ +UNIV_INTERN +ulint +fil_page_get_type( +/*==============*/ + const byte* page) /*!< in: file page */ +{ + ut_ad(page); + + return(mach_read_from_2(page + FIL_PAGE_TYPE)); +} + +/****************************************************************//** +Closes the tablespace memory cache. */ +UNIV_INTERN +void +fil_close(void) +/*===========*/ +{ +#ifndef UNIV_HOTBACKUP + /* The mutex should already have been freed. */ + ut_ad(fil_system->mutex.magic_n == 0); +#endif /* !UNIV_HOTBACKUP */ + + hash_table_free(fil_system->spaces); + + hash_table_free(fil_system->name_hash); + + ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0); + ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0); + ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0); + + mem_free(fil_system); + + fil_system = NULL; +} + +/********************************************************************//** +Initializes a buffer control block when the buf_pool is created. */ +static +void +fil_buf_block_init( +/*===============*/ + buf_block_t* block, /*!< in: pointer to control block */ + byte* frame) /*!< in: pointer to buffer frame */ +{ + UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE); + + block->frame = frame; + + block->page.io_fix = BUF_IO_NONE; + /* There are assertions that check for this. 
*/ + block->page.buf_fix_count = 1; + block->page.state = BUF_BLOCK_READY_FOR_USE; + + page_zip_des_init(&block->page.zip); +} + +struct fil_iterator_t { + os_file_t file; /*!< File handle */ + const char* filepath; /*!< File path name */ + os_offset_t start; /*!< From where to start */ + os_offset_t end; /*!< Where to stop */ + os_offset_t file_size; /*!< File size in bytes */ + ulint page_size; /*!< Page size */ + ulint n_io_buffers; /*!< Number of pages to use + for IO */ + byte* io_buffer; /*!< Buffer to use for IO */ +}; + +/********************************************************************//** +TODO: This can be made parallel trivially by chunking up the file and creating +a callback per thread. . Main benefit will be to use multiple CPUs for +checksums and compressed tables. We have to do compressed tables block by +block right now. Secondly we need to decompress/compress and copy too much +of data. These are CPU intensive. + +Iterate over all the pages in the tablespace. +@param iter - Tablespace iterator +@param block - block to use for IO +@param callback - Callback to inspect and update page contents +@retval DB_SUCCESS or error code */ +static +dberr_t +fil_iterate( +/*========*/ + const fil_iterator_t& iter, + buf_block_t* block, + PageCallback& callback) +{ + os_offset_t offset; + ulint page_no = 0; + ulint space_id = callback.get_space_id(); + ulint n_bytes = iter.n_io_buffers * iter.page_size; + + ut_ad(!srv_read_only_mode); + + /* TODO: For compressed tables we do a lot of useless + copying for non-index pages. 
Unfortunately, it is + required by buf_zip_decompress() */ + + for (offset = iter.start; offset < iter.end; offset += n_bytes) { + + byte* io_buffer = iter.io_buffer; + + block->frame = io_buffer; + + if (callback.get_zip_size() > 0) { + page_zip_des_init(&block->page.zip); + page_zip_set_size(&block->page.zip, iter.page_size); + block->page.zip.data = block->frame + UNIV_PAGE_SIZE; + ut_d(block->page.zip.m_external = true); + ut_ad(iter.page_size == callback.get_zip_size()); + + /* Zip IO is done in the compressed page buffer. */ + io_buffer = block->page.zip.data; + } else { + io_buffer = iter.io_buffer; + } + + /* We have to read the exact number of bytes. Otherwise the + InnoDB IO functions croak on failed reads. */ + + n_bytes = static_cast( + ut_min(static_cast(n_bytes), + iter.end - offset)); + + ut_ad(n_bytes > 0); + ut_ad(!(n_bytes % iter.page_size)); + + if (!os_file_read(iter.file, io_buffer, offset, + (ulint) n_bytes, + fil_space_is_page_compressed(space_id))) { + + ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed"); + + return(DB_IO_ERROR); + } + + bool updated = false; + os_offset_t page_off = offset; + ulint n_pages_read = (ulint) n_bytes / iter.page_size; + + for (ulint i = 0; i < n_pages_read; ++i) { + + buf_block_set_file_page(block, space_id, page_no++); + + dberr_t err; + + if ((err = callback(page_off, block)) != DB_SUCCESS) { + + return(err); + + } else if (!updated) { + updated = buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE; + } + + buf_block_set_state(block, BUF_BLOCK_NOT_USED); + buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); + + page_off += iter.page_size; + block->frame += iter.page_size; + } + + /* A page was updated in the set, write back to disk. 
*/ + if (updated + && !os_file_write( + iter.filepath, iter.file, io_buffer, + offset, (ulint) n_bytes)) { + + ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed"); + + return(DB_IO_ERROR); + } + } + + return(DB_SUCCESS); +} + +/********************************************************************//** +Iterate over all the pages in the tablespace. +@param table - the table definiton in the server +@param n_io_buffers - number of blocks to read and write together +@param callback - functor that will do the page updates +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +fil_tablespace_iterate( +/*===================*/ + dict_table_t* table, + ulint n_io_buffers, + PageCallback& callback) +{ + dberr_t err; + os_file_t file; + char* filepath; + + ut_a(n_io_buffers > 0); + ut_ad(!srv_read_only_mode); + + DBUG_EXECUTE_IF("ib_import_trigger_corruption_1", + return(DB_CORRUPTION);); + + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + dict_get_and_save_data_dir_path(table, false); + ut_a(table->data_dir_path); + + filepath = os_file_make_remote_pathname( + table->data_dir_path, table->name, "ibd"); + } else { + filepath = fil_make_ibd_name(table->name, false); + } + + { + ibool success; + + file = os_file_create_simple_no_error_handling( + innodb_file_data_key, filepath, + OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE); + + DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", + { + static bool once; + + if (!once || ut_rnd_interval(0, 10) == 5) { + once = true; + success = FALSE; + os_file_close(file); + } + }); + + if (!success) { + /* The following call prints an error message */ + os_file_get_last_error(true); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Trying to import a tablespace, but could not " + "open the tablespace file %s", filepath); + + mem_free(filepath); + + return(DB_TABLESPACE_NOT_FOUND); + + } else { + err = DB_SUCCESS; + } + } + + callback.set_file(filepath, file); + + os_offset_t file_size = os_file_get_size(file); + ut_a(file_size != (os_offset_t) -1); + + 
/* The block we will use for every physical page */ + buf_block_t block; + + memset(&block, 0x0, sizeof(block)); + + /* Allocate a page to read in the tablespace header, so that we + can determine the page size and zip_size (if it is compressed). + We allocate an extra page in case it is a compressed table. One + page is to ensure alignement. */ + + void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE); + byte* page = static_cast(ut_align(page_ptr, UNIV_PAGE_SIZE)); + + fil_buf_block_init(&block, page); + + /* Read the first page and determine the page and zip size. */ + + if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE, + dict_tf_get_page_compression(table->flags))) { + + err = DB_IO_ERROR; + + } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) { + fil_iterator_t iter; + + iter.file = file; + iter.start = 0; + iter.end = file_size; + iter.filepath = filepath; + iter.file_size = file_size; + iter.n_io_buffers = n_io_buffers; + iter.page_size = callback.get_page_size(); + + /* Compressed pages can't be optimised for block IO for now. + We do the IMPORT page by page. */ + + if (callback.get_zip_size() > 0) { + iter.n_io_buffers = 1; + ut_a(iter.page_size == callback.get_zip_size()); + } + + /** Add an extra page for compressed page scratch area. */ + + void* io_buffer = mem_alloc( + (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE); + + iter.io_buffer = static_cast( + ut_align(io_buffer, UNIV_PAGE_SIZE)); + + err = fil_iterate(iter, &block, callback); + + mem_free(io_buffer); + } + + if (err == DB_SUCCESS) { + + ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk"); + + if (!os_file_flush(file)) { + ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!"); + err = DB_IO_ERROR; + } else { + ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!"); + } + } + + os_file_close(file); + + mem_free(page_ptr); + mem_free(filepath); + + return(err); +} + +/** +Set the tablespace compressed table size. 
+@return DB_SUCCESS if the size is valid, DB_CORRUPTION otherwise */
+dberr_t
+PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
+{
+	/* The value read from the header is stored even when it is
+	rejected below. */
+	m_zip_size = fsp_header_get_zip_size(page);
+
+	const bool	valid = ut_is_2pow(m_zip_size)
+		&& m_zip_size <= UNIV_ZIP_SIZE_MAX;
+
+	return(valid ? DB_SUCCESS : DB_CORRUPTION);
+}
+
+/********************************************************************//**
+Remove a tablespace file together with its companion .cfg file.
+This should not be called for temporary tables. */
+UNIV_INTERN
+void
+fil_delete_file(
+/*============*/
+	const char*	ibd_name)	/*!< in: filepath of the ibd
+					tablespace */
+{
+	char*	cfg_name;
+
+	ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
+
+	/* A stale .ibd file is removed if one is lying around. */
+	os_file_delete_if_exists(innodb_file_data_key, ibd_name);
+
+	/* The .cfg name is derived from the .ibd name; the derived
+	string is freed here once the file has been dealt with. */
+	cfg_name = fil_make_cfg_name(ibd_name);
+	os_file_delete_if_exists(innodb_file_data_key, cfg_name);
+	mem_free(cfg_name);
+}
+
+/*************************************************************************
+Report the number of cells in the two fil_system hash tables.
+@return cell count of the id hash plus the name hash, 0 when
+fil_system does not exist */
+
+ulint
+fil_system_hash_cells(void)
+/*=======================*/
+{
+	if (!fil_system) {
+		return 0;
+	}
+
+	return (fil_system->spaces->n_cells
+		+ fil_system->name_hash->n_cells);
+}
+
+/*************************************************************************
+Report a size figure for the entries of the fil_system space list.
+@return list length times (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE),
+0 when fil_system does not exist */
+ulint
+fil_system_hash_nodes(void)
+/*=======================*/
+{
+	if (!fil_system) {
+		return 0;
+	}
+
+	return (UT_LIST_GET_LEN(fil_system->space_list)
+		* (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE));
+}
+
+/**
+Iterate over all the spaces in the space list and fetch the
+tablespace names. It will return a copy of the name that must be
+freed by the caller using: delete[].
+@return DB_SUCCESS if all OK.
*/
+UNIV_INTERN
+dberr_t
+fil_get_space_names(
+/*================*/
+	space_name_list_t&	space_name_list)
+				/*!< in/out: List to append to */
+{
+	dberr_t		err = DB_SUCCESS;
+
+	mutex_enter(&fil_system->mutex);
+
+	fil_space_t*	space = UT_LIST_GET_FIRST(fil_system->space_list);
+
+	while (space != NULL) {
+
+		if (space->purpose == FIL_TABLESPACE) {
+			/* n_bytes counts the terminating NUL as well */
+			const ulint	n_bytes = strlen(space->name) + 1;
+			char*		copy = new(std::nothrow) char[n_bytes];
+
+			if (!copy) {
+				/* The names appended so far stay in the
+				list; the caller has to free them. */
+				err = DB_OUT_OF_MEMORY;
+				break;
+			}
+
+			memcpy(copy, space->name, n_bytes);
+
+			space_name_list.push_back(copy);
+		}
+
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return(err);
+}
+
+/****************************************************************//**
+Write the MLOG_FILE_RENAME redo records for swapping two .ibd files:
+old -> tmp and new -> old. A record is skipped when the space id in
+question is TRX_SYS_SPACE. */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+	ulint		old_space_id,	/*!< in: tablespace id of the old
+					table.
*/
+	const char*	old_name,	/*!< in: old table name */
+	ulint		new_space_id,	/*!< in: tablespace id of the new
+					table */
+	const char*	new_name,	/*!< in: new table name */
+	const char*	tmp_name,	/*!< in: temp table name used while
+					swapping */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+{
+	/* old table file is renamed to the temporary name */
+	if (TRX_SYS_SPACE != old_space_id) {
+		fil_op_write_log(MLOG_FILE_RENAME, old_space_id,
+				 0, 0, old_name, tmp_name, mtr);
+	}
+
+	/* new table file takes over the old name */
+	if (TRX_SYS_SPACE != new_space_id) {
+		fil_op_write_log(MLOG_FILE_RENAME, new_space_id,
+				 0, 0, new_name, old_name, mtr);
+	}
+}
+
+/*************************************************************************
+Accessors for the is_corrupt flag of fil_space_t. Both take the
+fil_system mutex around the lookup. */
+
+/** Check the is_corrupt flag of a tablespace.
+@return TRUE if the space exists and is flagged corrupt, else FALSE */
+ibool
+fil_space_is_corrupt(
+/*=================*/
+	ulint	space_id)
+{
+	mutex_enter(&fil_system->mutex);
+
+	fil_space_t*	space = fil_space_get_by_id(space_id);
+	ibool		corrupt = (space && space->is_corrupt) ? TRUE : FALSE;
+
+	mutex_exit(&fil_system->mutex);
+
+	return(corrupt);
+}
+
+/** Set the is_corrupt flag of a tablespace; nothing happens when the
+space id is not found. */
+void
+fil_space_set_corrupt(
+/*==================*/
+	ulint	space_id)
+{
+	mutex_enter(&fil_system->mutex);
+
+	fil_space_t*	space = fil_space_get_by_id(space_id);
+
+	if (space != NULL) {
+		space->is_corrupt = TRUE;
+	}
+
+	mutex_exit(&fil_system->mutex);
+}
+
+/****************************************************************//**
+Take the fil_system mutex; the caller must not hold it already. */
+void
+fil_system_enter(void)
+/*==================*/
+{
+	ut_ad(!mutex_own(&fil_system->mutex));
+
+	mutex_enter(&fil_system->mutex);
+}
+
+/****************************************************************//**
+Give up the fil_system mutex; the caller must hold it. */
+void
+fil_system_exit(void)
+/*=================*/
+{
+	ut_ad(mutex_own(&fil_system->mutex));
+
+	mutex_exit(&fil_system->mutex);
+}
+
+/*******************************************************************//**
+Accessor for the name stored in a tablespace object.
+@return space->name (not a copy) */
+char*
+fil_space_name(
+/*===========*/
+	fil_space_t*	space)	/*!< in: space */
+{
+	char*	name = space->name;
+
+	return (name);
+}
+
+/*******************************************************************//** +Return page type name */ +const char* +fil_get_page_type_name( +/*===================*/ + ulint page_type) /*!< in: FIL_PAGE_TYPE */ +{ + switch(page_type) { + case FIL_PAGE_PAGE_COMPRESSED: + return "PAGE_COMPRESSED"; + case FIL_PAGE_INDEX: + return "INDEX"; + case FIL_PAGE_UNDO_LOG: + return "UNDO LOG"; + case FIL_PAGE_INODE: + return "INODE"; + case FIL_PAGE_IBUF_FREE_LIST: + return "IBUF_FREE_LIST"; + case FIL_PAGE_TYPE_ALLOCATED: + return "ALLOCATED"; + case FIL_PAGE_IBUF_BITMAP: + return "IBUF_BITMAP"; + case FIL_PAGE_TYPE_SYS: + return "SYS"; + case FIL_PAGE_TYPE_TRX_SYS: + return "TRX_SYS"; + case FIL_PAGE_TYPE_FSP_HDR: + return "FSP_HDR"; + case FIL_PAGE_TYPE_XDES: + return "XDES"; + case FIL_PAGE_TYPE_BLOB: + return "BLOB"; + case FIL_PAGE_TYPE_ZBLOB: + return "ZBLOB"; + case FIL_PAGE_TYPE_ZBLOB2: + return "ZBLOB2"; + case FIL_PAGE_TYPE_COMPRESSED: + return "ORACLE PAGE COMPRESSED"; + default: + return "PAGE TYPE CORRUPTED"; + } +} diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index a4b0e1b486bce..f99299b8e5dda 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -269,7 +269,8 @@ fil_compress_page( int level = 0; ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; ulint write_size=0; - ulint comp_method = innodb_compression_algorithm; /* Cache to avoid + ulint comp_method = innodb_compression_algorithm; + /* Cache to avoid change during function execution */ ut_ad(buf); diff --git a/storage/xtradb/fil/fil0pageencryption.cc b/storage/xtradb/fil/fil0pageencryption.cc new file mode 100644 index 0000000000000..503e379fd7e7c --- /dev/null +++ b/storage/xtradb/fil/fil0pageencryption.cc @@ -0,0 +1,615 @@ +/***************************************************************************** + +Copyright (C) 2014 eperi GmbH. All Rights Reserved. 
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/*****************************************************************
+ @file fil/fil0pageencryption.cc
+ Implementation for page encryption file spaces.
+
+ Created 08/25/2014
+ ***********************************************************************/
+
+#include "fil0fil.h"
+#include "fil0pageencryption.h"
+#include "fsp0pageencryption.h"
+#include "my_dbug.h"
+#include "page0zip.h"
+
+#include "buf0checksum.h"
+
+#include /* NOTE(review): the header names of these three includes are
+	missing in this copy of the patch - restore them from the original
+	commit */
+#include
+#include
+
+
+/*
+ * derived from libFLAC, which is gpl v2
+ *
+ * 256-entry lookup table for the one-byte checksum computed by
+ * fil_page_encryption_calc_checksum(): the entry at index
+ * (running crc XOR data byte) is the next running crc.
+ * NOTE(review): the first entries (0x00, 0x07, 0x0E, ...) look like the
+ * CRC-8 table for polynomial 0x07 - confirm against the libFLAC source.
+ */
+byte crc_table[] = {
+		0x00,0x07,0x0E,0x09,0x1C,0x1B,0x12,0x15,0x38,0x3F,0x36,0x31,0x24,0x23,0x2A,0x2D,0x70,0x77,0x7E,0x79,
+		0x6C,0x6B,0x62,0x65,0x48,0x4F,0x46,0x41,0x54,0x53,0x5A,0x5D,0xE0,0xE7,0xEE,0xE9,0xFC,0xFB,0xF2,0xF5,
+		0xD8,0xDF,0xD6,0xD1,0xC4,0xC3,0xCA,0xCD,0x90,0x97,0x9E,0x99,0x8C,0x8B,0x82,0x85,0xA8,0xAF,0xA6,0xA1,
+		0xB4,0xB3,0xBA,0xBD,0xC7,0xC0,0xC9,0xCE,0xDB,0xDC,0xD5,0xD2,0xFF,0xF8,0xF1,0xF6,0xE3,0xE4,0xED,0xEA,
+		0xB7,0xB0,0xB9,0xBE,0xAB,0xAC,0xA5,0xA2,0x8F,0x88,0x81,0x86,0x93,0x94,0x9D,0x9A,0x27,0x20,0x29,0x2E,
+		0x3B,0x3C,0x35,0x32,0x1F,0x18,0x11,0x16,0x03,0x04,0x0D,0x0A,0x57,0x50,0x59,0x5E,0x4B,0x4C,0x45,0x42,
+		0x6F,0x68,0x61,0x66,0x73,0x74,0x7D,0x7A,0x89,0x8E,0x87,0x80,0x95,0x92,0x9B,0x9C,0xB1,0xB6,0xBF,0xB8,
+		0xAD,0xAA,0xA3,0xA4,0xF9,0xFE,0xF7,0xF0,0xE5,0xE2,0xEB,0xEC,0xC1,0xC6,0xCF,0xC8,0xDD,0xDA,0xD3,0xD4,
+		0x69,0x6E,0x67,0x60,0x75,0x72,0x7B,0x7C,0x51,0x56,0x5F,0x58,0x4D,0x4A,0x43,0x44,0x19,0x1E,0x17,0x10,
+		0x05,0x02,0x0B,0x0C,0x21,0x26,0x2F,0x28,0x3D,0x3A,0x33,0x34,0x4E,0x49,0x40,0x47,0x52,0x55,0x5C,0x5B,
+		0x76,0x71,0x78,0x7F,0x6A,0x6D,0x64,0x63,0x3E,0x39,0x30,0x37,0x22,0x25,0x2C,0x2B,0x06,0x01,0x08,0x0F,
+		0x1A,0x1D,0x14,0x13,0xAE,0xA9,0xA0,0xA7,0xB2,0xB5,0xBC,0xBB,0x96,0x91,0x98,0x9F,0x8A,0x8D,0x84,0x83,
+		0xDE,0xD9,0xD0,0xD7,0xC2,0xC5,0xCC,0xCB,0xE6,0xE1,0xE8,0xEF,0xFA,0xFD,0xF4,0xF3
+
+};
+
+/* this calculates a crc-8 checksum byte
+ *
+ * The running value starts at 0; for every input byte it is replaced by
+ * crc_table[running value XOR byte].
+ * @param buf  bytes to checksum (only read)
+ * @param len  number of bytes to process; 0 yields 0
+ * @return the final running value
+ */
+byte fil_page_encryption_calc_checksum(unsigned char* buf, ulint len) {
+	byte crc = 0;
+	for (ulint i = 0; i < len; i++)
+		crc = crc_table[(crc ^ buf[i]) & 0xff];
+	return crc;
+}
+
+/****************************************************************//**
+ For page encrypted pages encrypt the page before actual write
+ operation.
+
+ Note, that FIL_PAGE_TYPE_FSP_HDR and FIL_PAGE_TYPE_XDES type pages are not encrypted!
+
+ Pages are encrypted with AES/CBC/NoPadding algorithm.
+
+ "No padding" is used to ensure, that the encrypted page does not exceed the page size.
+ If "no padding" is used, the input for encryption must be of size (multiple * AES blocksize). AES Blocksize is usually 16 (bytes).
+
+ Everything in the page is encrypted except for the 38 byte FIL header.
+ Since the length of the payload is not a multiple of the AES blocksize,
+ and to ensure that every byte of the payload is encrypted, two encryption operations are done.
+ Each time with a block of adequate size as input.
+ 1st block contains everything from beginning of payload bytes except for the remainder.
+ 2nd block is of size 64 and contains the remainder and the last (64 - sizeof(remainder)) bytes of the encrypted 1st block.
+
+ Each encrypted page receives a new page type for PAGE_ENCRYPTION.
+ The original page type (2 bytes) is stored in the Checksum header of the page (position FIL_PAGE_SPACE_OR_CHKSUM). + Additionally the encryption key identifier is stored in the Checksum Header. This uses 1 byte. + Checksum verification for encrypted pages is disabled. This checksum should be restored after decryption. + + To be able to verify decryption in a later stage, a 1-byte checksum at position 4 of the FIL_PAGE_SPACE_OR_CHKSUM header is stored. + For page compressed table pages the log base 2 of the length of the encrypted data is stored. + + @return encrypted page to be written*/ +byte* +fil_encrypt_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: encrypted buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint encryption_key,/*!< in: encryption key */ + ulint* out_len, /*!< out: actual length of encrypted page */ + ulint* errorCode, /*!< out: an error code. set, if page is intentionally not encrypted */ + byte* tmp_encryption_buf, /*!< in: temporary buffer or NULL */ + ulint mode /*!< in: calling mode. Should be 0. Can be used for unit tests */ +) { + + int err = AES_OK; + int key = 0; + uint32 data_size = 0; + ulint orig_page_type = 0; + uint32 write_size = 0; + fil_space_t* space = NULL; + byte* tmp_buf = NULL; + ulint unit_test = 0; + ut_ad(buf);ut_ad(out_buf); + key = encryption_key; + ulint offset = 0; + ulint page_len = 0; + unit_test = mode ? 
1 : 0; + + *errorCode = AES_OK; + + if (!unit_test) { + ut_ad(fil_space_is_page_encrypted(space_id)); + fil_system_enter(); + space = fil_space_get_by_id(space_id); + fil_system_exit(); + +#ifdef UNIV_DEBUG + ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET); + + fprintf(stderr, + "InnoDB: Note: Preparing for encryption for space %lu name %s len %lu, page no %lu\n", + space_id, fil_space_name(space), len, pageno); +#endif /* UNIV_DEBUG */ + } + /* read original page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR) || (orig_page_type == FIL_PAGE_TYPE_XDES) ) { + memcpy(out_buf, buf, len); + + *errorCode = PAGE_ENCRYPTION_WILL_NOT_ENCRYPT; + return (out_buf); + } + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + page_len = log10(len)/log10(2); + } + + + + byte checksum_byte = fil_page_encryption_calc_checksum(buf + FIL_PAGE_DATA, len - FIL_PAGE_DATA); + + /* data_size bytes will be encrypted at first. + * data_size will be the length of the cipher text since no padding is used.*/ + data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE; + + + + + + const unsigned char rkey[] = {0xbd, 0xe4, 0x72, 0xa2, 0x95, 0x67, 0x5c, 0xa9, + 0x2e, 0x04, 0x67, 0xea, 0xdb, 0xc0, 0xe0, 0x23, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + uint8 key_len = 16; + if (!unit_test) { + KeySingleton& keys = KeySingleton::getInstance(); + if (!keys.isAvailable()) { + err = AES_KEY_CREATION_FAILED; + } else if (keys.getKeys(encryption_key) == NULL) { + err = PAGE_ENCRYPTION_KEY_MISSING; + } else { + char* keyString = keys.getKeys(encryption_key)->key; + key_len = strlen(keyString)/2; + my_aes_hexToUint(keyString, (unsigned char*)&rkey, key_len); + } + } + const unsigned char iv[] = {0x2d, 0x1a, 0xf8, 0xd3, 0x97, 0x4e, 0x0b, 0xd3, 0xef, 0xed, + 0x5a, 0x6f, 0x82, 0x59, 0x4f,0x5e}; + ulint iv_len = 16; + if (!unit_test) { + 
KeySingleton& keys = KeySingleton::getInstance(); + if (!keys.isAvailable()) { + err = AES_KEY_CREATION_FAILED; + } else if (keys.getKeys(encryption_key) == NULL) { + err = PAGE_ENCRYPTION_KEY_MISSING; + } else { + char* ivString = keys.getKeys(encryption_key)->iv; + if (ivString == NULL) return buf; + my_aes_hexToUint(ivString, (unsigned char*)&iv, 16); + } + } + + /* 1st encryption: data_size bytes starting from FIL_PAGE_DATA */ + if (err == AES_OK) { + err = my_aes_encrypt_cbc((char*) buf + FIL_PAGE_DATA, data_size, + (char *) out_buf + FIL_PAGE_DATA, &write_size, + (const unsigned char *) &rkey, key_len, + (const unsigned char *) &iv, iv_len, 1); + ut_ad(write_size == data_size); + if (err == AES_OK) { + /* copy remaining bytes from input buffer to output buffer. + * Note, that this copies the final 8 bytes of a page, which consists of the + * Old-style checksum and the "Low 32 bits of LSN */ + memcpy(out_buf + FIL_PAGE_DATA + data_size , buf + FIL_PAGE_DATA + data_size , len - FIL_PAGE_DATA -data_size); + + if (tmp_encryption_buf == NULL) { + //create temporary buffer for 2nd encryption + tmp_buf = static_cast(ut_malloc(64)); + } else { + tmp_buf = tmp_encryption_buf; + } + /* 2nd encryption: 64 bytes from out_buf, result length is 64 bytes */ + err = my_aes_encrypt_cbc((char*)out_buf + len -offset -64, + 64, + (char*)tmp_buf, + &write_size, + (const unsigned char *)&rkey, + key_len, + (const unsigned char *)&iv, + iv_len, 1); + ut_ad(write_size == 64); + /* copy 64 bytes from 2nd encryption to out_buf*/ + memcpy(out_buf + len - offset -64, tmp_buf, 64); + } + + } + /* error handling */ + if (err != AES_OK) { + /* If an error occurred we leave the actual page as it was */ + + fprintf(stderr, + "InnoDB: Warning: Encryption failed for space %lu name %s len %lu rt %d write %lu, error: %d\n", + space_id, fil_space_name(space), len, err, data_size, err); + fflush(stderr); + srv_stats.pages_page_encryption_error.inc(); + *out_len = len; + + /* free temporary buffer 
*/ + if (tmp_buf!=NULL && tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + *errorCode = err; + + return (buf); + } + + + + /* Set up the page header. Copied from input buffer*/ + memcpy(out_buf, buf, FIL_PAGE_DATA); + + + /* Set up the correct page type */ + mach_write_to_2(out_buf + FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED); + + /* The 1st checksum field is used to store original page type, etc. + * checksum check for page encrypted pages is omitted. + */ + + /* Set up the encryption key. Written to the 1st byte of the checksum header field. This header is currently used to store data. */ + mach_write_to_1(out_buf + FIL_PAGE_SPACE_OR_CHKSUM, key); + + /* store original page type. Written to 2nd and 3rd byte of the checksum header field */ + mach_write_to_2(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 1, orig_page_type); + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + /* set byte 4 of checksum field to page length (ln(len)) */ + memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, page_len, 1); + } else { + /* set byte 4 of checksum field to checksum byte */ + memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, checksum_byte, 1); + } + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_encrypted(out_buf)); + +#endif /* UNIV_DEBUG */ + + srv_stats.pages_page_encrypted.inc(); + *out_len = len; + + /* free temporary buffer */ + if (tmp_buf!=NULL && tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + return (out_buf); +} + +/****************************************************************//** + For page encrypted pages decrypt the page after actual read + operation. + + See fil_encrypt_page for details, how the encryption works. + + If the decryption can be verified, original page should be completely restored. + This includes original page type, 4-byte checksum field at page start. + If it is not a page compressed table's page, decryption is verified against a 1-byte checksum built over the plain data bytes. If this verification fails, an error state is returned.. 
+
+ NOTE(review): in this copy of the patch the text between the comment of
+ the page_compressed parameter and the first ut_malloc() call is missing,
+ so the remaining parameters and all local declarations are not visible
+ here. Restore them from the original commit before changing this function.
+ NOTE(review): the key and IV arrays are declared const but are filled in
+ through a cast by my_aes_hexToUint() - confirm and drop the const.
+ NOTE(review): the result of the second my_aes_decrypt_cbc() call is not
+ tested before the page type and the checksum field are rewritten; it is
+ only handed back by the final return.
+
+ @return the error state of the steps above (compared against AES_OK in
+ the body); the decrypted data is written back into buf */
+ulint fil_decrypt_page(
+/*================*/
+	byte* page_buf, /*!< in: preallocated buffer or NULL */
+	byte* buf, /*!< in/out: buffer from which to read; in aio
+	this must be appropriately aligned */
+	ulint len, /*!< in: length buffer, which should be decrypted.*/
+	ulint* write_size, /*!< out: size of the decrypted data. If no error occurred equal to len, except for page compressed tables */
+	ibool* page_compressed, /*!(ut_malloc(UNIV_PAGE_SIZE));
+	} else {
+		in_buf = page_buf;
+	}
+	data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE;
+
+	const unsigned char rkey[] = {0xbd, 0xe4, 0x72, 0xa2, 0x95, 0x67, 0x5c, 0xa9,
+			0x2e, 0x04, 0x67, 0xea, 0xdb, 0xc0,0xe0, 0x23,
+			0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+			0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+	uint8 key_len = 16;
+	if (!unit_test) {
+		KeySingleton& keys = KeySingleton::getInstance();
+		if (!keys.isAvailable()) {
+			err = PAGE_ENCRYPTION_ERROR;
+		} else if (keys.getKeys(page_decryption_key) == NULL) {
+			err = PAGE_ENCRYPTION_KEY_MISSING;
+		} else {
+			char* keyString = keys.getKeys(page_decryption_key)->key;
+			key_len = strlen(keyString)/2;
+			my_aes_hexToUint(keyString, (unsigned char*)&rkey, key_len);
+		}
+	}
+
+
+	const unsigned char iv[] = {0x2d, 0x1a, 0xf8, 0xd3, 0x97, 0x4e, 0x0b, 0xd3, 0xef, 0xed,
+			0x5a, 0x6f, 0x82, 0x59, 0x4f,0x5e};
+
+
+	uint8 iv_len = 16;
+	if (!unit_test) {
+		KeySingleton& keys = KeySingleton::getInstance();
+		if (!keys.isAvailable()) {
+			err = PAGE_ENCRYPTION_ERROR;
+		} else if (keys.getKeys(page_decryption_key) == NULL) {
+			err = PAGE_ENCRYPTION_KEY_MISSING;
+		} else {
+			my_aes_hexToUint(keys.getKeys(page_decryption_key)->iv, (unsigned char*)&iv, 16);
+		}
+	}
+
+
+	if (err != AES_OK) {
+		/* surely key could not be determined.
*/
+		fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n"
+				"InnoDB: but decrypt failed with error %d, encryption key %d.\n",
+				err, (int)page_decryption_key);
+		fflush(stderr);
+		if (NULL == page_buf) {
+			ut_free(in_buf);
+		}
+		return err;
+	}
+
+	if (tmp_encryption_buf == NULL) {
+		tmp_buf= static_cast(ut_malloc(64)); /* NOTE(review): template argument of static_cast
+		is missing in this copy of the patch */
+	} else {
+		tmp_buf = tmp_encryption_buf;
+	}
+
+
+	/* 1st decryption: 64 bytes */
+	/* 64 bytes from data area are copied to temporary buffer.
+	 * These are the last 64 of the (encrypted) payload */
+	memcpy(tmp_buf, buf + len - offset - 64, 64);
+	if (err == AES_OK) {
+		err = my_aes_decrypt_cbc((const char*) tmp_buf, 64,
+				(char *) in_buf + len - offset - 64,
+				&tmp_write_size, (const unsigned char *) &rkey, key_len,
+				(const unsigned char *) &iv, iv_len, 1);
+	}
+	ut_ad(tmp_write_size == 64); /* evaluated before err is examined */
+
+
+	/* If decrypt fails it means that page is corrupted or has an unknown key */
+	if (err != AES_OK) {
+		fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n"
+				"InnoDB: but decrypt failed with error %d.\n"
+				"InnoDB: size %lu len %lu, key %d\n", err, data_size,
+				len, (int)page_decryption_key);
+		fflush(stderr);
+		if (NULL == page_buf) {
+			ut_free(in_buf);
+		}
+		if (NULL == tmp_encryption_buf) {
+			ut_free(tmp_buf);
+		}
+		return err;
+	}
+
+	ut_ad(tmp_write_size == 64);
+
+	/* copy 1st part of payload from buf to in_buf */
+	/* do not override result of 1st decryption */
+	memcpy(in_buf + FIL_PAGE_DATA, buf + FIL_PAGE_DATA, len -offset -64 - FIL_PAGE_DATA);
+
+	err = my_aes_decrypt_cbc((char*) in_buf + FIL_PAGE_DATA,
+			data_size,
+			(char *) buf + FIL_PAGE_DATA,
+			&tmp_write_size,
+			(const unsigned char *)&rkey,
+			key_len,
+			(const unsigned char *)&iv,
+			iv_len,
+			1);
+	ut_ad(tmp_write_size = data_size); /* NOTE(review): '=' assigns instead of comparing, so
+	this assertion overwrites tmp_write_size and only tests that
+	data_size is non-zero; the two assertions above use '=='.
+	Presumably '==' was intended - confirm and fix. */
+
+	/* copy remaining bytes from in_buf to buf.
+	 */
+	ulint bytes_to_copy = len - FIL_PAGE_DATA - data_size - offset;
+	memcpy(buf + FIL_PAGE_DATA + data_size, in_buf + FIL_PAGE_DATA + data_size, bytes_to_copy);
+
+	if (NULL == tmp_encryption_buf) {
+		ut_free(tmp_buf);
+	}
+
+#ifdef UNIV_PAGEENCRIPTION_DEBUG
+	fprintf(stderr, "InnoDB: Note: Decryption succeeded for len %lu\n", len);
+	fflush(stderr);
+#endif
+
+	if (NULL == page_buf) {
+		ut_free(in_buf);
+	}
+
+	/* setting original page type */
+
+	mach_write_to_2(buf + FIL_PAGE_TYPE, orig_page_type);
+
+	ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET);
+	ulint flags = 0;
+	ulint zip_size = 0;
+	/* please note, that page with number 0 is not encrypted */
+	if (pageno == 0 ) {
+
+		flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf);
+	} else {
+		if (unit_test) {
+			/* in simple unit test, the tablespace memory cache is n.a. */
+			if ((mode & 0x01) != 0x01) {
+				zip_size = mode;
+			}
+		} else {
+			ulint space_id = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+			fil_system_enter();
+			space = fil_space_get_by_id(space_id);
+			flags = fil_space_flags(space); /* NOTE(review): space is not checked for NULL
+			before this call - confirm that fil_space_flags()
+			tolerates a NULL argument */
+			fil_system_exit();
+		}
+	}
+	if (!(page_compression_flag) && (!unit_test || pageno==0)) {
+		zip_size = fsp_flags_get_zip_size(flags);
+	}
+
+	if (write_size!=NULL) {
+		*write_size = len;
+	}
+
+
+	if (!(page_compression_flag)) {
+		byte checksum_byte = fil_page_encryption_calc_checksum(buf + FIL_PAGE_DATA, len - FIL_PAGE_DATA);
+		if (checksum_byte != stored_checksum_byte) {
+			err = PAGE_ENCRYPTION_WRONG_KEY;
+			fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n"
+					"InnoDB: but decryption verification failed with error %d, encryption key %d.\n",
+					err, (int)page_decryption_key);
+			fflush(stderr);
+
+			return err;
+		}
+	}
+
+	if (!(page_compression_flag)) {
+		/* calc check sums and write to the buffer, if page is not of type PAGE_COMPRESSED.
+		 * if the decryption is verified, it is assumed that the original page was restored, re-calculating the original
+		 * checksums should be ok
+		 */
+		do_check_sum(len, zip_size, buf);
+	} else {
+		/* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */
+		mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
+	}
+
+
+
+	srv_stats.pages_page_decrypted.inc();
+	return err;
+}
+
+
+/* recalculate check sum - from buf0flu.cc
+ *
+ * Computes the page checksum and stores it in the 4 bytes at
+ * FIL_PAGE_SPACE_OR_CHKSUM. With a non-zero zip_size the value comes from
+ * page_zip_calc_checksum(); otherwise it is chosen by the current
+ * srv_checksum_algorithm setting. The old style checksum at the end of the
+ * page is not written.
+ * @param page_size  not used in the body
+ * @param zip_size   compressed page size, 0 to take the uncompressed path
+ * @param buf        page to update in place
+ */
+void do_check_sum(
+		ulint page_size,
+		ulint zip_size,
+		byte* buf) {
+	ib_uint32_t checksum = 0;
+
+	if (zip_size) {
+		checksum = page_zip_calc_checksum(buf,zip_size,
+				static_cast( /* NOTE(review): template argument missing in this copy */
+						srv_checksum_algorithm));
+
+		mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+		return;
+	}
+
+	switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
+	case SRV_CHECKSUM_ALGORITHM_CRC32:
+	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+		checksum = buf_calc_page_crc32(buf);
+		break;
+	case SRV_CHECKSUM_ALGORITHM_INNODB:
+	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+		checksum = (ib_uint32_t) buf_calc_page_new_checksum(buf);
+		break;
+	case SRV_CHECKSUM_ALGORITHM_NONE:
+	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+
+		checksum = BUF_NO_CHECKSUM_MAGIC;
+		break;
+		/* no default so the compiler will emit a warning if new enum
+		is added and not handled here */
+	}
+	mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+
+	/* old style checksum is omitted */
+
+}
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 93563b16c13d9..f306eda74ce81 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -107,6 +107,9 @@ this program; if not, write to the Free Software Foundation, Inc.,
 #include "page0zip.h"
 #include "fil0pagecompress.h"
 
+#include "KeySingleton.h"
+
+
 #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
 
 #ifdef MYSQL_DYNAMIC_PLUGIN
@@ -210,6 +213,12 @@ static char*	innobase_disable_monitor_counter	= NULL;
 static char*
innobase_reset_monitor_counter = NULL; static char* innobase_reset_all_monitor_counter = NULL; +/* Encryption for tables and columns */ +static char* innobase_data_encryption_providername = NULL; +static char* innobase_data_encryption_providerurl = NULL; +static uint innobase_data_encryption_providertype = 0; // 1 == file, 2 == server +static char* innobase_data_encryption_filekey = NULL; + /* The highest file format being used in the database. The value can be set by user, however, it will be adjusted to the newer file format if a table of such format is created/opened. */ @@ -617,6 +626,12 @@ ha_create_table_option innodb_table_option_list[]= HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1), /* With this option user can enable atomic writes feature for this table */ HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), + /* With this option the user can enable page encryption for the table */ + HA_TOPTION_BOOL("PAGE_ENCRYPTION", page_encryption, 0), + + /* With this option the user defines the key identifier using for the encryption */ + HA_TOPTION_NUMBER("PAGE_ENCRYPTION_KEY", page_encryption_key, ULINT_UNDEFINED, 1, 255, 1), + HA_TOPTION_END }; @@ -973,6 +988,14 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, {"num_pages_page_decompressed", (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, + {"num_pages_page_compression_error", + (char*) &export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG}, + {"num_pages_page_encrypted", + (char*) &export_vars.innodb_pages_page_encrypted, SHOW_LONGLONG}, + {"num_pages_page_decrypted", + (char*) &export_vars.innodb_pages_page_decrypted, SHOW_LONGLONG}, + {"num_pages_page_encryption_error", + (char*) &export_vars.innodb_pages_page_encryption_error, SHOW_LONGLONG}, {"have_lz4", (char*) &innodb_have_lz4, SHOW_BOOL}, {"have_lzo", @@ -3412,6 +3435,11 @@ innobase_init( 
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); + + KeySingleton::getInstance( + innobase_data_encryption_providername, innobase_data_encryption_providerurl, + innobase_data_encryption_providertype, innobase_data_encryption_filekey); + #ifndef DBUG_OFF static const char test_filename[] = "-@"; char test_tablename[sizeof test_filename @@ -3494,7 +3522,7 @@ innobase_init( goto error; } } - + #ifndef HAVE_LZ4 if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) { sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" @@ -3562,6 +3590,7 @@ innobase_init( srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir : default_path); + /* Set default InnoDB data file size to 12 MB and let it be auto-extending. Thus users can use InnoDB in >= 4.0 without having to specify any startup options. */ @@ -4049,6 +4078,7 @@ innobase_end( DBUG_ENTER("innobase_end"); DBUG_ASSERT(hton == innodb_hton_ptr); + KeySingleton::getInstance().~KeySingleton(); if (innodb_inited) { THD *thd= current_thd; @@ -11515,6 +11545,8 @@ innobase_table_flags( modified by another thread while the table is being created. */ const ulint default_compression_level = page_zip_level; + const ulint default_encryption_key = 1; + *flags = 0; *flags2 = 0; @@ -11713,9 +11745,11 @@ innobase_table_flags( options->page_compressed, (ulint)options->page_compression_level == ULINT_UNDEFINED ? default_compression_level : options->page_compression_level, - options->atomic_writes); - - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { + options->atomic_writes, + options->page_encryption, + (ulint)options->page_encryption_key == ULINT_UNDEFINED ? 
+ default_encryption_key : options->page_encryption_key); + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { *flags2 |= DICT_TF2_TEMPORARY; } @@ -11749,6 +11783,24 @@ ha_innobase::check_table_options( enum row_type row_format = table->s->row_type;; ha_table_option_struct *options= table->s->option_struct; atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; + if (options->page_encryption) { + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION requires" + " innodb_file_per_table."); + return "PAGE_ENCRYPTION"; + } + if (!KeySingleton::getInstance().isAvailable()) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION needs a key provider" + ); + return "PAGE_ENCRYPTION"; + } + } /* Check page compression requirements */ if (options->page_compressed) { @@ -11813,6 +11865,34 @@ ha_innobase::check_table_options( } } + if ((ulint)options->page_encryption_key != ULINT_UNDEFINED) { + if (options->page_encryption == false) { + /* ignore this to allow alter table without changing page_encryption_key ...*/ + } + + if (options->page_encryption_key < 1 || options->page_encryption_key > 255) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_ENCRYPTION_KEY = %lu." 
+ " Valid values are [1..255]", + options->page_encryption_key); + return "PAGE_ENCRYPTION_KEY"; + } + + if (!KeySingleton::getInstance().isAvailable() || KeySingleton::getInstance().getKeys(options->page_encryption_key)==NULL) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION_KEY encryption key %lu not available", + options->page_encryption_key + ); + return "PAGE_ENCRYPTION_KEY"; + + } + } + + /* Check atomic writes requirements */ if (awrites == ATOMIC_WRITES_ON || (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { @@ -20087,6 +20167,27 @@ static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, "Use multi-threaded flush. Default FALSE.", NULL, NULL, FALSE); +static MYSQL_SYSVAR_UINT(data_encryption_providertype, innobase_data_encryption_providertype, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Use table or column encryption / decryption. Default is 0 for no use, 1 for keyfile and 2 for keyserver.", + NULL, NULL, 1, 0, 2, 0); + +static MYSQL_SYSVAR_STR(data_encryption_providername, innobase_data_encryption_providername, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Name of keyfile or keyserver.", + NULL, NULL, NULL); + +static MYSQL_SYSVAR_STR(data_encryption_providerurl, innobase_data_encryption_providerurl, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Path or URL for keyfile or keyserver.", + NULL, NULL, NULL); + + static MYSQL_SYSVAR_STR(data_encryption_filekey, innobase_data_encryption_filekey, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Key to encrypt / decrypt the keyfile.", + NULL, NULL, NULL); + + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -20298,6 +20399,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(compression_algorithm), MYSQL_SYSVAR(mtflush_threads), MYSQL_SYSVAR(use_mtflush), + MYSQL_SYSVAR(data_encryption_providertype), + 
MYSQL_SYSVAR(data_encryption_providername), + MYSQL_SYSVAR(data_encryption_providerurl), + MYSQL_SYSVAR(data_encryption_filekey), NULL }; @@ -20307,7 +20412,7 @@ maria_declare_plugin(xtradb) &innobase_storage_engine, innobase_hton_name, plugin_author, - "Percona-XtraDB, Supports transactions, row-level locking, and foreign keys", + "Percona-XtraDB, Supports transactions, row-level locking, foreign keys and encryption for tables and columns", PLUGIN_LICENSE_GPL, innobase_init, /* Plugin Init */ NULL, /* Plugin Deinit */ diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 2d70c67d3bfac..033622aeb61e4 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -26,6 +26,8 @@ this program; if not, write to the Free Software Foundation, Inc., #include "dict0stats.h" + + /* Structure defines translation table between mysql index and innodb index structures */ struct innodb_idx_translate_t { @@ -58,7 +60,7 @@ typedef struct st_innobase_share { /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; -/** Engine specific table options are definined using this struct */ +/** Engine specific table options are defined using this struct */ struct ha_table_option_struct { bool page_compressed; /*!< Table is using page compression @@ -71,6 +73,8 @@ struct ha_table_option_struct srv_use_atomic_writes=1. 
Atomic writes are not used if value OFF.*/ + bool page_encryption; /*!< Flag for an encrypted table */ + int page_encryption_key; /*!< ID of the encryption key */ }; /** The class defining a handle to an Innodb table */ diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 2d7fd259cb150..222985aae1745 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -280,6 +280,13 @@ ha_innobase::check_if_supported_inplace_alter( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } + + if (new_options->page_encryption != old_options->page_encryption || + new_options->page_encryption_key != old_options->page_encryption_key) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } } if (ha_alter_info->handler_flags diff --git a/storage/xtradb/include/EncKeys.h b/storage/xtradb/include/EncKeys.h new file mode 100644 index 0000000000000..43f2920fd7f4a --- /dev/null +++ b/storage/xtradb/include/EncKeys.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file EncKeys.h +A structure and class to keep keys for encryption/decryption. + +Created 09/15/2014 +***********************************************************************/ + +#ifndef ENCKEYS_H_ +#define ENCKEYS_H_ + +#include "univ.i" +#include +#include + + + + +struct keyentry { + ulint id; + char *iv; + char *key; +}; + + +class EncKeys +{ +private: + static const char *strMAGIC, *newLine; + static const int magicSize; + + enum constants { MAX_OFFSETS_IN_PCRE_PATTERNS = 30}; + enum keyAttributes { KEY_MIN = 1, KEY_MAX = 255, MAX_KEYS = 255, + MAX_IVLEN = 256, MAX_KEYLEN = 512, ivSize16 = 16, keySize32 = 32 }; + enum keyInitType { KEYINITTYPE_FILE = 1, KEYINITTYPE_SERVER = 2 }; + enum errorAttributes { MAX_KEY_LINE_SIZE = 3 * MAX_KEYLEN, MAX_KEY_FILE_SIZE = 1048576 }; + enum errorCodesLine { NO_ERROR_PARSE_OK = 0, NO_ERROR_ISCOMMENT = 10, NO_ERROR_KEY_GREATER_THAN_ASKED = 20, + ERROR_NOINITIALIZEDKEY = 30, ERROR_ID_TOO_BIG = 40, ERROR_WRONG_NUMBER_OF_MATCHES = 50, + ERROR_EQUAL_DOUBLE_KEY = 60, ERROR_UNEQUAL_DOUBLE_KEY = 70 }; + + static const char *errorNoKeyId, *errorInMatches, *errorExceedKeyFileSize, + *errorExceedKeySize, *errorEqualDoubleKey, *errorUnequalDoubleKey, + *errorNoInitializedKey, *errorFalseFileKey, + *errorNotImplemented, *errorOpenFile, *errorReadingFile, *errorFileSize; + + static const char* initialPwd; + ulint countKeys, keyLineInKeyFile; + keyentry keys[MAX_KEYS], *oneKey; + + void printKeyEntry( ulint id); + int initKeysThroughFile( const char *name, const char *path, const char *filekey); + int initKeysThroughServer( const char *name, const char *path, const char *filekey); + bool isComment( const char *line); + char * decryptFile( const char* filename, 
const char *secret, int *errorCode); + int parseFile( const char* filename, const ulint maxKeyId, const char *secret); + int parseLine( const char *line, const ulint maxKeyId); + +public: + static const size_t MAX_SECRET_SIZE = 256; + + enum errorCodesFile { NO_ERROR_KEY_FILE_PARSE_OK = 0, ERROR_KEY_FILE_PARSE_NULL = 110, + ERROR_KEY_FILE_TOO_BIG = 120, ERROR_KEY_FILE_EXCEEDS_MAX_NUMBERS_OF_KEYS = 130, + ERROR_OPEN_FILE = 140, ERROR_READING_FILE = 150, ERROR_FALSE_FILE_KEY = 160, + ERROR_KEYINITTYPE_SERVER_NOT_IMPLEMENTED = 170, ERROR_ENCRYPTION_SECRET_NULL = 180 }; + EncKeys(); + virtual ~EncKeys(); + bool initKeys( const char *name, const char *url, const int initType, const char *filekey); + keyentry *getKeys( int id); + /* made public for unit testing */ + static void parseSecret( const char *filename, char *secret ); + +}; + +#endif /* ENCKEYS_H_ */ diff --git a/storage/xtradb/include/KeySingleton.h b/storage/xtradb/include/KeySingleton.h new file mode 100644 index 0000000000000..2b2f3991998c0 --- /dev/null +++ b/storage/xtradb/include/KeySingleton.h @@ -0,0 +1,59 @@ +/* Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file KeySingletonPattern.h +Implementation of single pattern to keep keys for encrypting/decrypting pages. 
+ +Created 09/13/2014 +***********************************************************************/ + + +#ifndef KEYSINGLETON_H_ +#define KEYSINGLETON_H_ + +#include "EncKeys.h" + + +class KeySingleton +{ +private: + static bool instanceInited; + static KeySingleton theInstance; + static EncKeys encKeys; + + // No new instance or object possible + KeySingleton() {} + + // No new instance possible through copy constructor + KeySingleton( const KeySingleton&) {} + + // No new instance possible through copy + KeySingleton & operator = (const KeySingleton&); + +public: + virtual ~KeySingleton() {} /* encKeys is a static member and is destroyed automatically; calling its destructor here as well would destroy it twice */ + static KeySingleton& getInstance(); + // Init the instance for only one time + static KeySingleton& getInstance(const char *name, const char *url, + const int initType, const char *filekey); + keyentry *getKeys(int id); + ibool hasKey(int id); + static bool isAvailable() { + return instanceInited; + } +}; + +#endif /* KEYSINGLETON_H_ */ diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 78503d954bad1..a6e2e3883942e 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -918,8 +918,10 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - ulint atomic_writes) /*!< in: table atomic + ulint atomic_writes, /*!< in: table atomic writes option value*/ + bool page_encrypted,/*!< in: table uses page encryption */ + ulint page_encryption_key) /*!< in: page encryption key */ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index 2b698dd721848..39c2f77a905ec 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -543,6 +543,10 @@ dict_tf_is_valid( ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); ulint atomic_writes 
= DICT_TF_GET_ATOMIC_WRITES(flags); + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags); + + /* Make sure there are no bits that we do not know about. */ if (unused != 0) { @@ -554,9 +558,11 @@ dict_tf_is_valid( "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", + "InnoDB: atomic_writes %ld\n" /* no comma: the next literal continues the format string */ + "InnoDB: page_encryption %ld page_encryption_key %ld\n", unused, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes + page_compression, page_compression_level, atomic_writes, + page_encryption, page_encryption_key ); return(false); @@ -693,7 +699,7 @@ dict_sys_tables_type_validate( ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); - ut_a(atomic_writes <= ATOMIC_WRITES_OFF); + ut_a(atomic_writes >= 0 && atomic_writes <= ATOMIC_WRITES_OFF); /* The low order bit of SYS_TABLES.TYPE is always set to 1. 
If the format is UNIV_FORMAT_B or higher, this field is the same @@ -856,7 +862,9 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - ulint atomic_writes) /*!< in: table atomic writes setup */ + ulint atomic_writes, /*!< in: table atomic writes setup */ + bool page_encrypted, /*!< in: table uses page encryption */ + ulint page_encryption_key /*!< in: page encryption key */) { atomic_writes_t awrites = (atomic_writes_t)atomic_writes; @@ -897,6 +905,11 @@ dict_tf_set( *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); ut_a(dict_tf_get_atomic_writes(*flags) == awrites); + + if (page_encrypted) { + *flags |= (1 << DICT_TF_POS_PAGE_ENCRYPTION) + | (page_encryption_key << DICT_TF_POS_PAGE_ENCRYPTION_KEY); + } } /********************************************************************//** @@ -919,6 +932,10 @@ dict_tf_to_fsp_flags( ulint fsp_flags; ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", @@ -946,6 +963,14 @@ dict_tf_to_fsp_flags( if page compression is used for this table. */ fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + /* In addition, tablespace flags also contain if the page + encryption is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION(fsp_flags, page_encryption); + + /* In addition, tablespace flags also contain page encryption key if the page + encryption is used for this table. 
*/ + fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(fsp_flags, page_encryption_key); + /* In addition, tablespace flags also contain flag if atomic writes is used for this table */ fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); @@ -987,6 +1012,9 @@ dict_sys_tables_type_to_tf( | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_PAGE_ENCRYPTION + | DICT_TF_MASK_PAGE_ENCRYPTION_KEY + ); return(flags); @@ -1022,7 +1050,9 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_DATA_DIR | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL - | DICT_TF_MASK_ATOMIC_WRITES); + | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_PAGE_ENCRYPTION + | DICT_TF_MASK_PAGE_ENCRYPTION_KEY); return(type); } diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 5bea2334131a9..4a71287a8d721 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -135,6 +135,12 @@ Width of the page compression flag #define DICT_TF_WIDTH_PAGE_COMPRESSION 1 #define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 +/** +Width of the page encryption flag +*/ +#define DICT_TF_WIDTH_PAGE_ENCRYPTION 1 +#define DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY 8 + /** Width of atomic writes flag DEFAULT=0, ON = 1, OFF = 2 @@ -148,7 +154,7 @@ DEFAULT=0, ON = 1, OFF = 2 + DICT_TF_WIDTH_DATA_DIR \ + DICT_TF_WIDTH_PAGE_COMPRESSION \ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ - + DICT_TF_WIDTH_ATOMIC_WRITES) + + DICT_TF_WIDTH_ATOMIC_WRITES + DICT_TF_WIDTH_PAGE_ENCRYPTION + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -173,9 +179,16 @@ DEFAULT=0, ON = 1, OFF = 2 /** Zero relative shift position of the ATOMIC_WRITES field */ #define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) + +/** Zero relative shift position of the PAGE_ENCRYPTION field 
*/ +#define DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \ + + DICT_TF_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */ +#define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION) /** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \ - + DICT_TF_WIDTH_ATOMIC_WRITES) +#define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -205,6 +218,14 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_MASK_ATOMIC_WRITES \ ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \ << DICT_TF_POS_ATOMIC_WRITES) +/** Bit mask of the PAGE_ENCRYPTION field */ +#define DICT_TF_MASK_PAGE_ENCRYPTION \ + ((~(~0 << DICT_TF_WIDTH_PAGE_ENCRYPTION)) \ + << DICT_TF_POS_PAGE_ENCRYPTION) +/** Bit mask of the PAGE_ENCRYPTION_KEY field */ +#define DICT_TF_MASK_PAGE_ENCRYPTION_KEY \ + ((~(~0 << DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)) \ + << DICT_TF_POS_PAGE_ENCRYPTION_KEY) /** Return the value of the COMPACT field */ #define DICT_TF_GET_COMPACT(flags) \ @@ -222,6 +243,17 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_HAS_DATA_DIR(flags) \ ((flags & DICT_TF_MASK_DATA_DIR) \ >> DICT_TF_POS_DATA_DIR) + +/** Return the contents of the PAGE_ENCRYPTION field */ +#define DICT_TF_GET_PAGE_ENCRYPTION(flags) \ + ((flags & DICT_TF_MASK_PAGE_ENCRYPTION) \ + >> DICT_TF_POS_PAGE_ENCRYPTION) +/** Return the contents of the PAGE_ENCRYPTION KEY field */ +#define DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) \ + ((flags & DICT_TF_MASK_PAGE_ENCRYPTION_KEY) \ + >> DICT_TF_POS_PAGE_ENCRYPTION_KEY) + + /** Return the contents of the UNUSED bits */ #define DICT_TF_GET_UNUSED(flags) \ (flags >> DICT_TF_POS_UNUSED) diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic index 811976434a83b..3ada655d601a8 100644 --- 
a/storage/xtradb/include/dict0pagecompress.ic +++ b/storage/xtradb/include/dict0pagecompress.ic @@ -42,6 +42,8 @@ dict_tf_verify_flags( ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags); ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); @@ -50,6 +52,9 @@ dict_tf_verify_flags( ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + ulint fsp_page_encryption = FSP_FLAGS_GET_PAGE_ENCRYPTION(fsp_flags); + ulint fsp_page_encryption_key = FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(fsp_flags); + DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", return(ULINT_UNDEFINED);); @@ -107,6 +112,26 @@ dict_tf_verify_flags( return (FALSE); } + if (page_encryption != fsp_page_encryption) { + fprintf(stderr, + "InnoDB: Error: table flags has page_encryption %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_encryption %ld\n", + page_encryption, fsp_page_encryption); + + return (FALSE); + } + if (page_encryption_key != fsp_page_encryption_key) { + fprintf(stderr, + "InnoDB: Error: table flags has page_encryption_key %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_encryption_key %ld\n", + page_encryption_key, fsp_page_encryption_key); + + return (FALSE); + } + + return(TRUE); } diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index c2d113bdc1f5c..d915e0b5308b4 100644 --- a/storage/xtradb/include/fil0fil.h +++ 
b/storage/xtradb/include/fil0fil.h @@ -157,6 +157,14 @@ static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; #define FIL_PAGE_COMPRESSION_ZLIB 1 /*!< Compressin algorithm ZLIB. */ #define FIL_PAGE_COMPRESSION_LZ4 2 /*!< Compressin algorithm LZ4. */ +#define FIL_PAGE_ENCRYPTION_AES_128 16 /*!< Encryption algorithm AES-128. */ +#define FIL_PAGE_ENCRYPTION_AES_196 24 /*!< Encryption algorithm AES-192; NOTE(review): value 24 looks like the key length in bytes, so _196 in the macro name is presumably a typo for 192. */ +#define FIL_PAGE_ENCRYPTION_AES_256 32 /*!< Encryption algorithm AES-256. */ + +#define FIL_PAGE_ENCRYPTED_SIZE 2 /*!< Number of bytes used to store + actual payload data size on encrypted pages. */ + + /* @} */ /** File page trailer @{ */ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used @@ -168,6 +176,7 @@ static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; /** File page types (values of FIL_PAGE_TYPE) @{ */ #define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< Page compressed page */ +#define FIL_PAGE_PAGE_ENCRYPTED 34355 /*!< Page encrypted page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ @@ -214,6 +223,8 @@ struct fsp_open_info { lsn_t lsn; /*!< Flushed LSN from header page */ ulint id; /*!< Space ID */ ulint flags; /*!< Tablespace flags */ + ulint encryption_error; /*!< if an encryption error occurs */ + }; #ifndef UNIV_HOTBACKUP @@ -1104,6 +1115,15 @@ fil_space_name( fil_space_t* space); /*!< in: space */ #endif +/*******************************************************************//** +Return space flags */ +ulint +fil_space_flags( +/*===========*/ + fil_space_t* space); /*!< in: space */ + + + /****************************************************************//** Does error handling when a file operation fails. 
@return TRUE if we should retry the operation */ diff --git a/storage/xtradb/include/fil0pageencryption.h b/storage/xtradb/include/fil0pageencryption.h new file mode 100644 index 0000000000000..2164aeaa0f856 --- /dev/null +++ b/storage/xtradb/include/fil0pageencryption.h @@ -0,0 +1,117 @@ +/***************************************************************************** + +Copyright (C) 2014 eperi GmbH. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#ifndef fil0pageencryption_h +#define fil0pageencryption_h + +#define PAGE_ENCRYPTION_WRONG_KEY 1 +#define PAGE_ENCRYPTION_WRONG_PAGE_TYPE 2 +#define PAGE_ENCRYPTION_ERROR 3 +#define PAGE_ENCRYPTION_KEY_MISSING 4 +#define PAGE_ENCRYPTION_OK 0 +#define PAGE_ENCRYPTION_WILL_NOT_ENCRYPT 5 + +#include "fsp0fsp.h" +#include "fsp0pageencryption.h" + + + + + +/******************************************************************//** +@file include/fil0pageencryption.h +Helper functions for encryption/decryption page data on to table space. + +Created 08/25/2014 +***********************************************************************/ + + +/******************************PAGE_ENCRYPTION_ERROR*************************************//** +Returns the page encryption flag of the space, or false if the space +is not encrypted. 
The tablespace must be cached in the memory cache. +@return true if page encrypted, false if not or space not found */ +ibool +fil_space_is_page_encrypted( +/*=========================*/ + ulint id); /*!< in: space id */ + + +/*******************************************************************//** +Find out whether the page is page encrypted +@return true if page is page encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_encrypted( +/*===================*/ + const byte *buf); /*!< in: page */ + + +/*******************************************************************//** +Find out whether the page can not be decrypted +@return 0 if decryption should be possible, PAGE_ENCRYPTION_KEY_MISSING or PAGE_ENCRYPTION_ERROR otherwise */ +UNIV_INLINE +ulint +fil_page_can_not_decrypt( +/*===================*/ + const byte *buf); /*!< in: page */ + + +/****************************************************************//** +For page encrypted pages encrypt the page before actual write +operation. +@return encrypted page to be written*/ +byte* +fil_encrypt_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /*!< in: compression level */ + ulint* out_len, /*!< out: actual length of encrypted page */ + ulint* errorCode, /*!< out: an error code. set, if page is intentionally not encrypted */ + byte* tmp_encryption_buf, /*!< in: temporary buffer or NULL */ + ulint mode /*!< in: calling mode. Should be 0. */ + ); + +/****************************************************************//** +For page encrypted pages decrypt the page after actual read +operation. 
+@return decrypted page */ +ulint +fil_decrypt_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< out: buffer from which to read; in aio + this must be appropriately aligned */ + ulint len, /*!< in: length buffer, which should be decrypted.*/ + ulint* write_size, /*!< out: size of the decrypted data. If no error occurred equal to len, except for page compressed tables */ + ibool* page_compressed, /*!> FSP_FLAGS_POS_ATOMIC_WRITES) + +#define FSP_FLAGS_GET_PAGE_ENCRYPTION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION) \ + >> FSP_FLAGS_POS_PAGE_ENCRYPTION) +/** Return the value of the PAGE_ENCRYPTION_KEY field */ +#define FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION_KEY) \ + >> FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY) + + /** Set a PAGE_SSIZE into the correct bits in a given tablespace flags. */ #define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \ @@ -186,6 +223,14 @@ tablespace flags. */ tablespace flags. */ #define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) + +/** Set a PAGE_ENCRYPTION into the correct bits in a given tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_ENCRYPTION(flags, encryption) \ + (flags | (encryption << FSP_FLAGS_POS_PAGE_ENCRYPTION)) +/** Set a PAGE_ENCRYPTION_KEY into the correct bits in a given tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(flags, encryption_key) \ + (flags | (encryption_key << FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY)) + /** Set a ATOMIC_WRITES into the correct bits in a given tablespace flags. 
*/ #define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic index ddcb87b0e570d..8352044613719 100644 --- a/storage/xtradb/include/fsp0fsp.ic +++ b/storage/xtradb/include/fsp0fsp.ic @@ -66,7 +66,7 @@ fsp_flags_is_valid( ulint unused = FSP_FLAGS_GET_UNUSED(flags); ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); - ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); diff --git a/storage/xtradb/include/fsp0pageencryption.h b/storage/xtradb/include/fsp0pageencryption.h new file mode 100644 index 0000000000000..42dac18e60abf --- /dev/null +++ b/storage/xtradb/include/fsp0pageencryption.h @@ -0,0 +1,60 @@ +/***************************************************************************** + + Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************/ + +/******************************************************************//** +@file include/fsp0pageencryption.h +Helper functions for extracting/storing page encryption information to file space. 
+ +Created 08/28/2014 +***********************************************************************/ + +#ifndef FSP0PAGEENCRYPTION_H_ +#define FSP0PAGEENCRYPTION_H_ + + + +/********************************************************************//** +Determine if the tablespace is page encrypted from dict_table_t::flags. +@return TRUE if page encrypted, FALSE if not page encrypted */ +UNIV_INLINE +ibool +fsp_flags_is_page_encrypted( +/*=========================*/ + ulint flags); /*!< in: tablespace flags */ + + +/********************************************************************//** +Extract the page encryption key from tablespace flags. +A tablespace has only one physical page encryption key +whether that page is encrypted or not. +@return page encryption key of the file-per-table tablespace, +or zero if the table is not encrypted. */ +UNIV_INLINE +ulint +fsp_flags_get_page_encryption_key( +/*=================================*/ + ulint flags); /*!< in: tablespace flags */ + + +#ifndef UNIV_NONINL +#include "fsp0pageencryption.ic" +#endif + + +#endif /* FSP0PAGEENCRYPTION_H_ */ diff --git a/storage/xtradb/include/fsp0pageencryption.ic b/storage/xtradb/include/fsp0pageencryption.ic new file mode 100644 index 0000000000000..04180e89027dc --- /dev/null +++ b/storage/xtradb/include/fsp0pageencryption.ic @@ -0,0 +1,162 @@ +/***************************************************************************** + + Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file include/fsp0pageencryption.ic +Implementation for helper functions for encrypting/decrypting pages +and atomic writes information to file space. + +Created 08/28/2014 +***********************************************************************/ + +#include "fsp0fsp.h" +#include "KeySingleton.h" +#include "fil0pageencryption.h" + + + + +/********************************************************************//** +Determine if the tablespace is page encrypted from dict_table_t::flags. +@return TRUE if page encrypted, FALSE if not page encrypted */ +UNIV_INLINE +ibool +fsp_flags_is_page_encrypted( +/*=========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_ENCRYPTION(flags)); +} + +/********************************************************************//** +Extract the page encryption key from tablespace flags. +A tablespace has only one physical page encryption key +whether that page is encrypted or not. +@return page encryption key of the file-per-table tablespace, +or zero if the table is not encrypted. */ +UNIV_INLINE +ulint +fsp_flags_get_page_encryption_key( +/*=================================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags)); +} + + +/*******************************************************************//** +Returns the page encryption flag of the space, or false if the space +is not encrypted. The tablespace must be cached in the memory cache. 
+@return true if page encrypted, false if not or space not found */ +UNIV_INLINE +ibool +fil_space_is_page_encrypted( +/*=========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_is_page_encrypted(flags)); + } + + return(FALSE); /* flags is 0 or ULINT_UNDEFINED here: not encrypted, or space not found */ +} + +/*******************************************************************//** +Returns the page encryption key of the space, or 0 if the space +is not encrypted. The tablespace must be cached in the memory cache. +@return page encryption key, 0 if the tablespace flags are 0, ULINT_UNDEFINED if space not found */ +UNIV_INLINE +ulint +fil_space_get_page_encryption_key( +/*=================================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_get_page_encryption_key(flags)); + } + + return(flags); +} + + + +/*******************************************************************//** +Find out whether the page is page encrypted +@return true if page is page encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_encrypted( +/*===================*/ + const byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_ENCRYPTED); +} + + +/*******************************************************************//** +Find out whether the page can be decrypted. +This is the case, if the page is already decrypted and is not the first page of the table space. +If the page is already decrypted it is not of the FIL_PAGE_PAGE_ENCRYPTED type. +if it is the first page of the table space, it is assumed that a page can be decrypted if the +key found in the flags (part of the 1st page) can be read from the key provider. +The case, if the key changed, is currently not caught. +The function for decrypting the page should already be executed before this. 
+@return PAGE_ENCRYPTION_KEY_MISSING if key provider is available, but key is not available
+	PAGE_ENCRYPTION_ERROR if the key provider is not available, or if the page
+	is still of type FIL_PAGE_PAGE_ENCRYPTED although its key is available
+	0 if decryption should be possible
+*/
+UNIV_INLINE
+ulint
+fil_page_can_not_decrypt(
+/*===================*/
+	const byte *buf)	/*!< in: page */
+{
+	const ulint	page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+	if (page_type == FIL_PAGE_TYPE_FSP_HDR) {
+		/* Space header page: the encryption flag and the key id are
+		taken from the space flags stored in the header. */
+		const ulint	flags = mach_read_from_4(
+			buf + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS);
+
+		if (fsp_flags_is_page_encrypted(flags)) {
+			/* Accessing the table would surely fail without a key
+			provider or without the key. Each condition is queried
+			exactly once so that the reported reason cannot
+			disagree with the check that triggered it. */
+			if (!KeySingleton::getInstance().isAvailable()) {
+				return PAGE_ENCRYPTION_ERROR;
+			}
+
+			if (!KeySingleton::getInstance().hasKey(
+				fsp_flags_get_page_encryption_key(flags))) {
+				return PAGE_ENCRYPTION_KEY_MISSING;
+			}
+		}
+	}
+
+	if (page_type == FIL_PAGE_PAGE_ENCRYPTED) {
+		/* The page still carries the encrypted page type, i.e. it was
+		not decrypted. The key id is the single byte stored at
+		FIL_PAGE_SPACE_OR_CHKSUM. */
+		const ulint	key = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM);
+
+		if (KeySingleton::getInstance().isAvailable()
+		    && !KeySingleton::getInstance().hasKey(key)) {
+			return PAGE_ENCRYPTION_KEY_MISSING;
+		}
+
+		return PAGE_ENCRYPTION_ERROR;
+	}
+
+	return 0;
+}
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index 518c1605e962d..b2a6c5ef895b2 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -322,10 +322,10 @@ The wrapper functions have the prefix of "innodb_".
*/ # define os_aio(type, mode, name, file, buf, offset, \ n, message1, message2, space_id, \ - trx, page_compressed, page_compression_level, write_size) \ + trx, page_compressed, page_compression_level, write_size, page_encryption, page_encryption_key) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ n, message1, message2, space_id, trx, \ - page_compressed, page_compression_level, write_size, \ + page_compressed, page_compression_level, write_size, page_encryption, page_encryption_key, \ __FILE__, __LINE__) # define os_file_read(file, buf, offset, n, compressed) \ @@ -374,10 +374,10 @@ to original un-instrumented file I/O APIs */ # define os_aio(type, mode, name, file, buf, offset, n, message1, \ message2, space_id, trx, \ - page_compressed, page_compression_level, write_size) \ + page_compressed, page_compression_level, write_size, page_encryption, page_encryption_key) \ os_aio_func(type, mode, name, file, buf, offset, n, \ message1, message2, space_id, trx, \ - page_compressed, page_compression_level, write_size) + page_compressed, page_compression_level, write_size, page_encryption, page_encryption_key) # define os_file_read(file, buf, offset, n, compressed) \ os_file_read_func(file, buf, offset, n, NULL, compressed) @@ -805,6 +805,10 @@ pfs_os_aio_func( operation for this page and if initialized we do not trim again if actual page size does not decrease. 
*/ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption + key to be used */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -1187,11 +1191,16 @@ os_aio_func( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ - ulint* write_size);/*!< in/out: Actual write size initialized + ulint* write_size,/*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key); /*!< page encryption key + to be used */ + /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic index 61300387e1bf9..59a5a149424f2 100644 --- a/storage/xtradb/include/os0file.ic +++ b/storage/xtradb/include/os0file.ic @@ -229,6 +229,11 @@ pfs_os_aio_func( operation for this page and if initialized we do not trim again if actual page size does not decrease. 
*/ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption + key to be used */ + const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -245,7 +250,8 @@ pfs_os_aio_func( result = os_aio_func(type, mode, name, file, buf, offset, n, message1, message2, space_id, trx, - page_compression, page_compression_level, write_size); + page_compression, page_compression_level, write_size , + page_encryption, page_encryption_key); register_pfs_file_io_end(locker, n); diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h index c1585c3072cdb..30495a97e52b0 100644 --- a/storage/xtradb/include/srv0mon.h +++ b/storage/xtradb/include/srv0mon.h @@ -323,6 +323,11 @@ enum monitor_id_t { MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, + /* New monitor variables for page encryption */ + MONITOR_OVLD_PAGES_PAGE_ENCRYPTED, + MONITOR_OVLD_PAGES_PAGE_DECRYPTED, + MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR, + /* Index related counters */ MONITOR_MODULE_INDEX, MONITOR_INDEX_SPLIT, diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 2825e37ba86f7..aca29326d536b 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -134,6 +134,13 @@ struct srv_stats_t { /* Number of page compression errors */ ulint_ctr_64_t pages_page_compression_error; + /* Number of pages encrypted with page encryption */ + ulint_ctr_64_t pages_page_encrypted; + /* Number of pages decrypted with page encryption */ + ulint_ctr_64_t pages_page_decrypted; + /* Number of page encryption errors */ + ulint_ctr_64_t pages_page_encryption_error; + /** Number of data read in total (in bytes) */ ulint_ctr_1_t data_read; @@ -1183,6 +1190,12 @@ struct export_var_t{ compression */ ib_int64_t innodb_pages_page_compression_error;/*!< Number of page compression errors */ + 
ib_int64_t innodb_pages_page_encrypted;/*!< Number of pages + encrypted by page encryption */ + ib_int64_t innodb_pages_page_decrypted;/*!< Number of pages + decrypted by page encryption */ + ib_int64_t innodb_pages_page_encryption_error;/*!< Number of page + encryption errors */ }; /** Thread slot in the thread table. */ diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 18ccc3350cf34..6a16bf5d40740 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -1,246 +1,264 @@ -/*********************************************************************** - -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2014, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -***********************************************************************/ - -/**************************************************//** -@file os/os0file.cc -The interface to the operating system file i/o primitives - -Created 10/21/1995 Heikki Tuuri -*******************************************************/ - -#include "os0file.h" - -#ifdef UNIV_NONINL -#include "os0file.ic" -#endif -#include "ha_prototypes.h" -#include "ut0mem.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "fil0fil.h" -#include "fil0pagecompress.h" -#include "buf0buf.h" -#include "btr0types.h" -#include "trx0trx.h" -#include "srv0mon.h" -#include "srv0srv.h" -#ifdef HAVE_POSIX_FALLOCATE -#include "fcntl.h" -#endif -#ifndef UNIV_HOTBACKUP -# include "os0sync.h" -# include "os0thread.h" -#else /* !UNIV_HOTBACKUP */ -# ifdef __WIN__ -/* Add includes for the _stat() call to compile on Windows */ -# include -# include -# include -# endif /* __WIN__ */ -#endif /* !UNIV_HOTBACKUP */ - -#if defined(LINUX_NATIVE_AIO) -#include -#endif - -#ifdef _WIN32 -#define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1 -#endif - -#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H) -# include -# ifndef DFS_IOCTL_ATOMIC_WRITE_SET -# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint) -# endif -#endif - -#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) -#include -#endif - -#ifdef HAVE_LZO -#include "lzo/lzo1x.h" -#endif - -/** Insert buffer segment id */ -static const ulint IO_IBUF_SEGMENT = 0; - -/** Log segment id */ -static const ulint IO_LOG_SEGMENT = 1; - -/* This specifies the file permissions InnoDB uses when it creates files in -Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to -my_umask */ - -#ifndef __WIN__ -/** Umask for creating files */ -UNIV_INTERN ulint os_innodb_umask = S_IRUSR | 
S_IWUSR | S_IRGRP | S_IWGRP; -#else -/** Umask for creating files */ -UNIV_INTERN ulint os_innodb_umask = 0; -#endif /* __WIN__ */ - -#ifndef UNIV_HOTBACKUP -/* We use these mutexes to protect lseek + file i/o operation, if the -OS does not provide an atomic pread or pwrite, or similar */ -#define OS_FILE_N_SEEK_MUTEXES 16 -UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; - -/* In simulated aio, merge at most this many consecutive i/os */ -#define OS_AIO_MERGE_N_CONSECUTIVE 64 - -#ifdef WITH_INNODB_DISALLOW_WRITES -#define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event) -#else -#define WAIT_ALLOW_WRITES() do { } while (0) -#endif /* WITH_INNODB_DISALLOW_WRITES */ - -/********************************************************************** - -InnoDB AIO Implementation: -========================= - -We support native AIO for windows and linux. For rest of the platforms -we simulate AIO by special io-threads servicing the IO-requests. - -Simulated AIO: -============== - -In platforms where we 'simulate' AIO following is a rough explanation -of the high level design. -There are four io-threads (for ibuf, log, read, write). -All synchronous IO requests are serviced by the calling thread using -os_file_write/os_file_read. The Asynchronous requests are queued up -in an array (there are four such arrays) by the calling thread. -Later these requests are picked up by the io-thread and are serviced -synchronously. - -Windows native AIO: -================== - -If srv_use_native_aio is not set then windows follow the same -code as simulated AIO. If the flag is set then native AIO interface -is used. On windows, one of the limitation is that if a file is opened -for AIO no synchronous IO can be done on it. Therefore we have an -extra fifth array to queue up synchronous IO requests. -There are innodb_file_io_threads helper threads. These threads work -on the four arrays mentioned above in Simulated AIO. No thread is -required for the sync array. 
-If a synchronous IO request is made, it is first queued in the sync -array. Then the calling thread itself waits on the request, thus -making the call synchronous. -If an AIO request is made the calling thread not only queues it in the -array but also submits the requests. The helper thread then collects -the completed IO request and calls completion routine on it. - -Linux native AIO: -================= - -If we have libaio installed on the system and innodb_use_native_aio -is set to TRUE we follow the code path of native AIO, otherwise we -do simulated AIO. -There are innodb_file_io_threads helper threads. These threads work -on the four arrays mentioned above in Simulated AIO. -If a synchronous IO request is made, it is handled by calling -os_file_write/os_file_read. -If an AIO request is made the calling thread not only queues it in the -array but also submits the requests. The helper thread then collects -the completed IO request and calls completion routine on it. - -**********************************************************************/ - -/** Flag: enable debug printout for asynchronous i/o */ -UNIV_INTERN ibool os_aio_print_debug = FALSE; - -#ifdef UNIV_PFS_IO -/* Keys to register InnoDB I/O with performance schema */ -UNIV_INTERN mysql_pfs_key_t innodb_file_data_key; -UNIV_INTERN mysql_pfs_key_t innodb_file_log_key; -UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key; -UNIV_INTERN mysql_pfs_key_t innodb_file_bmp_key; -#endif /* UNIV_PFS_IO */ - -/** The asynchronous i/o array slot structure */ -struct os_aio_slot_t{ -#ifdef WIN_ASYNC_IO - OVERLAPPED control; /*!< Windows control block for the - aio request, MUST be first element in the structure*/ - void *arr; /*!< Array this slot belongs to*/ -#endif - - ibool is_read; /*!< TRUE if a read operation */ - ulint pos; /*!< index of the slot in the aio - array */ - ibool reserved; /*!< TRUE if this slot is reserved */ - time_t reservation_time;/*!< time when reserved */ - ulint len; /*!< length of the block to 
read or - write */ - byte* buf; /*!< buffer used in i/o */ - ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */ - os_offset_t offset; /*!< file offset in bytes */ - os_file_t file; /*!< file where to read or write */ - const char* name; /*!< file name or path */ - ibool io_already_done;/*!< used only in simulated aio: - TRUE if the physical i/o already - made and only the slot message - needs to be passed to the caller - of os_aio_simulated_handle */ - ulint space_id; - fil_node_t* message1; /*!< message which is given by the */ - void* message2; /*!< the requester of an aio operation - and which can be used to identify - which pending aio operation was - completed */ - ulint bitmap; - - byte* page_compression_page; /*!< Memory allocated for - page compressed page and - freed after the write - has been completed */ - - ibool page_compression; - ulint page_compression_level; - - ulint* write_size; /*!< Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - - byte* page_buf; /*!< Actual page buffer for - page compressed pages, do not - free this */ - - ibool page_compress_success; - /*!< TRUE if page compression was - successfull, false if not */ - - ulint file_block_size;/*!< file block size */ + /*********************************************************************** + + Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. + Copyright (c) 2009, Percona Inc. + Copyright (c) 2013, 2014, MariaDB Corporation. + + Portions of this file contain modifications contributed and copyrighted + by Percona Inc.. Those modifications are + gratefully acknowledged and are described briefly in the InnoDB + documentation. The contributions by Percona Inc. are incorporated with + their permission, and subject to the conditions contained in the file + COPYING.Percona. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + Public License for more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + + ***********************************************************************/ + + /**************************************************//** + @file os/os0file.cc + The interface to the operating system file i/o primitives + + Created 10/21/1995 Heikki Tuuri + *******************************************************/ + + #include "os0file.h" + + #ifdef UNIV_NONINL + #include "os0file.ic" + #endif + #include "ha_prototypes.h" + #include "ut0mem.h" + #include "srv0srv.h" + #include "srv0start.h" + #include "fil0fil.h" + #include "fsp0fsp.h" + #include "fil0pagecompress.h" + #include "fil0pageencryption.h" + #include "buf0buf.h" + #include "btr0types.h" + #include "trx0trx.h" + #include "srv0mon.h" + #include "srv0srv.h" + #ifdef HAVE_POSIX_FALLOCATE + #include "fcntl.h" + #endif + #ifndef UNIV_HOTBACKUP + # include "os0sync.h" + # include "os0thread.h" + #else /* !UNIV_HOTBACKUP */ + # ifdef __WIN__ + /* Add includes for the _stat() call to compile on Windows */ + # include + # include + # include + # endif /* __WIN__ */ + #endif /* !UNIV_HOTBACKUP */ + + #if defined(LINUX_NATIVE_AIO) + #include + #endif + + #ifdef _WIN32 + #define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1 + #endif + + #if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H) + # include + # ifndef DFS_IOCTL_ATOMIC_WRITE_SET + # define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, 
uint) + # endif + #endif + + #if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) + #include + #endif + + #ifdef HAVE_LZO + #include "lzo/lzo1x.h" + #endif + + /** Insert buffer segment id */ + static const ulint IO_IBUF_SEGMENT = 0; + + /** Log segment id */ + static const ulint IO_LOG_SEGMENT = 1; + + /* This specifies the file permissions InnoDB uses when it creates files in + Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to + my_umask */ + + #ifndef __WIN__ + /** Umask for creating files */ + UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + #else + /** Umask for creating files */ + UNIV_INTERN ulint os_innodb_umask = 0; + #endif /* __WIN__ */ + + #ifndef UNIV_HOTBACKUP + /* We use these mutexes to protect lseek + file i/o operation, if the + OS does not provide an atomic pread or pwrite, or similar */ + #define OS_FILE_N_SEEK_MUTEXES 16 + UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; + + /* In simulated aio, merge at most this many consecutive i/os */ + #define OS_AIO_MERGE_N_CONSECUTIVE 64 + + #ifdef WITH_INNODB_DISALLOW_WRITES + #define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event) + #else + #define WAIT_ALLOW_WRITES() do { } while (0) + #endif /* WITH_INNODB_DISALLOW_WRITES */ + + /********************************************************************** + + InnoDB AIO Implementation: + ========================= + + We support native AIO for windows and linux. For rest of the platforms + we simulate AIO by special io-threads servicing the IO-requests. + + Simulated AIO: + ============== + + In platforms where we 'simulate' AIO following is a rough explanation + of the high level design. + There are four io-threads (for ibuf, log, read, write). + All synchronous IO requests are serviced by the calling thread using + os_file_write/os_file_read. The Asynchronous requests are queued up + in an array (there are four such arrays) by the calling thread. 
+ Later these requests are picked up by the io-thread and are serviced + synchronously. + + Windows native AIO: + ================== + + If srv_use_native_aio is not set then windows follow the same + code as simulated AIO. If the flag is set then native AIO interface + is used. On windows, one of the limitation is that if a file is opened + for AIO no synchronous IO can be done on it. Therefore we have an + extra fifth array to queue up synchronous IO requests. + There are innodb_file_io_threads helper threads. These threads work + on the four arrays mentioned above in Simulated AIO. No thread is + required for the sync array. + If a synchronous IO request is made, it is first queued in the sync + array. Then the calling thread itself waits on the request, thus + making the call synchronous. + If an AIO request is made the calling thread not only queues it in the + array but also submits the requests. The helper thread then collects + the completed IO request and calls completion routine on it. + + Linux native AIO: + ================= + + If we have libaio installed on the system and innodb_use_native_aio + is set to TRUE we follow the code path of native AIO, otherwise we + do simulated AIO. + There are innodb_file_io_threads helper threads. These threads work + on the four arrays mentioned above in Simulated AIO. + If a synchronous IO request is made, it is handled by calling + os_file_write/os_file_read. + If an AIO request is made the calling thread not only queues it in the + array but also submits the requests. The helper thread then collects + the completed IO request and calls completion routine on it. 
+ + **********************************************************************/ + + /** Flag: enable debug printout for asynchronous i/o */ + UNIV_INTERN ibool os_aio_print_debug = FALSE; + + #ifdef UNIV_PFS_IO + /* Keys to register InnoDB I/O with performance schema */ + UNIV_INTERN mysql_pfs_key_t innodb_file_data_key; + UNIV_INTERN mysql_pfs_key_t innodb_file_log_key; + UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key; + UNIV_INTERN mysql_pfs_key_t innodb_file_bmp_key; + #endif /* UNIV_PFS_IO */ + + /** The asynchronous i/o array slot structure */ + struct os_aio_slot_t{ + #ifdef WIN_ASYNC_IO + OVERLAPPED control; /*!< Windows control block for the + aio request, MUST be first element in the structure*/ + void *arr; /*!< Array this slot belongs to*/ + #endif + + ibool is_read; /*!< TRUE if a read operation */ + ulint pos; /*!< index of the slot in the aio + array */ + ibool reserved; /*!< TRUE if this slot is reserved */ + time_t reservation_time;/*!< time when reserved */ + ulint len; /*!< length of the block to read or + write */ + byte* buf; /*!< buffer used in i/o */ + ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */ + os_offset_t offset; /*!< file offset in bytes */ + os_file_t file; /*!< file where to read or write */ + const char* name; /*!< file name or path */ + ibool io_already_done;/*!< used only in simulated aio: + TRUE if the physical i/o already + made and only the slot message + needs to be passed to the caller + of os_aio_simulated_handle */ + ulint space_id; + fil_node_t* message1; /*!< message which is given by the */ + void* message2; /*!< the requester of an aio operation + and which can be used to identify + which pending aio operation was + completed */ + ulint bitmap; + + byte* page_compression_page; /*!< Memory allocated for + page compressed page and + freed after the write + has been completed */ + + byte* page_encryption_page; /*!< Memory allocated for + page encrypted page and + freed after the write + has been completed */ + + + ibool 
page_compression; + ulint page_compression_level; + + ibool page_encryption; + ulint page_encryption_key; + + ulint* write_size; /*!< Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + + byte* page_buf; /*!< Actual page buffer for + page compressed pages, do not + free this */ + + byte* page_buf2; /*!< Actual page buffer for + page encrypted pages, do not + free this */ + byte* tmp_encryption_buf; /*!< a temporal buffer used by page encryption */ + + + ibool page_compress_success; + ibool page_encryption_success; + /*!< TRUE if page compression was + successfull, false if not */ + + ulint file_block_size;/*!< file block size */ #ifdef LINUX_NATIVE_AIO struct iocb control; /* Linux control block for aio */ @@ -384,6 +402,19 @@ os_slot_alloc_lzo_mem( os_aio_slot_t* slot); /*!< in: slot structure */ #endif +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf2( +os_aio_slot_t* slot); /*!< in: slot structure */ +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. */ +UNIV_INTERN +void +os_slot_alloc_tmp_encryption_buf( +os_aio_slot_t* slot); /*!< in: slot structure */ /****************************************************************//** Does error handling when a file operation fails. @return TRUE if we should retry the operation */ @@ -494,19 +525,19 @@ os_get_os_version(void) /* Windows : Handling synchronous IO on files opened asynchronously. -If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to +If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to a completion port, then every IO on this file would normally be enqueued to the completion port. 
Sometimes however we would like to do a synchronous IO. This is possible if we initialitze have overlapped.hEvent with a valid event and set its lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info) -We'll create this special event once for each thread and store in thread local +We'll create this special event once for each thread and store in thread local storage. */ /***********************************************************************//** -Initialize tls index.for event handle used for synchronized IO on files that +Initialize tls index.for event handle used for synchronized IO on files that might be opened with FILE_FLAG_OVERLAPPED. */ static void win_init_syncio_event() @@ -3108,6 +3139,11 @@ os_file_read_func( os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + if (fil_page_is_encrypted((byte *)buf)) { + if (fil_decrypt_page(NULL, (byte *)buf, n, NULL, &compressed, NULL, 0)!=PAGE_ENCRYPTION_OK) {; + return FALSE; + } + } /* Note that InnoDB writes files that are not formated as file spaces and they do not have FIL_PAGE_TYPE field, thus we must use here information is the actual @@ -3128,7 +3164,11 @@ os_file_read_func( ret = os_file_pread(file, buf, n, offset, trx); if ((ulint) ret == n) { - + if (fil_page_is_encrypted((byte *)buf)) { + if (fil_decrypt_page(NULL, (byte *)buf, n, NULL, &compressed, NULL, 0)!=PAGE_ENCRYPTION_OK) {; + return FALSE; + } + } /* Note that InnoDB writes files that are not formated as file spaces and they do not have FIL_PAGE_TYPE field, thus we must use here information is the actual @@ -3137,6 +3177,7 @@ os_file_read_func( fil_decompress_page(NULL, (byte *)buf, n, NULL); } + return(TRUE); } @@ -3227,6 +3268,9 @@ os_file_read_no_error_handling_func( if (ret && len == n) { + if (fil_page_is_encrypted((byte *)buf)) { + if (fil_decrypt_page(NULL, (byte *)buf, n, NULL, &compressed, NULL, 0)!=PAGE_ENCRYPTION_OK) return (FALSE); + } /* Note that InnoDB writes files that are not formated as file 
spaces and they do not have FIL_PAGE_TYPE field, thus we must use here information is the actual @@ -3248,6 +3292,12 @@ os_file_read_no_error_handling_func( if ((ulint) ret == n) { + + if (fil_page_is_encrypted((byte *)buf)) { + if (fil_decrypt_page(NULL, (byte *)buf, n, NULL, &compressed, NULL, 0)!=PAGE_ENCRYPTION_OK) return (FALSE); + } + + /* Note that InnoDB writes files that are not formated as file spaces and they do not have FIL_PAGE_TYPE field, thus we must use here information is the actual @@ -3256,6 +3306,9 @@ os_file_read_no_error_handling_func( fil_decompress_page(NULL, (byte *)buf, n, NULL); } + + + return(TRUE); } #endif /* __WIN__ */ @@ -4288,6 +4341,19 @@ os_aio_array_free( } } + for (i = 0; i < array->n_slots; i++) { + os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); + if (slot->page_encryption_page) { + ut_free(slot->page_encryption_page); + slot->page_encryption_page = NULL; + } + if (slot->tmp_encryption_buf) { + ut_free(slot->tmp_encryption_buf); + slot->tmp_encryption_buf = NULL; + } + } + + ut_free(array->slots); ut_free(array); @@ -4637,6 +4703,10 @@ os_aio_array_reserve_slot( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption key + to be used */ ulint* write_size)/*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if @@ -4734,9 +4804,13 @@ os_aio_array_reserve_slot( slot->space_id = space_id; slot->page_compress_success = FALSE; + slot->page_encryption_success = FALSE; + slot->write_size = write_size; slot->page_compression_level = page_compression_level; slot->page_compression = page_compression; + slot->page_encryption_key = page_encryption_key; + slot->page_encryption = page_encryption; if (message1) { slot->file_block_size = fil_node_get_block_size(message1); @@ -4787,6 +4861,43 @@ os_aio_array_reserve_slot( 
/* Take array mutex back */ os_mutex_enter(array->mutex); + } //CMD + /* If the space is page encryption and this is write operation + then we encrypt the page */ + if (message1 && type == OS_FILE_WRITE && page_encryption ) { + ulint real_len = len; + ulint ec = 0; + byte* tmp = NULL; + + /* Release the array mutex while encrypting */ + os_mutex_exit(array->mutex); + + // We allocate memory for page encrypted buffer if and only + // if it is not yet allocated. + if (slot->page_buf2 == NULL) { + os_slot_alloc_page_buf2(slot); + } + os_slot_alloc_tmp_encryption_buf(slot); + + + + ut_ad(slot->page_buf2); + + tmp = fil_encrypt_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf2, len, page_encryption_key, &real_len, &ec, slot->tmp_encryption_buf, 0); + + /* If encryption succeeded, set up the length and buffer */ + if (tmp != buf || (ec == PAGE_ENCRYPTION_WILL_NOT_ENCRYPT)) { + len = real_len; + buf = slot->page_buf2; + slot->len = real_len; + slot->page_encryption_success = TRUE; + } else { + slot->page_encryption_success = FALSE; + ut_error; + } + + /* Take array mutex back */ + os_mutex_enter(array->mutex); } #ifdef WIN_ASYNC_IO @@ -4816,7 +4927,11 @@ os_aio_array_reserve_slot( io_prep_pread(iocb, file, buf, len, aio_offset); } else { ut_a(type == OS_FILE_WRITE); - io_prep_pwrite(iocb, file, buf, len, aio_offset); + if (page_encryption && !slot->page_encryption_success) { + ut_error; + } else { + io_prep_pwrite(iocb, file, buf, len, aio_offset); + } } iocb->data = (void*) slot; @@ -5069,12 +5184,18 @@ os_aio_func( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ - ulint* write_size)/*!< in/out: Actual write size initialized + ulint* write_size,/*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. 
*/ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key) /*!< page encryption key + to be used */ + { + void* buffer = NULL; os_aio_array_t* array; os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO @@ -5093,6 +5214,7 @@ os_aio_func( ut_ad((n & 0xFFFFFFFFUL) == n); #endif + wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); @@ -5178,7 +5300,8 @@ os_aio_func( } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, name, buf, offset, n, space_id, - page_compression, page_compression_level, write_size); + page_compression, page_compression_level, + page_encryption, page_encryption_key, write_size); if (type == OS_FILE_READ) { if (srv_use_native_aio) { os_n_file_reads++; @@ -5206,7 +5329,14 @@ os_aio_func( if (srv_use_native_aio) { os_n_file_writes++; #ifdef WIN_ASYNC_IO - ret = WriteFile(file, buf, (DWORD) n, &len, + if (page_encryption) { + if (!slot->page_encryption_success) goto err_exit; + buffer = slot->page_buf2; + n = slot->len; + } else { + buffer = buf; + } + ret = WriteFile(file, buffer, (DWORD) n, &len, &(slot->control)); if(!ret && GetLastError() != ERROR_IO_PENDING) @@ -5289,7 +5419,7 @@ os_aio_windows_handle( HANDLE port = READ_SEGMENT(segment)? read_completion_port : completion_port; for(;;) { - ret = GetQueuedCompletionStatus(port, &len, &key, + ret = GetQueuedCompletionStatus(port, &len, &key, (OVERLAPPED **)&slot, INFINITE); /* If shutdown key was received, repost the shutdown message and exit */ @@ -5304,19 +5434,19 @@ os_aio_windows_handle( if(WRITE_SEGMENT(segment)&& slot->type == OS_FILE_READ) { /* - Redirect read completions to the dedicated completion port + Redirect read completions to the dedicated completion port and thread. We need to split read and write threads. 
If we do not - do that, and just allow all io threads process all IO, it is possible + do that, and just allow all io threads process all IO, it is possible to get stuck in a deadlock in buffer pool code, - Currently, the problem is solved this way - "write io" threads + Currently, the problem is solved this way - "write io" threads always get all completion notifications, from both async reads and writes. Write completion is handled in the same thread that gets it. Read completion is forwarded via PostQueueCompletionStatus()) to the second completion port dedicated solely to reads. One of the "read io" threads waiting on this port will finally handle the IO. - Forwarding IO completion this way costs a context switch , and this + Forwarding IO completion this way costs a context switch , and this seems tolerable since asynchronous reads are by far less frequent. */ ut_a(PostQueuedCompletionStatus(read_completion_port, len, key, @@ -5364,14 +5494,18 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - if (slot->message1 && slot->page_compression && slot->page_buf) { - ret_val = os_file_write(slot->name, slot->file, slot->page_buf, - slot->offset, slot->len); - } else { + if (slot->message1 && slot->page_encryption && slot->page_buf2) { + ret_val = os_file_write(slot->name, slot->file, slot->page_buf2, + slot->offset, slot->len); + } else + if (slot->message1 && slot->page_compression && slot->page_buf) { + ret_val = os_file_write(slot->name, slot->file, slot->page_buf, + slot->offset, slot->len); + } else { - ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->offset, slot->len); - } + ret_val = os_file_write(slot->name, slot->file, slot->buf, + slot->offset, slot->len); + } break; case OS_FILE_READ: ret_val = os_file_read(slot->file, slot->buf, @@ -5400,6 +5534,23 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } + /* page encryption */ + if (slot->message1 && slot->page_encryption) { + if (slot->page_buf2==NULL) { + 
os_slot_alloc_page_buf2(slot); + } + os_slot_alloc_tmp_encryption_buf(slot); + + ut_ad(slot->page_buf2); + + if (slot->type == OS_FILE_READ) { + if (fil_page_is_encrypted(slot->buf)) { + fil_decrypt_page(slot->page_buf2, slot->buf, slot->len, slot->write_size, NULL, slot->tmp_encryption_buf, 0); + } + } + + } + if (slot->message1 && slot->page_compression) { // We allocate memory for page compressed buffer if and only // if it is not yet allocated. @@ -5513,6 +5664,25 @@ os_aio_linux_collect( /* We have not overstepped to next segment. */ ut_a(slot->pos < end_pos); + + + /* page encryption */ + if (slot->message1 && slot->page_encryption) { + if (slot->page_buf2==NULL) { + os_slot_alloc_page_buf2(slot); + } + os_slot_alloc_tmp_encryption_buf(slot); + + ut_ad(slot->page_buf2); + + if (slot->type == OS_FILE_READ) { + if (fil_page_is_encrypted(slot->buf)) { + fil_decrypt_page(slot->page_buf2, slot->buf, slot->len, slot->write_size, NULL, slot->tmp_encryption_buf, 0); + } + } + } + + /* If the table is page compressed and this is read, we decompress before we annouce the read is complete. For writes, we free the compressed page. */ @@ -5542,6 +5712,7 @@ os_aio_linux_collect( } } + /* Mark this request as completed. The error handling will be done in the calling function. */ os_mutex_enter(array->mutex); @@ -6590,6 +6761,37 @@ os_file_trim( } +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. This +buffer is freed later. 
*/ +UNIV_INTERN +void +os_slot_alloc_page_buf2( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + byte* cbuf2; + byte* cbuf; + + cbuf2 = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE*2)); /* twice the page size, so an aligned page fits inside */ + cbuf = static_cast<byte*>(ut_align(cbuf2, UNIV_PAGE_SIZE)); + slot->page_encryption_page = static_cast<byte*>(cbuf2); /* the raw ut_malloc() block */ + slot->page_buf2 = static_cast<byte*>(cbuf); /* the ut_align() result within that block */ +} + +/**********************************************************************//** +Allocate memory for temporary buffer used for page encryption. */ +UNIV_INTERN +void +os_slot_alloc_tmp_encryption_buf( +/*===================*/ +os_aio_slot_t* slot) /*!< in: slot structure */ +{ + if (slot->tmp_encryption_buf == NULL) { + slot->tmp_encryption_buf = static_cast<byte*>(ut_malloc(64)); + } +} + /**********************************************************************//** Allocate memory for temporal buffer used for page compression. This buffer is freed later. */ diff --git a/storage/xtradb/os/os0file.cc.rej b/storage/xtradb/os/os0file.cc.rej new file mode 100644 index 0000000000000..6455224e46bc5 --- /dev/null +++ b/storage/xtradb/os/os0file.cc.rej @@ -0,0 +1,20 @@ +--- storage/xtradb/os/os0file.cc ++++ storage/xtradb/os/os0file.cc +@@ -3175,7 +3175,7 @@ + + if (fil_page_is_encrypted((byte *)buf)) { + // if (page_encryption) { +- fil_decrypt_page(NULL, (byte *)buf, n, NULL); ++ fil_decrypt_page(NULL, (byte *)buf, n, NULL, 0); + } + + +@@ -4692,7 +4692,7 @@ + + ut_ad(slot->page_buf2); + //FF +- tmp = fil_encrypt_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf2, len, page_encryption_key, &real_len); ++ tmp = fil_encrypt_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf2, len, page_encryption_key, &real_len, 0); + + /* If encryption succeeded, set up the length and buffer */ + if (tmp != buf) { diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index 7be2596655773..5cbcc818c2c93 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@
-1920,6 +1920,15 @@ srv_mon_process_existing_counter( case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: value = srv_stats.pages_page_compression_error; break; + case MONITOR_OVLD_PAGES_PAGE_ENCRYPTED: + value = srv_stats.pages_page_encrypted; + break; + case MONITOR_OVLD_PAGES_PAGE_DECRYPTED: + value = srv_stats.pages_page_decrypted; + break; + case MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR: + value = srv_stats.pages_page_encryption_error; + break; default: ut_error; diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index a6035bdaa6c73..6c3eda3de17b6 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1922,6 +1922,10 @@ srv_export_innodb_status(void) export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; + export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error; + export_vars.innodb_pages_page_decrypted = srv_stats.pages_page_decrypted; + export_vars.innodb_pages_page_encrypted = srv_stats.pages_page_encrypted; + export_vars.innodb_pages_page_encryption_error = srv_stats.pages_page_encryption_error; export_vars.innodb_defragment_compression_failures = btr_defragment_compression_failures; diff --git a/unittest/eperi/CMakeLists.txt b/unittest/eperi/CMakeLists.txt new file mode 100644 index 0000000000000..a833f17f5bccc --- /dev/null +++ b/unittest/eperi/CMakeLists.txt @@ -0,0 +1,155 @@ +# Copyright (C) 2014 eperi GmbH. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql + ${PCRE_INCLUDES} + ${CMAKE_SOURCE_DIR}/include +# ${CMAKE_SOURCE_DIR}/include/mysql + ${CMAKE_SOURCE_DIR}/unittest/mytap + ${CMAKE_SOURCE_DIR}/extra/yassl/include + ${CMAKE_SOURCE_DIR}/storage/perfschema/unittest + ${CMAKE_SOURCE_DIR}/storage/perfschema + ${CMAKE_SOURCE_DIR}/storage/xtradb/include + ) + +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D SINGLETON_TEST_DATA=\\\"${CMAKE_SOURCE_DIR}/unittest/eperi\\\" ") + +MY_ADD_TESTS(eperi_aes + EXT "cc" + LINK_LIBRARIES mysys_ssl) + + +if (WIN32) + +else() + +MY_ADD_TESTS( + pageenc + EXT "cc" + LINK_LIBRARIES mysys_ssl xtradb perfschema mysys sql mysql) + +MY_ADD_TESTS(EperiKeySingleton + EXT "cc" + LINK_LIBRARIES xtradb pcre mysys_ssl) + + +MY_ADD_TESTS(eperi) + +endif() + + +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/keys.txt + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/xaa + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/xab + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/xac + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/xad + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/xae + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/xaf + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/keys.enc + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/compressed + 
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/compressed_full + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/compressed_6bytes_av + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/secret + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/secret.enc + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/long_secret + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/long_secret.enc + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/secret256 + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/secret256.enc + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compressedaa + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compressedab + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compressedac + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compressedad + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_dynamicaa + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_dynamicab + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_dynamicac + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_dynamicad + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_redundantaa + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_redundantab + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_redundantac + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + 
${CMAKE_CURRENT_LIST_DIR}/row_format_redundantad + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compactaa + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compactab + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compactac + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compactad + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) +file(COPY + ${CMAKE_CURRENT_LIST_DIR}/row_format_compactad_encrypted + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) + \ No newline at end of file diff --git a/unittest/eperi/EperiKeySingleton-t.cc b/unittest/eperi/EperiKeySingleton-t.cc new file mode 100644 index 0000000000000..119e39fe84e0b --- /dev/null +++ b/unittest/eperi/EperiKeySingleton-t.cc @@ -0,0 +1,50 @@ +/******************************************************************//** +@file EperiKeySingleton-t.cc +Implementation of single pattern to keep keys for encrypting/decrypting pages. + +Created 09/15/2014 +***********************************************************************/ + +#include "EperiKeySingleton-t.h" +#include +#include +#include +#include +#include +#include + + +EperiKeySingleton::EperiKeySingleton() { +} + +EperiKeySingleton::~EperiKeySingleton() { +} + + + + +void printEntry(struct keyentry *entry, uint id) +{ + if( NULL == entry) + printf("No such keyID = %d\n", id); + else + printf("%3u. 
id:%3u \tiv:%s \tkey:%s\n", id, entry->id, entry->iv, entry->key); +} + + +int main() +{ + plan(1); + #ifdef SINGLETON_TEST_DATA + + printf("%s\n", "main() EperiKeySingleton.cc"); + printf("%s\n", SINGLETON_TEST_DATA); + KeySingleton& ksp = KeySingleton::getInstance( "keys.txt", SINGLETON_TEST_DATA, 1, "secret"); + printEntry(ksp.getKeys(0), 0); + + return EXIT_SUCCESS; + #else + + #endif + +} diff --git a/unittest/eperi/EperiKeySingleton-t.h b/unittest/eperi/EperiKeySingleton-t.h new file mode 100644 index 0000000000000..f955686322f87 --- /dev/null +++ b/unittest/eperi/EperiKeySingleton-t.h @@ -0,0 +1,17 @@ +/******************************************************************//** +@file EperiKeySingleton-t.h +Implementation of singleton pattern to keep keys for encrypting/decrypting pages. + +Created 09/15/2014 +***********************************************************************/ + +#ifndef EPERIKEYSINGLETON_T_H_ +#define EPERIKEYSINGLETON_T_H_ + +class EperiKeySingleton { +public: + EperiKeySingleton(); + virtual ~EperiKeySingleton(); +}; + +#endif /* EPERIKEYSINGLETON_T_H_ */ diff --git a/unittest/eperi/compressed b/unittest/eperi/compressed new file mode 100644 index 0000000000000..9ae0e192b4444 Binary files /dev/null and b/unittest/eperi/compressed differ diff --git a/unittest/eperi/compressed_6bytes_av b/unittest/eperi/compressed_6bytes_av new file mode 100644 index 0000000000000..56a76a656a1da Binary files /dev/null and b/unittest/eperi/compressed_6bytes_av differ diff --git a/unittest/eperi/compressed_full b/unittest/eperi/compressed_full new file mode 100644 index 0000000000000..9404efe4cce4a Binary files /dev/null and b/unittest/eperi/compressed_full differ diff --git a/unittest/eperi/eperi-t.c b/unittest/eperi/eperi-t.c new file mode 100644 index 0000000000000..255dad36475ab --- /dev/null +++ b/unittest/eperi/eperi-t.c @@ -0,0 +1,10 @@ +#include <tap.h> + + +int +main(int argc __attribute__((unused)),char *argv[]) +{ + plan(1); + ok(1, "Most simple test
ever"); + return 0; +} diff --git a/unittest/eperi/eperi_aes-t.cc b/unittest/eperi/eperi_aes-t.cc new file mode 100644 index 0000000000000..4b4a4e6b761da --- /dev/null +++ b/unittest/eperi/eperi_aes-t.cc @@ -0,0 +1,380 @@ +#define EP_UNIT_TEST 1 +#define UNIV_INLINE + +#define AES_OK 0 +#define AES_BAD_DATA -1 +#define AES_BAD_KEYSIZE -5 +#define AES_KEY_CREATION_FAILED -10 + +#define MY_AES_BLOCK_SIZE 16 /* Block size in bytes */ + + +typedef unsigned char byte; +typedef unsigned long int ulint; +typedef unsigned long int ibool; + +#include +#include +#include +extern "C" { +extern int my_aes_decrypt_cbc(const char* source, uint32 source_length, + char* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + int noPadding); + + +extern int my_aes_encrypt_cbc(const char* source, uint32 source_length, + char* dest, uint32 *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + int noPadding); +extern void my_bytes_to_key(const unsigned char *salt, const char *secret, unsigned char *key, unsigned char *iv); +} + +#define MY_AES_TEST_TEXTBLOCK "abcdefghijklmnopqrstuvwxyz\ + ABCDEFGHIJKLMNOPQRSTUVW\ + 1234567890ß^!\"§$%&/()=?`\ + öäüÖÄÜ+*#',.-;:_~’µ<>|³²¹¼\ + ½¬{[]}æ“¢ð€đŋħłµ”øþ@¶ſŧ↓„ł«»←\ + abcdefghijklmnopqrstuvwxyz\ + ABCDEFGHIJKLMNOPQRSTUVW\ + 1234567890ß^!\"§$%&/()=?`\ + öäüÖÄÜ+*#',.-;:_~’µ<>|³²¹¼\ + ½¬{[]}æ“¢ð€đŋħłµ”øþ@¶ſŧ↓„ł«»←\ + abcdefghijklmnopqrstuvwxyz\ + ABCDEFGHIJKLMNOPQRSTUVW\ + 1234567890ß^!\"§$%&/()=?`\ + öäüÖÄÜ+*#',.-;:_~’µ<>|³²¹¼\ + ½¬{[]}æ“¢ð€đŋħłµ”øþ@¶ſŧ↓„ł«»←\ + abcdefghijklmnopqrstuvwxyz\ + ABCDEFGHIJKLMNOPQRSTUVW\ + 1234567890ß^!\"§$%&/()=?`\ + öäüÖÄÜ+*#',.-;:_~’µ<>|³²¹¼\ + ½¬{[]}æ“¢ð€đŋħłµ”øþ@¶ſŧ↓„ł«»←\ + abcdefghijklmnopqrstuvwxyz\ + ABCDEFGHIJKLMNOPQRSTUVW\ + 1234567890ß^!\"§$%&/()=?`\ + öäüÖÄÜ+*#',.-;:_~’µ<>|³²¹¼\ + ½¬{[]}æ“¢ð€đŋħłµ”øþ@¶ſŧ↓„ł«»←\ + " + +#define MY_AES_TEST_JOSHUA " David Lightman: [typing] What is the primary 
goal?\ +Joshua: You should know, Professor. You programmed me.\ +David Lightman: Oh, come on.\ +David Lightman: [typing] What is the primary goal?\ +Joshua: To win the game.\ +" + + +byte* readFile(char* fileName) { +FILE *fileptr; +byte *buffer; +long filelen; + +fileptr = fopen(fileName, "rb"); // Open the file in binary mode +fseek(fileptr, 0, SEEK_END); // Jump to the end of the file +filelen = ftell(fileptr); // Get the current byte offset in the file +rewind(fileptr); // Jump back to the beginning of the file + +buffer = (byte *)malloc((filelen+1)*sizeof(char)); // Enough memory for file + \0 +fread(buffer, filelen, 1, fileptr); // Read in the entire file +fclose(fileptr); // Close the file +return buffer; +} + + + + + + +void +test_cbc128noPadding() +{ + char expected[]= + { + 0x51 , 0xBC , 0xF9 , 0x96 , 0xCB , 0x6A , 0x6D , 0x18 , 0x08 , 0xE1 , 0x08 , 0xC5 , 0x07 , 0x78 , 0x70 , 0xA6, + 0x15 , 0x3E , 0x41 , 0x34 , 0xEC , 0x5E , 0xA2 , 0x67 , 0x52 , 0x51 , 0x87 , 0x61 , 0x8A , 0x15 , 0xE0 , 0xD7, + 0x1D , 0x9A , 0x5B , 0x4A , 0xF9 , 0x9F , 0x13 , 0xEE , 0x3B , 0x77 , 0x1E , 0xD1 , 0xF6 , 0x54 , 0xAD , 0xFE + }; + plan(1); + int i; + char* source = "int i = memcmp(decbuf,inbuf,16);"; + uint32 s_len = strlen(source); + char* dest = (char* ) malloc(100); + char* result = (char*) malloc(100); + + uint32 dest_len = 0; + unsigned char key[16] = {0x58, 0x3b, 0xe7, 0xf3, 0x34, 0xf8, + 0x5e, 0x7d, 0x9d, 0xdb, 0x36, 0x2e, 0x9a, 0xc3, 0x81, 0x51}; + uint8 k_len = 16; + unsigned char iv[16] = {0x33, 0x25, 0xcc, 0x3f, 0x02, 0x20, 0x3f, 0xb6, 0xb8, + 0x49, 0x99, 0x00, 0x42, 0xe5, 0x8b, 0xcb}; + uint8 i_len = 16; + int ec = my_aes_encrypt_cbc(source, s_len, dest, &dest_len, (unsigned char*) &key, k_len,(unsigned char*) &iv, i_len, 1); + ok(ec == AES_OK, "Checking return code."); + ok(memcmp(expected,dest,dest_len)==0, "expected cipher text"); + ok(memcmp(source,dest,32)!=0,"plain and cipher text differ"); + ok(dest_len == s_len, "input length = output length, cbc 128 no 
padding"); + + ec = my_aes_decrypt_cbc(dest , dest_len, result, &dest_len, (unsigned char*) &key, k_len, (unsigned char*) &iv, i_len, 1); + ok((dest_len == s_len) && (ec == AES_OK) && (strncmp(result, "int i = memcmp(decbuf,inbuf,16);",dest_len)==0),"Decrypted text is identical to original text."); + free(result); + free(dest); +} + +void +test_cbc128() +{ + char expected[]= + { + 0x51 , 0xBC , 0xF9 , 0x96 , 0xCB , 0x6A , 0x6D , 0x18 , 0x08 , 0xE1 , 0x08 , 0xC5 , 0x07 , 0x78 , 0x70 , 0xA6, + 0x15 , 0x3E , 0x41 , 0x34 , 0xEC , 0x5E , 0xA2 , 0x67 , 0x52 , 0x51 , 0x87 , 0x61 , 0x8A , 0x15 , 0xE0 , 0xD7, + 0x1D , 0x9A , 0x5B , 0x4A , 0xF9 , 0x9F , 0x13 , 0xEE , 0x3B , 0x77 , 0x1E , 0xD1 , 0xF6 , 0x54 , 0xAD , 0xFE + }; + plan(2); + int i; + char* source = "int i = memcmp(decbuf,inbuf,16);"; + uint32 s_len = strlen(source); + char* dest = (char* ) malloc(100); + char* result = (char*) malloc(100); + + uint32 dest_len = 0; + unsigned char key[16] = {0x58, 0x3b, 0xe7, 0xf3, 0x34, 0xf8, + 0x5e, 0x7d, 0x9d, 0xdb, 0x36, 0x2e, 0x9a, 0xc3, 0x81, 0x51}; + uint8 k_len = 16; + unsigned char iv[16] = {0x33, 0x25, 0xcc, 0x3f, 0x02, 0x20, 0x3f, 0xb6, 0xb8, + 0x49, 0x99, 0x00, 0x42, 0xe5, 0x8b, 0xcb}; + uint8 i_len = 16; + int ec = my_aes_encrypt_cbc(source, s_len, dest, &dest_len, (unsigned char*) &key, k_len,(unsigned char*) &iv, i_len, 0); + ok(ec == AES_OK, "Checking return code."); + ok(memcmp(expected,dest,48)==0, "expected cipher text"); + ok(memcmp(source,dest,32)!=0,"plain and cipher text differ"); + + ec = my_aes_decrypt_cbc(dest , dest_len, result, &dest_len, (unsigned char*) &key, k_len, (unsigned char*) &iv, i_len, 0); + ok((dest_len == s_len) && (ec == AES_OK) && (strncmp(result, "int i = memcmp(decbuf,inbuf,16);",dest_len)==0),"Decrypted text is identical to original text."); + free(result); + free(dest); +} + +void +test_cbc192noPadding() +{ + + plan(3); + int i; + char* source = "int i = memcmp(decbuf,inbuf,16);"; + uint32 s_len = strlen(source); + char* dest = 
(char* ) malloc(100); + char* result = (char*) malloc(100); + + uint32 dest_len = 0; + unsigned char key[24] = {0x58, 0x3b, 0xe7, 0xf3, 0x34, 0xf8, + 0x5e, 0x7d, 0x9d, 0xdb, 0x36, 0x2e, 0x9a, 0xc3, 0x81, 0x51, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08}; + uint8 k_len = 24; + unsigned char iv[16] = {0x33, 0x25, 0xcc, 0x3f, 0x02, 0x20, 0x3f, 0xb6, 0xb8, + 0x49, 0x99, 0x00, 0x42, 0xe5, 0x8b, 0xcb}; + uint8 i_len = 16; + int ec = my_aes_encrypt_cbc(source, s_len, dest, &dest_len, (unsigned char*) &key, k_len,(unsigned char*) &iv, i_len, 1); + ok (ec == AES_OK, "Checking return code."); + ok (dest_len == s_len, "input length = output length, cbc 192 no padding"); + ec = my_aes_decrypt_cbc(dest , dest_len, result, &dest_len, (unsigned char*) &key, k_len, (unsigned char*) &iv, i_len, 1); + ok((dest_len == s_len) && (ec == AES_OK) && (strncmp(result, "int i = memcmp(decbuf,inbuf,16);",dest_len)==0),"Decrypted text is identical to original text."); + free(result); + free(dest); +} + +void +test_cbc192() +{ + + plan(4); + int i; + char* source = "int i = memcmp(decbuf,inbuf,16);"; + uint32 s_len = strlen(source); + char* dest = (char* ) malloc(100); + char* result = (char*) malloc(100); + + uint32 dest_len = 0; + unsigned char key[24] = {0x58, 0x3b, 0xe7, 0xf3, 0x34, 0xf8, + 0x5e, 0x7d, 0x9d, 0xdb, 0x36, 0x2e, 0x9a, 0xc3, 0x81, 0x51, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08}; + uint8 k_len = 24; + unsigned char iv[16] = {0x33, 0x25, 0xcc, 0x3f, 0x02, 0x20, 0x3f, 0xb6, 0xb8, + 0x49, 0x99, 0x00, 0x42, 0xe5, 0x8b, 0xcb}; + uint8 i_len = 16; + int ec = my_aes_encrypt_cbc(source, s_len, dest, &dest_len, (unsigned char*) &key, k_len,(unsigned char*) &iv, i_len, 0); /* noPadding = 0: padded variant, as in test_cbc128/test_cbc256 */ + ok (ec == AES_OK, "Checking return code."); + ec = my_aes_decrypt_cbc(dest , dest_len, result, &dest_len, (unsigned char*) &key, k_len, (unsigned char*) &iv, i_len, 0); /* must match the padding mode used to encrypt */ + ok((dest_len == s_len) && (ec == AES_OK) && (strncmp(result, "int i = memcmp(decbuf,inbuf,16);",dest_len)==0),"Decrypted text is identical to
original text."); + free(result); + free(dest); +} + +void +test_cbc256() +{ + char expected[] = { + 0x81, 0x22, 0x05, 0xA7, 0x3E, 0x9D, 0xB2, 0x18, 0x7F, 0xE2, 0x5C, 0xB4, 0xBD, 0xCD, 0xFB, 0x9B, + 0xB6, 0xEF, 0x64, 0x2C, 0xF4, 0x53, 0x9B, 0x29, 0x98, 0x3A, 0xD6, 0xDE, 0xB2, 0x65, 0xEF, 0x85, + 0xEF, 0x4B, 0xDA, 0x8F, 0xD9, 0xEB, 0xD7, 0x07, 0x80, 0x03, 0x0E, 0x7C, 0x55, 0x2E, 0x97, 0x47 + }; + plan(5); + int i; + char* source = "int i = memcmp(decbuf,inbuf,16);"; + uint32 s_len = strlen(source); + char* dest = (char* ) malloc(100); + char* result = (char*) malloc(100); + + uint32 dest_len = 0; + unsigned char key[32] = {0x58, 0x3b, 0xe7, 0xf3, 0x34, 0xf8, + 0x5e, 0x7d, 0x9d, 0xdb, 0x36, 0x2e, 0x9a, 0xc3, 0x81, 0x51, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08}; + uint8 k_len = 32; + unsigned char iv[16] = {0x33, 0x25, 0xcc, 0x3f, 0x02, 0x20, 0x3f, 0xb6, 0xb8, + 0x49, 0x99, 0x00, 0x42, 0xe5, 0x8b, 0xcb}; + uint8 i_len = 16; + int ec = my_aes_encrypt_cbc(source, s_len, dest, &dest_len, (unsigned char*) &key, k_len,(unsigned char*) &iv, i_len, 0); + + + ok(ec == AES_OK, "Checking return code."); + ok(memcmp(expected,dest,48)==0,"Excepted cipher text - aes 256 cbc"); + + ec = my_aes_decrypt_cbc(dest , dest_len, result, &dest_len, (unsigned char*) &key, k_len, (unsigned char*) &iv, i_len, 0); + + ok((dest_len == s_len) && (ec == AES_OK) && (strncmp(result, "int i = memcmp(decbuf,inbuf,16);",dest_len)==0),"Decrypted text is identical to original text."); + free(result); + free(dest); +} + + +void +test_cbc256noPadding() +{ + char expected[] = { + 0x81, 0x22, 0x05, 0xA7, 0x3E, 0x9D, 0xB2, 0x18, 0x7F, 0xE2, 0x5C, 0xB4, 0xBD, 0xCD, 0xFB, 0x9B, + 0xB6, 0xEF, 0x64, 0x2C, 0xF4, 0x53, 0x9B, 0x29, 0x98, 0x3A, 0xD6, 0xDE, 0xB2, 0x65, 0xEF, 0x85, + 0xEF, 0x4B, 0xDA, 0x8F, 0xD9, 0xEB, 0xD7, 0x07, 0x80, 0x03, 0x0E, 0x7C, 0x55, 0x2E, 0x97, 0x47 + }; + plan(6); + int i; + char* source = "int i = memcmp(decbuf,inbuf,16);"; + uint32 s_len = 
strlen(source); + char* dest = (char* ) malloc(100); + char* result = (char*) malloc(100); + + uint32 dest_len = 0; + unsigned char key[32] = {0x58, 0x3b, 0xe7, 0xf3, 0x34, 0xf8, + 0x5e, 0x7d, 0x9d, 0xdb, 0x36, 0x2e, 0x9a, 0xc3, 0x81, 0x51, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08}; + uint8 k_len = 32; + unsigned char iv[16] = {0x33, 0x25, 0xcc, 0x3f, 0x02, 0x20, 0x3f, 0xb6, 0xb8, + 0x49, 0x99, 0x00, 0x42, 0xe5, 0x8b, 0xcb}; + uint8 i_len = 16; + int ec = my_aes_encrypt_cbc(source, s_len, dest, &dest_len, (unsigned char*) &key, k_len,(unsigned char*) &iv, i_len, 1); + + + ok(ec == AES_OK, "Checking return code."); + ok(memcmp(expected,dest,dest_len)==0,"Excepted cipher text - aes 256 cbc"); + ok(s_len==dest_len,"input length = output length, cbc 256 no padding"); + + ec = my_aes_decrypt_cbc(dest , dest_len, result, &dest_len, (unsigned char*) &key, k_len, (unsigned char*) &iv, i_len, 1); + + ok((dest_len == s_len) && (ec == AES_OK) && (strncmp(result, "int i = memcmp(decbuf,inbuf,16);",dest_len)==0),"Decrypted text is identical to original text."); + free(result); + free(dest); +} + + + +void +test_cbc256noPaddingWrongInputSize() +{ + char expected[] = { + 0x81, 0x22, 0x05, 0xA7, 0x3E, 0x9D, 0xB2, 0x18, 0x7F, 0xE2, 0x5C, 0xB4, 0xBD, 0xCD, 0xFB, 0x9B, + 0xB6, 0xEF, 0x64, 0x2C, 0xF4, 0x53, 0x9B, 0x29, 0x98, 0x3A, 0xD6, 0xDE, 0xB2, 0x65, 0xEF, 0x85, + 0xEF, 0x4B, 0xDA, 0x8F, 0xD9, 0xEB, 0xD7, 0x07, 0x80, 0x03, 0x0E, 0x7C, 0x55, 0x2E, 0x97, 0x47 + }; + plan(7); + int i; + char* source = "int i = memcmp(decbuf,inbuf,16);sdfsd"; + uint32 s_len = strlen(source); + char* dest = (char* ) malloc(100); + char* result = (char*) malloc(100); + + uint32 dest_len = 0; + unsigned char key[32] = {0x58, 0x3b, 0xe7, 0xf3, 0x34, 0xf8, + 0x5e, 0x7d, 0x9d, 0xdb, 0x36, 0x2e, 0x9a, 0xc3, 0x81, 0x51, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08, + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08}; + uint8 k_len = 32; + unsigned char iv[16] = {0x33, 0x25, 
0xcc, 0x3f, 0x02, 0x20, 0x3f, 0xb6, 0xb8, + 0x49, 0x99, 0x00, 0x42, 0xe5, 0x8b, 0xcb}; + uint8 i_len = 16; + int ec = my_aes_encrypt_cbc(source, s_len, dest, &dest_len, (unsigned char*) &key, k_len,(unsigned char*) &iv, i_len, 1); + ok (ec== AES_BAD_DATA, "wrong input size detected"); + + free(result); + free(dest); +} + + + + + + + +/* + * Test if bytes for AES Key and IV are generated in the same way as in openssl commandline. + */ +void +test_bytes_to_key() +{ + + char expected[32] = { + 0x2E, 0xFF , 0xB7 , 0x1D , 0xDB , 0x97 , 0xA8 , 0x3A , 0x03 , 0x5A , 0x06 , 0xDF , 0xB0 , 0xDD , 0x72 , 0x29, + 0xA6, 0xD9 , 0x1F , 0xFB , 0xE6 , 0x06 , 0x3B , 0x4B , 0x81 , 0x23 , 0x85 , 0x45 , 0x71 , 0x28 , 0xFF , 0x1F + }; + plan(8); + unsigned char salt[] = {0x0c, 0x3b, 0x72, 0x1b, 0xfe, 0x07, 0xe2, 0xb3}; + char *secret = "secret"; + unsigned char* key = (unsigned char*)malloc(32 * sizeof(char)); + unsigned char iv[16]; + unsigned char keyresult[32] = {0x2E, 0xFF, 0xB7, 0x1D, 0xDB, 0x97, 0xA8, 0x3A, + 0x03, 0x5A, 0x06, 0xDF, 0xB0, 0xDD, 0x72, 0x29, + 0xA6, 0xD9, 0x1F, 0xFB, 0xE6, 0x06, 0x3B, 0x4B, + 0x81, 0x23, 0x85, 0x45, 0x71, 0x28, 0xFF, 0x1F}; + unsigned char ivresult[16] = {0x61, 0xFF, 0xC8, 0x27, 0x5B, 0x46, 0x4C, 0xBD, + 0x55, 0x82, 0x0E, 0x54, 0x8F, 0xE4, 0x44, 0xD9}; + + my_bytes_to_key((unsigned char*) &salt, secret, (unsigned char*) key, (unsigned char*) &iv); + ok(memcmp(key, &keyresult, 32) == 0, "BytesToKey key generated successfully."); + ok(memcmp(iv, &ivresult, 16) == 0, "BytesToKey iv generated successfully."); + // following should ensure, that yassl and openssl calculate the same! 
+ ok(memcmp(expected,key,32)==0, "expected result"); + free(key); +} + +int +main(int argc __attribute__((unused)),char *argv[]) +{ + test_cbc256noPadding(); + test_cbc192noPadding(); + test_cbc128noPadding(); + test_cbc256noPaddingWrongInputSize(); + + test_cbc128(); + test_cbc192(); + test_cbc256(); + + test_bytes_to_key(); + + + return 0; +} + + + diff --git a/unittest/eperi/keys.enc b/unittest/eperi/keys.enc new file mode 100644 index 0000000000000..2774d58404cdd Binary files /dev/null and b/unittest/eperi/keys.enc differ diff --git a/unittest/eperi/keys.txt b/unittest/eperi/keys.txt new file mode 100644 index 0000000000000..b2608073b004f --- /dev/null +++ b/unittest/eperi/keys.txt @@ -0,0 +1,15 @@ +#Page encryption key file +#Each entry consists of ;; +1;F5502320F8429037B8DAEF761B189D12;770A8A65DA156D24EE2A093277530142 +2;35B2FF0795FB84BBD666DB8430CA214E;4D92199549E0F2EF009B4160F3582E5528A11A45017F3EF8 +3;7E892875A52C59A3B588306B13C31FBD;B374A26A71490437AA024E4FADD5B497FDFF1A8EA6FF12F6FB65AF2720B59CCF +15;7E892875A52C59A3B588306B13C31FBD;B374A26A71490437AA024E4FADD5B497FDFF1A8EA6FF12F6FB65AF2720B59CCF +#15;7E892875A52C59A3B588306B13C31FBD;B374A26A71490437AA024E4FADD5B497FDFF1A8EA6FF12F6FB65AF2720B59CCF +1024;7E892875A52C59A3B588306B13C31FBD;B374A26A71490437AA024E4FADD5B497FDFF1A8EA6FF12F6FB65AF2720B59CCF +3;7E892875A52C59A3B5883z6B13C31FBD;B374A26A71490437AA024E4FADD5B497FDFF1A8EA6FF12F6FB65AF2720B59CCF +255;F5502320F8429037B8DAEF761B189D12;770A8A65DA156D24EE2A093277530142 +256;F5502320F8429037B8DAEF761B189D12;770A8A65DA156D24EE2A093277530142 
+4;F5502320F8429037B8DAEF761B189D12;770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142770A8A65DA156D24EE2A093277530142 +5;021B0663D4DD7B54E2EBC852677E40BD;18420B5CBA31CCDFFE9716E91EB61374D05914F3ADE23E03 +6;9BF92CEA026CE732DA80821122A8CE97;966050D7777350B6FD5CCB3E5F648DA45C63BEFB6DEDDFA13443F156B7D35C84 +7;BC44D4AFD2D9FCD82A679E4DC6700D06;B5EA210C8C09EF20DB95EC584714A89F diff --git a/unittest/eperi/kf.txt b/unittest/eperi/kf.txt new file mode 100644 index 0000000000000..e468e2850e74f --- /dev/null +++ b/unittest/eperi/kf.txt @@ -0,0 +1 @@ +Florin diff --git a/unittest/eperi/kfo.txt b/unittest/eperi/kfo.txt new file mode 100644 index 0000000000000..b27dd3b900c93 --- /dev/null +++ b/unittest/eperi/kfo.txt @@ -0,0 +1 @@ +Salted__�Yc۸V��ʱ��T����/r��ҩs \ No newline at end of file diff --git a/unittest/eperi/long_secret b/unittest/eperi/long_secret new file mode 100644 index 0000000000000..0d89717a04aba --- /dev/null +++ b/unittest/eperi/long_secret @@ -0,0 +1 @@ +2304832408230498 3094823084092384093824908234 480 32480923840981309548sdmflösdkmflkjmfokjmk4rlkwemflkjrl23409098dsk39i980938098098234098098sdkfölklök1230980sd2304983209483209489fklödkfölk3209483209480932482309480923480923480923480923840932840923840932843399 \ No newline at end of file diff --git a/unittest/eperi/long_secret.enc 
b/unittest/eperi/long_secret.enc new file mode 100644 index 0000000000000..84105eab40b6a --- /dev/null +++ b/unittest/eperi/long_secret.enc @@ -0,0 +1,3 @@ +Salted__�q�n�5��ԛɊDGU+\n"^dž��{� �~����u�UF�=d��~��o��񪬫�?� ��9���Ҝ�ED}������A��L%�eR,��9o���޷ˈl>U)�)���>.ڬZ��Z~@��d� +��fuR +s%^Q��MJ2���Dڡ;&T���X �x�s͛:_�We��U#� ��*��X�pa������*f"��z(�no�r�� �.�YUC� +#include +#include + +#include +#define FIL_PAGE_TYPE_FSP_HDR 8 /*!< File space header */ +#define FIL_PAGE_TYPE_XDES 9 /*!< Extent descriptor page */ +#define PAGE_ENCRYPTION_WILL_NOT_ENCRYPT 5 + + +extern int summef(int a, int b); +extern int summef2(int a, int b); +extern int multiplikation(int a, int b); +extern ulint fil_page_encryption_calc_checksum(unsigned char* buf, ulint len); +extern "C" { +extern int my_aes_decrypt_cbc(const char* source, unsigned long int source_length, + char* dest, unsigned long int *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length); +} +void +mach_write_to_4( +/*============*/ + byte* b, /*!< in: pointer to four bytes where to store */ + ulint n) /*!< in: ulint integer to be stored */ +{ + + b[0] = (byte)(n >> 24); + b[1] = (byte)(n >> 16); + b[2] = (byte)(n >> 8); + b[3] = (byte) n; +} +ulint +mach_read_from_2( +/*=============*/ + const byte* b) /*!< in: pointer to 2 bytes */ +{ + return(((ulint)(b[0]) << 8) | (ulint)(b[1])); +} +ulint +mach_read_from_1( +/*=============*/ + const byte* b) /*!< in: pointer to 1 bytes */ +{ + return((ulint)(b[0])); +} + +extern byte* +fil_encrypt_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /*!< in: compression level */ + ulint* out_len, /*!< out: actual length of compressed page */ + ulint* errorCode, /*!< out: an error code. 
set, if page is intentionally not encrypted */ + byte* tmp_encryption_buf, /*!< in: temorary buffer or NULL */ + ulint mode /*!< in: calling mode */ + ); + +/****************************************************************//** +For page encrypted pages decrypt the page after actual read +operation. +@return decrypted page */ +extern ulint +fil_decrypt_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< out: buffer from which to read; in aio + this must be appropriately aligned */ + ulint len, /*!< in: length of output buffer.*/ + ulint* write_size, /*!< in/out: Actual payload size of the decrypted data. */ + ibool* page_compressed, + byte* tmp_encryption_buf, /*!< in: temorary buffer or NULL */ + ulint mode /*!