From 5cf2f86c48bdfadebaceddefbcb03bc454a91fb3 Mon Sep 17 00:00:00 2001 From: Frank Hunleth Date: Sun, 10 Jul 2022 16:59:41 -0400 Subject: [PATCH] WIP: Synchronously initialize the data partition This fixes a race condition on first boot where the data partition isn't ready for some early writes. --- lib/nerves_runtime/application.ex | 13 +-- lib/nerves_runtime/init.ex | 50 ++++----- lib/nerves_runtime/kv.ex | 86 +++------------ lib/nerves_runtime/kv_backend/cache.ex | 143 +++++++++++++++++++++++++ lib/nerves_runtime/power.ex | 2 +- 5 files changed, 183 insertions(+), 111 deletions(-) create mode 100644 lib/nerves_runtime/kv_backend/cache.ex diff --git a/lib/nerves_runtime/application.ex b/lib/nerves_runtime/application.ex index a16c2c1..2375100 100644 --- a/lib/nerves_runtime/application.ex +++ b/lib/nerves_runtime/application.ex @@ -9,9 +9,9 @@ defmodule Nerves.Runtime.Application do @impl Application def start(_type, _args) do - load_services() - options = Application.get_all_env(:nerves_runtime) + + load_services(options) children = [{KV, options} | target_children()] opts = [strategy: :one_for_one, name: Nerves.Runtime.Supervisor] @@ -20,18 +20,17 @@ defmodule Nerves.Runtime.Application do if Mix.target() == :host do defp target_children(), do: [] - defp load_services(), do: :ok + defp load_services(_options), do: :ok else defp target_children() do [ NervesLogging.KmsgTailer, NervesLogging.SyslogTailer, - Nerves.Runtime.Power, - Nerves.Runtime.Init + Nerves.Runtime.Power ] end - defp load_services() do + defp load_services(options) do # On systems with hardware random number generation, it is important that # "rngd" gets started as soon as possible to start adding entropy to the # system. So much code directly or indirectly uses random numbers that it's @@ -43,6 +42,8 @@ defmodule Nerves.Runtime.Application do _ = try_load_sysctl_conf() + _ = Nerves.Runtime.Init.init_data_partition(options) + :ok end diff --git a/lib/nerves_runtime/init.ex b/lib/nerves_runtime/init.ex index 28b7856..b4917fd 100644 --- a/lib/nerves_runtime/init.ex +++ b/lib/nerves_runtime/init.ex @@ -1,18 +1,13 @@ defmodule Nerves.Runtime.Init do @moduledoc """ - GenServer that handles device initialization. + Data partition initialization - Initialization currently consists of: - - 1. Mounting the application partition - 2. If the application partition can't be mounted, format it, and then mount it. - - Device initialization is usually a first boot only operation. It's possible - that device filesystems get corrupt enough to cause them to be reinitialized. - Since corruption should be rare, Nerves systems create firmware images - without formatting the application partition. This has the benefit of - exercising the corruption repair code. It's also required since some - filesystem types can only be formatted on device. + Data partition initialization is usually a first boot only operation. It's + possible that device filesystems get corrupt enough to cause them to be + reinitialized. Since corruption should be rare, Nerves systems create + firmware images without formatting the application partition. This has the + benefit of exercising the corruption repair code. It's also required since + some filesystem types can only be formatted on device. Long format times can be problematic in manufacturing. If this is an issue, see if you can use F2FS since it formats much faster than ext4. Some devices @@ -20,10 +15,9 @@ defmodule Nerves.Runtime.Init do generate a UUID. Look into hardcoding UUIDs or enabling a hw random number generator to increase entropy. """ - use GenServer alias Nerves.Runtime - alias Nerves.Runtime.KV + alias Nerves.Runtime.KVBackend.Cache alias Nerves.Runtime.MountParser require Logger @@ -40,27 +34,19 @@ defmodule Nerves.Runtime.Init do # can do so. @app_partition_uuid "3041e38d-615b-48d4-affb-a7787b5c4c39" - @spec start_link(any()) :: GenServer.on_start() - def start_link(_args) do - GenServer.start_link(__MODULE__, [], name: __MODULE__) - end - - @impl GenServer - def init(_args) do - init_application_partition() + @doc """ + Format and mount the data partition if it's missing or corrupt - # This GenServer is only used as a hook to initialize the application data - # partition after the logging GenServers get started. It doesn't do - # anything afterwards, so exit the process. - :ignore - end + If everything is ok, this returns. + """ + @spec init_data_partition(keyword()) :: :mounted | :mounted_with_error | :noop | :unmounted + def init_data_partition(options) do + kv_cache = Cache.new(options) - @spec init_application_partition :: :mounted | :mounted_with_error | :noop | :unmounted - def init_application_partition() do prefix = "nerves_fw_application_part0" - fstype = KV.get_active("#{prefix}_fstype") - target = KV.get_active("#{prefix}_target") - devpath = KV.get_active("#{prefix}_devpath") + fstype = Cache.get_active(kv_cache, "#{prefix}_fstype") + target = Cache.get_active(kv_cache, "#{prefix}_target") + devpath = Cache.get_active(kv_cache, "#{prefix}_devpath") %{mounted: nil, fstype: fstype, target: target, devpath: devpath, format_performed: false} |> do_format() diff --git a/lib/nerves_runtime/kv.ex b/lib/nerves_runtime/kv.ex index 37a3c93..ef05f05 100644 --- a/lib/nerves_runtime/kv.ex +++ b/lib/nerves_runtime/kv.ex @@ -133,6 +133,8 @@ defmodule Nerves.Runtime.KV do require Logger + alias Nerves.Runtime.KVBackend.Cache + @typedoc """ The KV store is a string -> string map @@ -225,97 +227,37 @@ defmodule Nerves.Runtime.KV do @impl GenServer def init(opts) do - {:ok, initial_state(opts)} + {:ok, Cache.new(opts)} end @impl GenServer def handle_call({:get_active, key}, _from, s) do - {:reply, active(key, s), s} + {:reply, Cache.get_active(s, key), s} end def handle_call({:get, key}, _from, s) do - {:reply, Map.get(s.contents, key), s} + {:reply, Cache.get(s, key), s} end def handle_call(:get_all_active, _from, s) do - active = active(s) <> "." - reply = filter_trim_active(s, active) - {:reply, reply, s} + {:reply, Cache.get_all_active(s), s} end def handle_call(:get_all, _from, s) do - {:reply, s.contents, s} + {:reply, Cache.get_all(s), s} end def handle_call({:put, kv}, _from, s) do - {reply, s} = do_put(kv, s) - {:reply, reply, s} - end - - def handle_call({:put_active, kv}, _from, s) do - {reply, s} = - Map.new(kv, fn {key, value} -> {"#{active(s)}.#{key}", value} end) - |> do_put(s) - - {:reply, reply, s} - end - - defp active(s), do: Map.get(s.contents, "nerves_fw_active", "") - - defp active(key, s) do - Map.get(s.contents, "#{active(s)}.#{key}") - end - - defp filter_trim_active(s, active) do - Enum.filter(s.contents, fn {k, _} -> - String.starts_with?(k, active) - end) - |> Enum.map(fn {k, v} -> {String.replace_leading(k, active, ""), v} end) - |> Enum.into(%{}) - end - - defp do_put(kv, s) do - case s.backend.save(kv, s.options) do - :ok -> {:ok, %{s | contents: Map.merge(s.contents, kv)}} - error -> {error, s} + case Cache.put(s, kv) do + {:ok, new_s} -> {:reply, :ok, new_s} + error -> {:reply, error, s} end end - defguardp is_module(v) when is_atom(v) and not is_nil(v) - - defp initial_state(options) do - case options[:kv_backend] do - {backend, opts} when is_module(backend) and is_list(opts) -> - initialize(backend, opts) - - backend when is_module(backend) -> - initialize(backend, []) - - _ -> - # Handle Nerves.Runtime v0.12.0 and earlier way - initial_contents = - options[:modules][Nerves.Runtime.KV.Mock] || options[Nerves.Runtime.KV.Mock] - - Logger.error( - "Using Nerves.Runtime.KV.Mock is deprecated. Use `config :nerves_runtime, kv_backend: {Nerves.Runtime.KVBackend.InMemory, contents: #{inspect(initial_contents)}}`" - ) - - initialize(Nerves.Runtime.KVBackend.InMemory, contents: initial_contents) - end - rescue - error -> - Logger.error("Nerves.Runtime has a bad KV configuration: #{inspect(error)}") - initialize(Nerves.Runtime.KVBackend.InMemory, []) - end - - defp initialize(backend, options) do - case backend.load(options) do - {:ok, contents} -> - %{backend: backend, options: options, contents: contents} - - {:error, reason} -> - Logger.error("Nerves.Runtime failed to load KV: #{inspect(reason)}") - %{backend: Nerves.Runtime.KVBackend.InMemory, options: [], contents: %{}} + def handle_call({:put_active, kv}, _from, s) do + case Cache.put_active(s, kv) do + {:ok, new_s} -> {:reply, :ok, new_s} + error -> {:reply, error, s} end end end diff --git a/lib/nerves_runtime/kv_backend/cache.ex b/lib/nerves_runtime/kv_backend/cache.ex new file mode 100644 index 0000000..333eb85 --- /dev/null +++ b/lib/nerves_runtime/kv_backend/cache.ex @@ -0,0 +1,143 @@ +defmodule Nerves.Runtime.KVBackend.Cache do + @moduledoc """ + Cache for a Key-Value store + + This module makes operating on KV stores more efficient. Note that it + necessarily raises the potential for consistency issues. These are not + handled. Barring important reasons, `Nerves.Runtime.KV` should be used. + """ + + require Logger + + defstruct [:backend, :options, :contents] + + @typedoc false + @type t() :: %{backend: module(), options: keyword(), contents: Nerves.Runtime.KV.string_map()} + + defguardp is_module(v) when is_atom(v) and not is_nil(v) + + @doc """ + Create a new cache + + Options: + * `:kv_backend` - a KV backend of the form `{module, options}` or just `module` + """ + @spec new(keyword()) :: t() + def new(options) do + case options[:kv_backend] do + {backend, opts} when is_module(backend) and is_list(opts) -> + initialize(backend, opts) + + backend when is_module(backend) -> + initialize(backend, []) + + _ -> + # Handle Nerves.Runtime v0.12.0 and earlier way + initial_contents = + options[:modules][Nerves.Runtime.KV.Mock] || options[Nerves.Runtime.KV.Mock] + + Logger.error( + "Using Nerves.Runtime.KV.Mock is deprecated. Use `config :nerves_runtime, kv_backend: {Nerves.Runtime.KVBackend.InMemory, contents: #{inspect(initial_contents)}}`" + ) + + initialize(Nerves.Runtime.KVBackend.InMemory, contents: initial_contents) + end + rescue + error -> + Logger.error("Nerves.Runtime has a bad KV configuration: #{inspect(error)}") + initialize(Nerves.Runtime.KVBackend.InMemory, []) + end + + defp initialize(backend, options) do + case backend.load(options) do + {:ok, contents} -> + %{backend: backend, options: options, contents: contents} + + {:error, reason} -> + Logger.error("Nerves.Runtime failed to load KV: #{inspect(reason)}") + %{backend: Nerves.Runtime.KVBackend.InMemory, options: [], contents: %{}} + end + end + + @doc """ + Get the key for only the active firmware slot + """ + @spec get_active(t(), String.t()) :: String.t() | nil + def get_active(cache, key) when is_binary(key) do + active(key, cache) + end + + @doc """ + Get the key regardless of firmware slot + """ + @spec get(t(), String.t()) :: String.t() | nil + def get(cache, key) when is_binary(key) do + Map.get(cache.contents, key) + end + + @doc """ + Get all key value pairs for only the active firmware slot + """ + @spec get_all_active(t()) :: Nerves.Runtime.KV.string_map() + def get_all_active(cache) do + active = active(cache) <> "." + filter_trim_active(cache, active) + end + + @doc """ + Get all keys regardless of firmware slot + """ + @spec get_all(t()) :: Nerves.Runtime.KV.string_map() + def get_all(cache) do + cache.contents + end + + @doc """ + Write a key-value pair to the firmware metadata + """ + @spec put(t(), String.t(), String.t()) :: {:ok, t()} | {:error, any()} + def put(cache, key, value) when is_binary(key) and is_binary(value) do + put(cache, %{key => value}) + end + + @doc """ + Write a collection of key-value pairs to the firmware metadata + """ + @spec put(t(), Nerves.Runtime.KV.string_map()) :: {:ok, t()} | {:error, any()} + def put(cache, kv) when is_map(kv) do + with :ok <- cache.backend.save(kv, cache.options) do + {:ok, %{cache | contents: Map.merge(cache.contents, kv)}} + end + end + + @doc """ + Write a key-value pair to the active firmware slot + """ + @spec put_active(t(), String.t(), String.t()) :: {:ok, t()} | {:error, any()} + def put_active(cache, key, value) when is_binary(key) and is_binary(value) do + put_active(cache, %{key => value}) + end + + @doc """ + Write a collection of key-value pairs to the active firmware slot + """ + @spec put_active(t(), Nerves.Runtime.KV.string_map()) :: {:ok, t()} | {:error, any()} + def put_active(cache, kv) when is_map(kv) do + kvs = Map.new(kv, fn {key, value} -> {"#{active(cache)}.#{key}", value} end) + put(cache, kvs) + end + + defp active(cache), do: Map.get(cache.contents, "nerves_fw_active", "") + + defp active(key, cache) do + Map.get(cache.contents, "#{active(cache)}.#{key}") + end + + defp filter_trim_active(cache, active) do + Enum.filter(cache.contents, fn {k, _} -> + String.starts_with?(k, active) + end) + |> Enum.map(fn {k, v} -> {String.replace_leading(k, active, ""), v} end) + |> Enum.into(%{}) + end +end diff --git a/lib/nerves_runtime/power.ex b/lib/nerves_runtime/power.ex index 79a6e2e..1494540 100644 --- a/lib/nerves_runtime/power.ex +++ b/lib/nerves_runtime/power.ex @@ -46,7 +46,7 @@ defmodule Nerves.Runtime.Power do Logger.info("#{__MODULE__} : device told to #{cmd}") # Invoke the appropriate command to tell erlinit that a shutdown of the - # Erlang VM is imminent. Once this returns, the Erlang has about 10 + # Erlang VM is imminent. Once this returns, Erlang has about 10 # seconds to exit unless `--graceful-powerdown` is used in the # `erlinit.config` to modify the timeout. {_, 0} = Nerves.Runtime.cmd(cmd, [], :info)