diff --git a/README.md b/README.md
index de2d11372..9b19937c8 100644
--- a/README.md
+++ b/README.md
@@ -17,24 +17,28 @@ The aim is to make the API as model-agnostic as possible while still being user-friendly.
## Examples

```julia
-  X = reshape(collect(range(-3.0,3.0,length=100)),:,1)
-  # Set simple scaling of the data
-  k₁ = SqExponentialKernel()
-  K₁ = kernelmatrix(k₁,X,obsdim=1)
+x = range(-3.0, 3.0; length=100)

-  # Set a function transformation on the data
-  k₂ = TransformedKernel(Matern32Kernel(),FunctionTransform(x->sin.(x)))
-  K₂ = kernelmatrix(k₂,X,obsdim=1)
+# A simple standardised squared-exponential / exponentiated-quadratic kernel.
+k₁ = SqExponentialKernel()
+K₁ = kernelmatrix(k₁, x)

-  # Set a matrix premultiplication on the data
-  k₃ = transform(PolynomialKernel(c=2.0,d=2.0),LowRankTransform(randn(4,1)))
-  K₃ = kernelmatrix(k₃,X,obsdim=1)
+# Set a function transformation on the data
+k₂ = Matern32Kernel() ∘ FunctionTransform(sin)
+K₂ = kernelmatrix(k₂, x)

-  # Add and sum kernels
-  k₄ = 0.5*SqExponentialKernel()*LinearKernel(c=0.5) + 0.4*k₂
-  K₄ = kernelmatrix(k₄,X,obsdim=1)
+# Set a matrix premultiplication on the data
+k₃ = PolynomialKernel(; c=2.0, degree=2) ∘ LinearTransform(randn(4, 1))
+K₃ = kernelmatrix(k₃, x)

-  plot(heatmap.([K₁,K₂,K₃,K₄],yflip=true,colorbar=false)...,layout=(2,2),title=["K₁" "K₂" "K₃" "K₄"])
+# Add and sum kernels
+k₄ = 0.5 * SqExponentialKernel() * LinearKernel(; c=0.5) + 0.4 * k₂
+K₄ = kernelmatrix(k₄, x)
+
+plot(
+    heatmap.([K₁, K₂, K₃, K₄]; yflip=true, colorbar=false)...;
+    layout=(2, 2), title=["K₁" "K₂" "K₃" "K₄"],
+)
```
@@ -43,10 +47,9 @@ The aim is to make the API as model-agnostic as possible while still being user-friendly.
## Package goals (by priority)
- Ensure AD Compatibility (already the case for Zygote, ForwardDiff)
- Toeplitz Matrices compatibility
-- BLAS backend
Directly inspired by the [MLKernels](https://github.com/trthatcher/MLKernels.jl) package.
## Issues/Contributing
-If you notice a problem or would like to contribute by adding more kernel functions or features please [submit an issue](https://github.com/JuliaGaussianProcesses/KernelFunctions.jl/issues).
+If you notice a problem or would like to contribute by adding more kernel functions or features, please [submit an issue](https://github.com/JuliaGaussianProcesses/KernelFunctions.jl/issues) or open a PR (see the [ColPrac](https://github.com/SciML/ColPrac) contribution guidelines).
diff --git a/docs/make.jl b/docs/make.jl
index 2949cb7e1..c2c14f8f9 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -33,9 +33,15 @@ makedocs(;
"create_kernel.md",
"API" => "api.md",
"Examples" => "example.md",
+ "Design" => "design.md",
],
strict=true,
checkdocs=:exports,
+ doctestfilters=[
+ r"{([a-zA-Z0-9]+,\s?)+[a-zA-Z0-9]+}",
+ r"(Array{[a-zA-Z0-9]+,\s?1}|Vector{[a-zA-Z0-9]+})",
+ r"(Array{[a-zA-Z0-9]+,\s?2}|Matrix{[a-zA-Z0-9]+})",
+ ],
)
deploydocs(;
diff --git a/docs/src/api.md b/docs/src/api.md
index e2579ff5c..2891f2aa7 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -1,38 +1,78 @@
# API Library
----
-```@contents
-Pages = ["api.md"]
-```
-
```@meta
CurrentModule = KernelFunctions
```
## Functions
+The KernelFunctions API comprises the following four functions.
```@docs
kernelmatrix
kernelmatrix!
kernelmatrix_diag
kernelmatrix_diag!
-kernelpdmat
-nystrom
```
-## Utilities
+## Input Types
+
+The above API operates on collections of inputs.
+All collections of inputs in KernelFunctions.jl are represented as `AbstractVector`s.
+To understand this choice, please see the [design notes on collections of inputs](@ref why_abstract_vectors).
+The length of any such `AbstractVector` is equal to the number of inputs in the collection.
+For example, this means that
+```julia
+size(kernelmatrix(k, x)) == (length(x), length(x))
+```
+is always true for any `Kernel` `k` and `AbstractVector` `x`.
+
+### Univariate Inputs
+
+If each input to your kernel is `Real`-valued, then any `AbstractVector{<:Real}` is a valid
+representation for a collection of inputs.
+More generally, it's completely fine to represent a collection of inputs of type `T` as, for
+example, a `Vector{T}`.
+However, this may not be the most efficient way to represent a collection of inputs.
+See [Vector-Valued Inputs](@ref) for an example.
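+
+For instance, a minimal sketch of the `Real`-valued case (the kernel choice here is purely illustrative):
+```julia
+k = SqExponentialKernel()
+x = rand(5)           # a collection of 5 Real-valued inputs
+kernelmatrix(k, x)    # returns a 5×5 matrix
+```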
+
+### Vector-Valued Inputs
+
+We recommend that collections of vector-valued inputs are stored in an
+`AbstractMatrix{<:Real}` when possible, and wrapped inside a `ColVecs` or `RowVecs` to make
+their interpretation clear:
```@docs
ColVecs
RowVecs
+```
+These types are specialised upon to ensure good performance, e.g. when computing Euclidean distances between pairs of elements.
+The benefit of using this representation, rather than a `Vector{Vector{<:Real}}`, is that
+optimised matrix-matrix multiplication functionality can be utilised when computing
+pairwise distances between inputs, which are needed for `kernelmatrix` computation.
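+
+As a short sketch of both wrappers in use (sizes chosen arbitrarily):
+```julia
+k = SqExponentialKernel()
+X = randn(3, 10)                 # a 3×10 matrix of Reals
+kernelmatrix(k, ColVecs(X))      # 10×10 matrix: each of the 10 columns is an input
+kernelmatrix(k, RowVecs(X))      # 3×3 matrix: each of the 3 rows is an input
+```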
+
+### Inputs for Multiple Outputs
+
+KernelFunctions.jl views multi-output GPs as GPs on an extended input domain.
+For an explanation of this design choice, see [the design notes on multi-output GPs](@ref inputs_for_multiple_outputs).
+
+An input to a multi-output `Kernel` should be a `Tuple{T, Int}`, whose first element specifies a location in the domain of the multi-output GP, and whose second element specifies which output the input corresponds to.
+The type of collections of inputs for multi-output GPs is therefore `AbstractVector{<:Tuple{T, Int}}`.
+
+KernelFunctions.jl provides the following type for situations in which all outputs are observed all of the time:
+```@docs
MOInput
-NystromFact
```
+As with [`ColVecs`](@ref) and [`RowVecs`](@ref) for vector-valued input spaces, this
+type enables specialised implementations of e.g. [`kernelmatrix`](@ref) for
+[`MOInput`](@ref)s in some situations.
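+
+For example, a small sketch of `MOInput` in use (input values arbitrary; `IndependentMOKernel` is one choice of multi-output kernel):
+```julia
+x = [0.1, 0.2, 0.3]                            # 3 locations in the input domain
+xMO = MOInput(x, 2)                            # inputs for a 2-output GP; length(xMO) == 6
+k = IndependentMOKernel(SqExponentialKernel())
+kernelmatrix(k, xMO)                           # returns a 6×6 matrix
+```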
-## Index
+To find out more about the background, read this [review of kernels for vector-valued functions](https://arxiv.org/pdf/1106.6251.pdf).
-```@index
-Pages = ["api.md"]
-Module = ["KernelFunctions"]
-Order = [:type, :function]
+## Utilities
+
+KernelFunctions also provides miscellaneous utility functions.
+```@docs
+kernelpdmat
+nystrom
+NystromFact
```
diff --git a/docs/src/assets/heatmap_combination.png b/docs/src/assets/heatmap_combination.png
index 4a0daf139..06d7a8590 100644
Binary files a/docs/src/assets/heatmap_combination.png and b/docs/src/assets/heatmap_combination.png differ
diff --git a/docs/src/design.md b/docs/src/design.md
new file mode 100644
index 000000000..f7faba3c0
--- /dev/null
+++ b/docs/src/design.md
@@ -0,0 +1,172 @@
+# Design
+
+## [Why AbstractVectors Everywhere?](@id why_abstract_vectors)
+
+To understand the advantages of using `AbstractVector`s everywhere to represent collections of inputs, first consider the following properties that a collection of inputs should ideally satisfy.
+
+#### Unique Ordering
+
+There must be a clearly-defined first, second, etc. element of an input collection.
+If this were not the case, it would not be possible to determine a unique mapping between a collection of inputs and the output of `kernelmatrix`, as it would not be clear what order the rows and columns of the output should appear in.
+
+Moreover, a unique ordering guarantees that if you permute the collection of inputs, the rows and columns of the `kernelmatrix` are correspondingly permuted.
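+
+A small sketch of this property (assuming some kernel `k`):
+```julia
+x = [0.3, 0.1, 0.2]
+p = [2, 3, 1]                                       # a permutation of the inputs
+kernelmatrix(k, x[p]) == kernelmatrix(k, x)[p, p]   # true
+```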
+
+#### Generality
+
+There must be no restriction on the domain of the input.
+Collections of `Real`s, vectors, graphs, finite-dimensional domains, or really anything else that you fancy should be straightforwardly representable.
+Moreover, whichever input class is chosen should not prevent optimal performance from being obtained.
+
+#### Unambiguously-Defined Length
+
+Knowing the length of a collection of inputs is important.
+For example, a well-defined length guarantees that the size of the output of `kernelmatrix`,
+and of related functions, is predictable.
+It also makes it possible to perform internal error-checking that ensures that e.g. there
+are the same number of inputs in two collections of inputs.
+
+
+
+### AbstractMatrices Do Not Cut It
+
+Notably, while `AbstractMatrix` objects are often used to represent collections of vector-valued
+inputs, they do _not_ immediately satisfy these properties as it is unclear whether a matrix
+of size `P x Q` represents a collection of `P` `Q`-dimensional inputs (each row is an
+input), or `Q` `P`-dimensional inputs (each column is an input).
+
+Moreover, they occasionally add some aesthetic inconvenience.
+For example, a collection of `Real`-valued inputs, which might be straightforwardly
+represented as an `AbstractVector{<:Real}`, must be reshaped into a matrix.
+
+There are two commonly used ways to partly resolve these shortcomings:
+
+#### Resolution 1: Specify a Convention
+
+One way that these shortcomings can be partly resolved is by specifying a convention that
+everyone adheres to regarding the interpretation of rows vs columns.
+However, opinions about the choice of convention are often surprisingly strongly held, and
+users regularly have to remind themselves _which_ convention has been chosen.
+While this resolves the ordering problem, and in principle defines the "length" of a
+collection of inputs, `AbstractMatrix`s already have a `length` defined in Julia, which
+would generally disagree with our internal notion of `length`.
+This isn't a show-stopper, but it isn't an especially clean situation.
+
+There is also the opportunity for some kinds of silent bugs.
+For example, if an input matrix happens to be square because the number of input dimensions
+is the same as the number of inputs, it would be hard to know whether the correct
+`kernelmatrix` has been computed.
+This kind of bug seems unlikely, but it exists regardless.
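+
+As a concrete sketch (hypothetical code, assuming an `obsdim`-style API):
+```julia
+X = randn(4, 4)                 # number of inputs == number of input dimensions
+kernelmatrix(k, X; obsdim=1)    # rows treated as inputs
+kernelmatrix(k, X; obsdim=2)    # columns treated as inputs
+# Both calls return a 4×4 matrix, so passing the wrong `obsdim` goes unnoticed.
+```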
+
+Finally, suppose that your inputs are some type `T` that is not simply a vector of real
+numbers, say a graph.
+In this situation, how should a collection of inputs be represented?
+An `N x 1` or `1 x N` matrix is the only obvious candidate, but the additional singleton
+dimension seems somewhat redundant.
+
+#### Resolution 2: Always Specify An `obsdim` Argument
+
+Another way to partly resolve these problems is to not commit to a convention, and instead
+to propagate some additional information through the codebase that specifies how the input
+data is to be interpreted.
+For example, a kernel `k` that represents the sum of two other kernels might implement
+`kernelmatrix` as follows:
+```julia
+function kernelmatrix(k::KernelSum, x::AbstractMatrix; obsdim=1)
+ return kernelmatrix(k.kernels[1], x; obsdim=obsdim) +
+ kernelmatrix(k.kernels[2], x; obsdim=obsdim)
+end
+```
+While this prevents the package from having to pre-specify a convention, it doesn't resolve
+the `length` issue, or the issue of representing collections of inputs which aren't
+immediately represented as vectors.
+Moreover, it complicates the internals; in contrast, consider what this function looks like
+with an `AbstractVector`:
+```julia
+function kernelmatrix(k::KernelSum, x::AbstractVector)
+ return kernelmatrix(k.kernels[1], x) + kernelmatrix(k.kernels[2], x)
+end
+```
+This code is clearer (less visual noise), and has removed a possible bug -- if the
+implementer of `kernelmatrix` forgets to pass the `obsdim` kwarg into each subsequent
+`kernelmatrix` call, it's possible to get the wrong answer.
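+
+As an illustration of that failure mode, a hypothetical buggy implementation:
+```julia
+function kernelmatrix(k::KernelSum, x::AbstractMatrix; obsdim=1)
+    # Bug: `obsdim` is accepted but not forwarded, so the inner calls
+    # silently fall back to their default convention.
+    return kernelmatrix(k.kernels[1], x) + kernelmatrix(k.kernels[2], x)
+end
+```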
+
+This being said, we do support matrix-valued inputs -- see
+[Why We Have Support for Both](@ref).
+
+
+### AbstractVectors
+
+Requiring all collections of inputs to be `AbstractVector`s resolves all of these problems,
+and ensures that the data is self-describing to the extent that KernelFunctions.jl requires.
+
+Firstly, the question of how to interpret the columns and rows of a matrix of inputs is
+resolved.
+Users _must_ wrap matrices that represent collections of inputs in either a `ColVecs` or
+`RowVecs`, both of which have clearly-defined semantics that are hard to confuse.
+
+By design, there is also no discrepancy between the number of inputs in the collection, and
+the `length` function -- the `length` of a `ColVecs`, `RowVecs`, or `Vector{<:Real}` is
+equal to the number of inputs.
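+
+For example (a sketch; sizes chosen arbitrarily):
+```julia
+X = randn(2, 5)
+length(ColVecs(X)) == 5    # 5 column-vector inputs, even though length(X) == 10
+length(RowVecs(X)) == 2    # 2 row-vector inputs
+```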
+
+There is no loss of performance.
+
+A collection of `N` `Real`-valued inputs can be represented by an
+`AbstractVector{<:Real}` of `length` `N`, rather than needing to use an
+`AbstractMatrix{<:Real}` of size either `N x 1` or `1 x N`.
+The same can be said for any other input type `T`, and new subtypes of `AbstractVector` can
+be added if particularly efficient ways exist to store collections of inputs of type `T`.
+A good example of this in practice is using `Tuple{S, Int}`, for some input type `S`, as the
+[Inputs for Multiple Outputs](@ref).
+
+This approach can also lead to clearer user code.
+A user need only wrap their inputs in a `ColVecs` or `RowVecs` once, and this
+specification is automatically re-used _everywhere_ in their code.
+In this sense, it is straightforward to write code in such a way that there is one unique
+source of "truth" about the way in which a particular data set should be interpreted.
+Conversely, the `obsdim` resolution requires that the `obsdim` keyword argument is passed
+around with the data _every_ _single_ _time_ that you use it.
+
+The benefits of the `AbstractVector` approach are likely most strongly felt when writing a
+substantial amount of code on top of KernelFunctions.jl: just as using `AbstractVector`s
+inside KernelFunctions.jl removes the need for large amounts of keyword-argument
+propagation, the same will be true of code built on top of it.
+
+
+
+
+### Why We Have Support for Both
+
+In short: many people like matrices, and are familiar with `obsdim`-style keyword
+arguments.
+
+All internals are implemented using `AbstractVector`s, though, and the `obsdim` interface
+is just a thin layer of utility functionality that sits on top of this.
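+
+For intuition, the matrix-based methods can be thought of as something like the following
+sketch (not the package's exact code):
+```julia
+vecs(X::AbstractMatrix; obsdim=2) = obsdim == 2 ? ColVecs(X) : RowVecs(X)
+
+kernelmatrix(k::Kernel, X::AbstractMatrix; obsdim=2) = kernelmatrix(k, vecs(X; obsdim=obsdim))
+```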
+
+
+
+
+
+## [Kernels for Multiple-Outputs](@id inputs_for_multiple_outputs)
+
+There are two equally-valid perspectives on multi-output kernels: they can either be treated
+as matrix-valued kernels, or standard kernels on an extended input domain.
+Each of these perspectives is convenient in different circumstances, but the latter
+greatly simplifies the incorporation of multi-output kernels in KernelFunctions.
+
+More concretely, let `k_mat` be a matrix-valued kernel, mapping pairs of inputs of type `T` to matrices of size `P x P` to describe the covariance between `P` outputs.
+Given inputs `x` and `y` of type `T`, and integers `p` and `q`, we can always find an
+equivalent standard kernel `k` mapping from pairs of inputs of type `Tuple{T, Int}` to the
+`Real`s as follows:
+```julia
+k((x, p), (y, q)) = k_mat(x, y)[p, q]
+```
+This ability to treat multi-output kernels as single-output kernels is very helpful, as it
+means that there is no need to introduce additional concepts into the API of
+KernelFunctions.jl, just additional kernels!
+This in turn simplifies downstream code, which doesn't need to "know" about the existence of
+multi-output kernels in addition to standard kernels. For example, GP libraries built on
+top of KernelFunctions.jl just need to know about `Kernel`s, and they get multi-output
+kernels, and hence multi-output GPs, for free.
+
+Where there is the need to specialise _implementations_ for multi-output kernels, this is
+done in an encapsulated manner -- parts of KernelFunctions that have nothing to do with
+multi-output kernels know _nothing_ about the existence of multi-output kernels.
diff --git a/docs/src/userguide.md b/docs/src/userguide.md
index a8e633ea4..b3539dc64 100644
--- a/docs/src/userguide.md
+++ b/docs/src/userguide.md
@@ -1,6 +1,6 @@
# User guide
-## Kernel creation
+## Kernel Creation
To create a kernel object, choose one of the pre-implemented kernels, see [Kernel Functions](@ref), or create your own, see [Creating your own kernel](@ref).
For example, a squared exponential kernel is created by
@@ -10,10 +10,10 @@ For example, a squared exponential kernel is created by
!!! tip "How do I set the lengthscale?"
Instead of having lengthscale(s) for each kernel we use [`Transform`](@ref) objects which act on the inputs before passing them to the kernel. Note that the transforms such as [`ScaleTransform`](@ref) and [`ARDTransform`](@ref) _multiply_ the input by a scale factor, which corresponds to the _inverse_ of the lengthscale.
- For example, a lengthscale of 0.5 is equivalent to premultiplying the input by 2.0, and you can create the corresponding kernel as follows:
+ For example, a lengthscale of 0.5 is equivalent to premultiplying the input by 2.0, and you can create the corresponding kernel in either of the following equivalent ways:
```julia
- k = transform(SqExponentialKernel(), ScaleTransform(2.0))
- k = transform(SqExponentialKernel(), 2.0) # implicitly constructs a ScaleTransform(2.0)
+ k = SqExponentialKernel() ∘ ScaleTransform(2.0)
+ k = compose(SqExponentialKernel(), ScaleTransform(2.0))
```
Check the [Input Transforms](@ref input_transforms) page for more details.
@@ -32,74 +32,97 @@ For example, a squared exponential kernel is created by
use a squared exponential kernel together with a [`LinearTransform`](@ref) of
the inputs:
```julia
- k = transform(SqExponentialKernel(), LinearTransform(sqrt(2) .* Q))
+ k = SqExponentialKernel() ∘ LinearTransform(sqrt(2) .* Q)
```
Analogously, you can combine other kernels such as the
[`PiecewisePolynomialKernel`](@ref) with a [`LinearTransform`](@ref) of the
inputs to obtain a kernel that is a function of the Mahalanobis distance
between inputs.
-## Using a kernel function
+## Using a Kernel Function
To evaluate the kernel function on two vectors you simply call the kernel object:
```julia
- k = SqExponentialKernel()
- x1 = rand(3)
- x2 = rand(3)
- k(x1, x2)
+k = SqExponentialKernel()
+x1 = rand(3)
+x2 = rand(3)
+k(x1, x2)
```
-## Creating a kernel matrix
+## Creating a Kernel Matrix
Kernel matrices can be created via the `kernelmatrix` function or `kernelmatrix_diag` for only the diagonal.
-An important argument to give is the data layout of the input `obsdim`. It specifies whether the number of observed data points is along the first dimension (`obsdim=1`, i.e. the matrix shape is number of samples times number of features) or along the second dimension (`obsdim=2`, i.e. the matrix shape is number of features times number of samples), similarly to [Distances.jl](https://github.com/JuliaStats/Distances.jl). If not given explicitly, `obsdim` defaults to `2`.
-For example:
+For example, for a collection of 10 `Real`-valued inputs:
```julia
- k = SqExponentialKernel()
- A = rand(10, 5)
- kernelmatrix(k, A, obsdim=1) # returns a 10x10 matrix
- kernelmatrix(k, A, obsdim=2) # returns a 5x5 matrix
- k(A, obsdim=1) # Syntactic sugar
+k = SqExponentialKernel()
+x = rand(10)
+kernelmatrix(k, x) # 10x10 matrix
+```
+If your inputs are multi-dimensional, it is common to represent them as a matrix.
+For example:
+```julia
+X = rand(10, 5)
+```
+However, it is ambiguous whether this represents a collection of 10 5-dimensional row-vectors, or 5 10-dimensional column-vectors.
+Therefore, we require users to provide some more information.
+
+You can write `RowVecs(X)` to declare that `X` contains 10 5-dimensional row-vectors, or `ColVecs(X)` to declare that `X` contains 5 10-dimensional column-vectors:
+```julia
+kernelmatrix(k, RowVecs(X)) # returns a 10×10 matrix -- each row of X treated as input
+kernelmatrix(k, ColVecs(X)) # returns a 5×5 matrix -- each column of X treated as input
```
+This is the mechanism used throughout KernelFunctions.jl to handle multi-dimensional inputs.
+
+You can also utilise the `obsdim` keyword argument if you prefer:
+```julia
+kernelmatrix(k, X; obsdim=1) # same as RowVecs(X)
+kernelmatrix(k, X; obsdim=2) # same as ColVecs(X)
+```
+This is similar to the convention used in [Distances.jl](https://github.com/JuliaStats/Distances.jl).
+
+See [Input Types](@ref) for a more thorough discussion of these two approaches.
+
+
We also support specific kernel matrix outputs:
-- For a positive-definite matrix object`PDMat` from [`PDMats.jl`](https://github.com/JuliaStats/PDMats.jl), you can call the following:
+- For a positive-definite matrix object of type `PDMat` from [`PDMats.jl`](https://github.com/JuliaStats/PDMats.jl), you can call the following:
```julia
- using PDMats
- k = SqExponentialKernel()
- K = kernelpdmat(k, A, obsdim=1) # PDMat
+using PDMats
+k = SqExponentialKernel()
+K = kernelpdmat(k, RowVecs(X)) # PDMat
+K = kernelpdmat(k, X; obsdim=1) # PDMat
```
It will create a matrix and in case of bad conditioning will add some diagonal noise until the matrix is considered positive-definite; it will then return a `PDMat` object. For this method to work in your code you need to include `using PDMats` first.
- For a Kronecker matrix, we rely on [`Kronecker.jl`](https://github.com/MichielStock/Kronecker.jl). Here are two examples:
```julia
using Kronecker
-x = range(0, 1, length=10)
-y = range(0, 1, length=50)
+x = range(0, 1; length=10)
+y = range(0, 1; length=50)
K = kernelkronmat(k, [x, y]) # Kronecker matrix
K = kernelkronmat(k, x, 5) # Kronecker matrix
```
Make sure that `k` is a kernel compatible with such constructions (with `iskroncompatible(k)`). Both methods will return a Kronecker matrix. For those methods to work in your code you need to include `using Kronecker` first.
- For a Nystrom approximation: `kernelmatrix(nystrom(k, X, ρ, obsdim=1))` where `ρ` is the fraction of data samples used in the approximation.
-## Composite kernels
+## Composite Kernels
Sums and products of kernels are also valid kernels. They can be created via `KernelSum` and `KernelProduct` or using simple operators `+` and `*`.
For example:
```julia
- k1 = SqExponentialKernel()
- k2 = Matern32Kernel()
- k = 0.5 * k1 + 0.2 * k2 # KernelSum
- k = k1 * k2 # KernelProduct
+k1 = SqExponentialKernel()
+k2 = Matern32Kernel()
+k = 0.5 * k1 + 0.2 * k2 # KernelSum
+k = k1 * k2 # KernelProduct
```
-## Kernel parameters
+## Kernel Parameters
What if you want to differentiate through the kernel parameters? This is easy even in a highly nested structure such as:
```julia
- k = transform(
- 0.5 * SqExponentialKernel() * Matern12Kernel()
- + 0.2 * (transform(LinearKernel(), 2.0) + PolynomialKernel()),
- [0.1, 0.5])
+k = (
+ 0.5 * SqExponentialKernel() * Matern12Kernel() +
+ 0.2 * (LinearKernel() ∘ ScaleTransform(2.0) + PolynomialKernel())
+) ∘ ARDTransform([0.1, 0.5])
```
One can access the named tuple of trainable parameters via `Functors.functor` from `Functors.jl`.
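+For example, a small sketch (kernel choice illustrative):
+```julia
+using Functors
+k = (2.0 * SqExponentialKernel()) ∘ ScaleTransform(0.5)
+params, re = Functors.functor(k)   # named tuple of parameters and a reconstructor
+```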
This means that in practice you can implicitly optimize the kernel parameters by calling:
diff --git a/src/matrix/kernelmatrix.jl b/src/matrix/kernelmatrix.jl
index 6824db66a..da42bec37 100644
--- a/src/matrix/kernelmatrix.jl
+++ b/src/matrix/kernelmatrix.jl
@@ -1,41 +1,84 @@
"""
- kernelmatrix!(K::AbstractMatrix, κ::Kernel, X; obsdim::Integer = 2)
- kernelmatrix!(K::AbstractMatrix, κ::Kernel, X, Y; obsdim::Integer = 2)
+ kernelmatrix!(K::AbstractMatrix, κ::Kernel, x::AbstractVector)
+ kernelmatrix!(K::AbstractMatrix, κ::Kernel, x::AbstractVector, y::AbstractVector)
In-place version of [`kernelmatrix`](@ref) where pre-allocated matrix `K` will be
overwritten with the kernel matrix.
+
+ kernelmatrix!(K::AbstractMatrix, κ::Kernel, X::AbstractMatrix; obsdim::Integer=2)
+ kernelmatrix!(
+ K::AbstractMatrix,
+ κ::Kernel,
+ X::AbstractMatrix,
+ Y::AbstractMatrix;
+ obsdim::Integer=2,
+ )
+
+Equivalent to `kernelmatrix!(K, κ, ColVecs(X))` and
+`kernelmatrix!(K, κ, ColVecs(X), ColVecs(Y))` respectively.
+Set `obsdim=1` to get `RowVecs`.
+
+See also: [`ColVecs`](@ref), [`RowVecs`](@ref)
"""
kernelmatrix!
"""
- kernelmatrix(κ::Kernel, X; obsdim::Int = 2)
- kernelmatrix(κ::Kernel, X, Y; obsdim::Int = 2)
+ kernelmatrix(κ::Kernel, x::AbstractVector)
+ kernelmatrix(κ::Kernel, x::AbstractVector, y::AbstractVector)
+
+Calculate the kernel matrix of `x` (and `y`) with respect to kernel `κ`.
+
+ kernelmatrix(κ::Kernel, X::AbstractMatrix; obsdim::Int=2)
+ kernelmatrix(κ::Kernel, X::AbstractMatrix, Y::AbstractMatrix; obsdim::Int=2)
-Calculate the kernel matrix of `X` (and `Y`) with respect to kernel `κ`.
-`obsdim = 1` means the matrix `X` (and `Y`) has size #samples x #dimension
-`obsdim = 2` means the matrix `X` (and `Y`) has size #dimension x #samples
+Equivalent to `kernelmatrix(κ, ColVecs(X))` and `kernelmatrix(κ, ColVecs(X), ColVecs(Y))`
+respectively.
+Set `obsdim=1` to get `RowVecs`.
+
+See also: [`ColVecs`](@ref), [`RowVecs`](@ref)
"""
kernelmatrix
"""
- kernelmatrix_diag!(K::AbstractVector, κ::Kernel, X; obsdim::Int = 2)
- kernelmatrix_diag!(K::AbstractVector, κ::Kernel, X, Y; obsdim::Int = 2)
+ kernelmatrix_diag!(K::AbstractVector, κ::Kernel, x::AbstractVector)
+ kernelmatrix_diag!(K::AbstractVector, κ::Kernel, x::AbstractVector, y::AbstractVector)
+
+In-place version of [`kernelmatrix_diag`](@ref).
+
+ kernelmatrix_diag!(K::AbstractVector, κ::Kernel, X::AbstractMatrix; obsdim::Int=2)
+ kernelmatrix_diag!(
+ K::AbstractVector,
+ κ::Kernel,
+ X::AbstractMatrix,
+ Y::AbstractMatrix;
+ obsdim::Int=2,
+ )
+
+Equivalent to `kernelmatrix_diag!(K, κ, ColVecs(X))` and
+`kernelmatrix_diag!(K, κ, ColVecs(X), ColVecs(Y))` respectively.
+Set `obsdim=1` to get `RowVecs`.
-In place version of [`kernelmatrix_diag`](@ref)
+See also: [`ColVecs`](@ref), [`RowVecs`](@ref)
"""
kernelmatrix_diag!
"""
- kernelmatrix_diag(κ::Kernel, X; obsdim::Int = 2)
+ kernelmatrix_diag(κ::Kernel, x::AbstractVector)
+
+Calculate the diagonal of `kernelmatrix(κ, x)` efficiently.
+
+ kernelmatrix_diag(κ::Kernel, x::AbstractVector, y::AbstractVector)
+
+Calculate the diagonal of `kernelmatrix(κ, x, y)` efficiently.
+Requires that `x` and `y` are the same length.
-Calculate the diagonal matrix of `X` with respect to kernel `κ`
-`obsdim = 1` means the matrix `X` has size #samples x #dimension
-`obsdim = 2` means the matrix `X` has size #dimension x #samples
+ kernelmatrix_diag(κ::Kernel, X::AbstractMatrix; obsdim::Int=2)
+ kernelmatrix_diag(κ::Kernel, X::AbstractMatrix, Y::AbstractMatrix; obsdim::Int=2)
- kernelmatrix_diag(κ::Kernel, X, Y; obsdim::Int = 2)
+Equivalent to `kernelmatrix_diag(κ, ColVecs(X))` and
+`kernelmatrix_diag(κ, ColVecs(X), ColVecs(Y))` respectively.
-Calculate the diagonal of `kernelmatrix(κ, X, Y; obsdim)` efficiently. Requires that `X` and
-`Y` are the same length.
+See also: [`ColVecs`](@ref), [`RowVecs`](@ref)
"""
kernelmatrix_diag
diff --git a/src/mokernels/moinput.jl b/src/mokernels/moinput.jl
index e1de49d06..0c16e5a36 100644
--- a/src/mokernels/moinput.jl
+++ b/src/mokernels/moinput.jl
@@ -2,6 +2,27 @@
MOInput(x::AbstractVector, out_dim::Integer)
A data type to accommodate modelling multi-dimensional output data.
+
+`MOInput(x, out_dim)` has length `length(x) * out_dim`.
+
+```jldoctest
+julia> x = [1, 2, 3];
+
+julia> MOInput(x, 2)
+6-element MOInput{Vector{Int64}}:
+ (1, 1)
+ (2, 1)
+ (3, 1)
+ (1, 2)
+ (2, 2)
+ (3, 2)
+```
+
+As shown above, an `MOInput` represents a vector of tuples.
+The first `length(x)` elements represent the inputs for the first output, the second
+`length(x)` elements represent the inputs for the second output, etc.
+
+See [Inputs for Multiple Outputs](@ref) in the docs for more info.
"""
struct MOInput{T<:AbstractVector} <: AbstractVector{Tuple{Any,Int}}
x::T
diff --git a/src/utils.jl b/src/utils.jl
index 7e28c82ef..33a085c56 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -33,8 +33,34 @@ end
"""
ColVecs(X::AbstractMatrix)
-A lightweight wrapper for an `AbstractMatrix` to make it behave like a vector of vectors.
-Each vector represents a column of the matrix
+A lightweight wrapper for an `AbstractMatrix` which interprets it as a vector-of-vectors, in
+which each _column_ of `X` represents a single vector.
+
+That is, by writing `x = ColVecs(X)`, you are saying "`x` is a vector-of-vectors, each of
+which has length `size(X, 1)`. The total number of vectors is `size(X, 2)`."
+
+Phrased differently, `ColVecs(X)` says that `X` should be interpreted as a vector
+of horizontally-concatenated column-vectors, hence the name `ColVecs`.
+
+```jldoctest
+julia> X = randn(2, 5);
+
+julia> x = ColVecs(X);
+
+julia> length(x) == 5
+true
+
+julia> X[:, 3] == x[3]
+true
+```
+
+`ColVecs` is related to [`RowVecs`](@ref) via transposition:
+```jldoctest
+julia> X = randn(2, 5);
+
+julia> ColVecs(X) == RowVecs(X')
+true
+```
"""
struct ColVecs{T,TX<:AbstractMatrix{T},S} <: AbstractVector{S}
X::TX
@@ -70,8 +96,37 @@ end
"""
RowVecs(X::AbstractMatrix)
-A lightweight wrapper for an `AbstractMatrix` to make it behave like a vector of vectors.
-Each vector represents a row of the matrix
+A lightweight wrapper for an `AbstractMatrix` which interprets it as a vector-of-vectors, in
+which each _row_ of `X` represents a single vector.
+
+That is, by writing `x = RowVecs(X)`, you are saying "`x` is a vector-of-vectors, each of
+which has length `size(X, 2)`. The total number of vectors is `size(X, 1)`."
+
+Phrased differently, `RowVecs(X)` says that `X` should be interpreted as a vector
+of vertically-concatenated row-vectors, hence the name `RowVecs`.
+
+Internally, the data continues to be represented as an `AbstractMatrix`, so using this type
+does not introduce any kind of performance penalty.
+
+```jldoctest
+julia> X = randn(5, 2);
+
+julia> x = RowVecs(X);
+
+julia> length(x) == 5
+true
+
+julia> X[3, :] == x[3]
+true
+```
+
+`RowVecs` is related to [`ColVecs`](@ref) via transposition:
+```jldoctest
+julia> X = randn(5, 2);
+
+julia> RowVecs(X) == ColVecs(X')
+true
+```
"""
struct RowVecs{T,TX<:AbstractMatrix{T},S} <: AbstractVector{S}
X::TX
diff --git a/test/runtests.jl b/test/runtests.jl
index 6dec94201..43c8e24ac 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -162,6 +162,13 @@ include("test_utils.jl")
end;
recursive=true,
)
- doctest(KernelFunctions)
+ doctest(
+ KernelFunctions;
+ doctestfilters=[
+ r"{([a-zA-Z0-9]+,\s?)+[a-zA-Z0-9]+}",
+ r"(Array{[a-zA-Z0-9]+,\s?1}|Vector{[a-zA-Z0-9]+})",
+ r"(Array{[a-zA-Z0-9]+,\s?2}|Matrix{[a-zA-Z0-9]+})",
+ ],
+ )
end
end