diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..94242df7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+data
+data/*
+plots/*
+wl_*.npz
+*.pkl
diff --git a/ComplexSOM.yaml b/ComplexSOM.yaml
new file mode 100644
index 00000000..5b76fa08
--- /dev/null
+++ b/ComplexSOM.yaml
@@ -0,0 +1,57 @@
+metrics: [SNR_3x2]
+bands: riz
+training_file: data_buzzard/training.hdf5
+validation_file: data_buzzard/validation.hdf5
+output_file: example/ComplexSOM_output.txt
+# Backend implementing the metrics, either: "firecrown" (default), "jax-cosmo"
+metrics_impl: firecrown
+
+run:
+  # This is a class name which will be looked up
+  ComplexSOM:
+    run_5:
+      # These settings are sent to the classifier
+      testing: False
+      bins: 5
+      som_dim: [51,51]
+      num_groups: 200
+      num_threads: 32
+      group_type: 'redshift'
+      whiten: False
+      redshift_propset: 2
+      data_threshold: [0,30]
+      sparse_frac: 1.0
+      plots: True
+      plot_dir: 'plots'
+      metric: "SNR_3x2"
+      use_inbin: False
+      use_outbin: False
+      # These special settings decide whether the
+      # color, error, and size columns are passed to the classifier
+      # as well as the magnitudes
+      colors: False
+      errors: False
+      sizes: True
+    run_10:
+      # These settings are sent to the classifier
+      testing: False
+      bins: 10
+      som_dim: [51,51]
+      num_groups: 200
+      num_threads: 32
+      group_type: 'redshift'
+      redshift_propset: 2
+      data_threshold: [0,30]
+      sparse_frac: 1.0
+      plots: True
+      plot_dir: 'plots'
+      metric: "SNR_3x2"
+      use_inbin: False
+      use_outbin: False
+      # These special settings decide whether the
+      # color, error, and size columns are passed to the classifier
+      # as well as the magnitudes
+      colors: False
+      errors: False
+      sizes: True
+
diff --git a/bin/challenge.py b/bin/challenge.py
index 5ec60273..dcabe9e4 100755
--- a/bin/challenge.py
+++ b/bin/challenge.py
@@ -27,12 +27,15 @@ def main(config_yaml):
     # Decide if anyone needs the colors calculating and/or errors loading
     anyone_wants_colors = False
     anyone_wants_errors = False
+    anyone_wants_sizes = False
     for run in config['run'].values():
         for version in run.values():
             if version.get('errors'):
                 anyone_wants_errors = True
             if version.get('colors'):
                 anyone_wants_colors = True
+            if version.get('sizes'):
+                anyone_wants_sizes = True

     bands = config['bands']
@@ -41,14 +44,16 @@
     training_data = tc.load_data(
         config['training_file'],
         bands,
         errors=anyone_wants_errors,
-        colors=anyone_wants_colors
+        colors=anyone_wants_colors,
+        size=anyone_wants_sizes
     )

     validation_data = tc.load_data(
         config['validation_file'],
         bands,
         errors=anyone_wants_errors,
-        colors=anyone_wants_colors
+        colors=anyone_wants_colors,
+        size=anyone_wants_sizes
     )

     training_z = tc.load_redshift(config['training_file'])
@@ -59,7 +64,7 @@
     else:
         metrics_fn = tc.compute_scores

-    with open(config['output_file'],'w') as output_file:
+    with open(config['output_file'],'a') as output_file:
         for classifier_name, runs in config['run'].items():
             for run, settings in runs.items():
                 scores = run_one(classifier_name, bands, settings,
@@ -77,15 +82,16 @@ def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,
     if classifier.wants_arrays:
         errors = settings.get('errors')
         colors = settings.get('colors')
-        train_data = tc.dict_to_array(train_data, bands, errors=errors, colors=colors)
-        valid_data = tc.dict_to_array(valid_data, bands, errors=errors, colors=colors)
+        sizes = settings.get('sizes')
+        train_data = tc.dict_to_array(train_data, bands, errors=errors, colors=colors, sizes=sizes)
+        valid_data = tc.dict_to_array(valid_data, bands, errors=errors, colors=colors, sizes=sizes)

     print ("Executing: ", classifier_name, bands, settings)

     ## first check if options are valid
     print (settings, classifier.valid_options)
     for key in settings.keys():
-        if key not in classifier.valid_options and key not in ['errors', 'colors']:
+        if key not in classifier.valid_options and key not in ['errors', 'colors', 'sizes']:
             raise ValueError(f"Key {key} is not recognized by classifier {classifier_name}")

     print ("Initializing classifier...")
@@ -102,7 +108,7 @@ def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,

     print ("Making some pretty plots...")
     name = str(classifier.__name__)
-    tc.metrics.plot_distributions(valid_z, results, f"plots/{name}_{settings}_{bands}.png")
+    tc.metrics.plot_distributions(valid_z, results, f"plots/{name}_{bands}.png")

     return scores
diff --git a/example/SimpleSOM.yaml b/example/SimpleSOM.yaml
new file mode 100644
index 00000000..6f261f85
--- /dev/null
+++ b/example/SimpleSOM.yaml
@@ -0,0 +1,28 @@
+metrics: [SNR_3x2]
+bands: riz
+training_file: data/training.hdf5
+validation_file: data/validation_small.hdf5
+output_file: example/example_output.txt
+# Backend implementing the metrics, either: "firecrown" (default), "jax-cosmo"
+metrics_impl: firecrown
+
+run:
+  # This is a class name which will be looked up
+  SimpleSOM:
+    run_3:
+      # These settings are sent to the classifier
+      bins: 10
+      som_dim: [151,151]
+      num_groups: 200
+      num_threads: 32
+      group_type: 'redshift'
+      data_threshold: [0,30]
+      sparse_frac: 0.05
+      plots: True
+      plot_dir: 'plots'
+      # These special settings decide whether the
+      # color and error columns are passed to the classifier
+      # as well as the magnitudes
+      colors: False
+      errors: False
+
diff --git a/the_atlantics/example.py b/the_atlantics/example.py
new file mode 100644
index 00000000..76a8a8fd
--- /dev/null
+++ b/the_atlantics/example.py
@@ -0,0 +1,74 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from snc import SnCalc
+
+
+# Let's start with a "large" set of 12 samples
+n_bins = 12
+z_cent = np.linspace(0.2, 1.4, n_bins+1)+0.5/n_bins
+sigma_z = 0.05 * (1+z_cent)
+z_arr = np.linspace(0, 2, 1024)
+# Total number of galaxies in each sample
+n_gals = (z_cent/0.5)**2*np.exp(-(z_cent/0.5)**1.5) * 10. * 1.8E4 * 60**2
+
+
+def pz_gau(z, zc, sz):
+    return np.exp(-0.5*((z-zc)/sz)**2)/np.sqrt(2*np.pi*sz**2)
+
+
+# Create the N(z) arrays, which should integrate to the total number
+# of galaxies in each sample
+nzs = [ng * pz_gau(z_arr, zc, sz)
+       for ng, zc, sz in zip(n_gals, z_cent, sigma_z)]
+
+# Let's plot them
+plt.figure()
+for nz in nzs:
+    plt.plot(z_arr, nz)
+plt.xlabel(r'$z$', fontsize=16)
+plt.ylabel(r'$N(z)$', fontsize=16)
+
+
+c = SnCalc(z_arr, nzs)
+# Now let's do a dumb thing and bunch them up into
+# 4, 3, 2 and 1 subsamples
+assign4 = [np.array([0, 1, 2]),
+           np.array([3, 4, 5]),
+           np.array([6, 7, 8]),
+           np.array([9, 10, 11])]
+assign3 = [np.array([0, 1, 2, 3]),
+           np.array([4, 5, 6, 7]),
+           np.array([8, 9, 10, 11])]
+assign2 = [np.array([0, 1, 2, 3, 4, 5]),
+           np.array([6, 7, 8, 9, 10, 11])]
+assign1 = [np.arange(n_bins)]
+
+
+# OK, calculate S/N for all of these groupings
+snr_d4 = c.get_sn(assign4, full_output=True)
+print('4 bins, S/N = %.1lf' % (snr_d4['snr']))
+snr_d3 = c.get_sn(assign3, full_output=True)
+print('3 bins, S/N = %.1lf' % (snr_d3['snr']))
+snr_d2 = c.get_sn(assign2, full_output=True)
+print('2 bins, S/N = %.1lf' % (snr_d2['snr']))
+snr_d1 = c.get_sn(assign1, full_output=True)
+print('1 bin, S/N = %.1lf' % (snr_d1['snr']))
+
+
+# Note that the calculator also returns the power
+# spectra, if you wanna look at them.
+plt.figure()
+for i in range(4):
+    for j in range(i, 4):
+        if i == j:
+            plt.plot(snr_d4['ls'],
+                     snr_d4['cl'][:, i, j],
+                     'k-')
+            plt.plot(snr_d4['ls'],
+                     snr_d4['nl'][0, i, j]*np.ones_like(snr_d4['ls']), 'k--')
+        else:
+            plt.plot(snr_d4['ls'],
+                     snr_d4['cl'][:, i, j],
+                     '-', c='#AAAAAA')
+plt.loglog()
+plt.show()
diff --git a/the_atlantics/example_minimizer.py b/the_atlantics/example_minimizer.py
new file mode 100644
index 00000000..8e168c7c
--- /dev/null
+++ b/the_atlantics/example_minimizer.py
@@ -0,0 +1,86 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from snc import SnCalc
+from scipy.optimize import minimize
+
+
+# Let's start with a "large" set of 100 samples
+n_bins = 100
+z_cent = np.linspace(0.2, 1.4, n_bins+1)+0.5/n_bins
+sigma_z = 0.05 * (1+z_cent)
+z_arr = np.linspace(0, 2, 1024)
+# Total number of galaxies in each sample
+n_gals = (z_cent/0.5)**2*np.exp(-(z_cent/0.5)**1.5) * 10. * 1.8E4 * 60**2
+
+
+def pz_gau(z, zc, sz):
+    return np.exp(-0.5*((z-zc)/sz)**2)/np.sqrt(2*np.pi*sz**2)
+
+
+# Create the N(z) arrays, which should integrate to the total number
+# of galaxies in each sample.
+nzs = [ng * pz_gau(z_arr, zc, sz)
+       for ng, zc, sz in zip(n_gals, z_cent, sigma_z)]
+
+# Let's plot them
+plt.figure()
+for nz in nzs:
+    plt.plot(z_arr, nz)
+plt.xlabel(r'$z$', fontsize=16)
+plt.ylabel(r'$N(z)$', fontsize=16)
+
+# Now initialize a S/N calculator for these initial groups.
+c_wl = SnCalc(z_arr, nzs, use_clustering=False)
+c_gc = SnCalc(z_arr, nzs, use_clustering=True)
+print("Initializing WL")
+c_wl.get_cl_matrix(fname_save='wl_nb%d.npz' % n_bins)
+print("Initializing GC")
+c_gc.get_cl_matrix(fname_save='gc_nb%d.npz' % n_bins)
+
+# Let's focus on WL for now.
+# OK, let's brute-force explore the total S/N as a function
+# of the bin edge for a 2-bin scenario.
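+# (Orientation note, inferred from `assign_from_edges` in snc.py: the calls
+# below pass only the interior bin edges, so a single edge value z splits
+# the sample into two bins, below and above z.)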
+print("Exploring 2-bins") +zs = np.linspace(0.01, 1.5, 128) +sns_wl = np.array([c_wl.get_sn_from_edges(np.array([z])) for z in zs]) +sns_gc = np.array([c_gc.get_sn_from_edges(np.array([z])) for z in zs]) +plt.figure() +plt.plot(zs, sns_wl/np.amax(sns_wl), 'r-', label='Lensing') +plt.plot(zs, sns_gc/np.amax(sns_gc), 'k-', label='Clustering') +plt.xlabel('$z$') +plt.ylabel(r'$(S/N)/(S/N)_{\rm max}$') +plt.legend(loc='upper left', frameon=False) + +# Now let's do a 3-bin case (more coarsely-grained so we can do it quickly) +print("Exploring 3-bins") +zs = np.linspace(0.01, 1.5, 32) +sns = np.array([[c_wl.get_sn_from_edges(np.array([z1, z2])) for z1 in zs] + for z2 in zs]) + +plt.figure() +plt.imshow(sns, vmin=1440, extent=[0.01, 1.5, 0.01, 1.5], + origin='lower') +plt.xlabel('$z_1$') +plt.ylabel('$z_2$') +cb = plt.colorbar() +cb.set_label('$S/N$') + + +# Finally, let's write the function to minimize and optimize for a 4-bin case. +def minus_sn(edges, calc): + return -calc.get_sn_from_edges(edges) + + +print("Optimizing WL") +edges_0 = np.array([0.3, 0.6, 0.9]) +res = minimize(minus_sn, edges_0, method='Powell', args=(c_wl,)) +print("WL final edges: ", res.x) +print("Maximum S/N: ", c_wl.get_sn_from_edges(res.x)) +print(" ") + +# That was for weak lensing. Let's do the same thing for clustering. +print("Optimizing GC") +res = minimize(minus_sn, edges_0, method='Powell', args=(c_gc,)) +print("GC final edges: ", res.x) +print("Maximum S/N: ", c_gc.get_sn_from_edges(res.x)) +plt.show() diff --git a/the_atlantics/optimizer.py b/the_atlantics/optimizer.py new file mode 100644 index 00000000..f46dd508 --- /dev/null +++ b/the_atlantics/optimizer.py @@ -0,0 +1,153 @@ +from argparse import ArgumentParser +import numpy as np +from snc import SnCalc, assign_params_default +from scipy.interpolate import interp1d +from scipy.optimize import minimize, brentq +from scipy.integrate import simps + + +parser = ArgumentParser() +parser.add_argument("--nz-file", default="nzs.npy", type=str, + help="Path to input file containing the N(z) of all initial groups") +parser.add_argument("--prefix-out", default='out', type=str, + help="Prefix for all output files") +parser.add_argument("--suffix-out", default='', type=str, + help="Suffix for all output files") +parser.add_argument("--n-bins", default=4, type=int, + help="Number of bins (excluding a possible trash bin") +parser.add_argument("--prob-in-bin", default=-1., type=float, + help="Minimum fraction of the N(z) that should be inside the bin." + "If <= 0, this will not be taken into account in the optimization.") +parser.add_argument("--prob-out-bin", default=-1., type=float, + help="Maximum fraction of the N(z) that should be inside other bins." 
+ "If <= 0, this will not be taken into account in the optimization.") +parser.add_argument('--in-bin-free', default=False, action='store_true', + help='Free up in-bin probability (default: False)') +parser.add_argument('--use-clustering', default=False, action='store_true', + help='Use clustering (default: False)') +parser.add_argument('--use-3x2', default=False, action='store_true', + help='Use 3x2 (default: False)') +parser.add_argument('--random-start', default=False, action='store_true', + help='Use random starting points (default: False)') +parser.add_argument('--random-seed', default=1234, type=int, + help='Random seed') +o = parser.parse_args() + +nzs = np.load(o.nz_file) +# We should probably read z on input too +z_arr = np.linspace(0, 2, 124) +zz = (z_arr[1:]+z_arr[:-1])*0.5 +# Renormalize to use the same sample definitions as the +# official metric calculators. +fsky = 0.25 +ndens_arcmin = 20. +area_arcmin = (180*60/np.pi)**2*4*np.pi*fsky +ng_tot = ndens_arcmin * area_arcmin +ng_each_small = simps(nzs, x=zz) +ng_tot_small = np.sum(ng_each_small) +ng_each = ng_each_small * ng_tot / ng_tot_small +nzs = nzs * ng_tot / ng_tot_small + +# Initialize calculator +sc = SnCalc(zz, nzs, fsky=fsky, use_clustering=o.use_clustering, use_3x2=o.use_3x2) +if o.use_3x2: + suffix = 'cl_3x2.npz' +else: + if o.use_clustering: + suffix = 'cl_gc.npz' + else: + suffix = 'cl_wl.npz' +sc.get_cl_matrix(fname_save=o.prefix_out + suffix) + +# Initial edge guess (defined as having equal number of galaxies) +if o.random_start: + np.random.seed(o.random_seed) + z_end = 1.5 + delta_z = z_end / (o.n_bins-1) + edges_0 = delta_z * (np.arange(o.n_bins-1) + np.random.rand(o.n_bins-1)) +else: + nz_tot = np.sum(nzs, axis=0) + cumulative_fraction = np.cumsum(nz_tot) / np.sum(nz_tot) + cumul_f = interp1d(zz, cumulative_fraction, bounds_error=False, + fill_value=(0, 1)) + edges_0 = np.array([brentq(lambda z : cumul_f(z) - q, 0, 2) + for q in (np.arange(o.n_bins-1)+1.)/o.n_bins]) + +if o.in_bin_free: + if o.random_start: + pin = np.random.rand() + else: + pin = 0.5 + edges_0 = np.concatenate((np.array([pin]), edges_0)) +print(edges_0) + +# Minimize +# Binning parameters + +params = assign_params_default.copy() +if o.prob_in_bin > 0: + params['p_inbin_thr'] = o.prob_in_bin + params['use_p_inbin'] = True +if o.prob_out_bin > 0: + params['p_outbin_thr'] = o.prob_out_bin + params['use_p_outbin'] = True +if o.in_bin_free: + params['use_p_inbin'] = True + params['p_inbin_thr'] = edges_0[0] + + +def minus_sn(edges, calc): + if o.in_bin_free: + if (edges[0] < 0) or (edges[0] > 1): + return 0. 
+ params['p_inbin_thr'] = edges[0] + edges_use = edges[1:] + else: + edges_use = edges + return -calc.get_sn_from_edges(edges_use, assign_params=params) + + +# Actual optimization +res = minimize(minus_sn, edges_0, method='Powell', args=(sc,)) +if o.in_bin_free: + edges_1 = res.x[1:] + params['p_inbin_thr'] = res.x[0] +else: + edges_1 = res.x + +# Post-process: +# S/N +sn = sc.get_sn_from_edges(edges_1, assign_params=params) +# N(z)s of the final bins +nz_best = sc.get_nzs_from_edges(edges_1, assign_params=params) +# Group assignment +assign = sc.assign_from_edges(edges_1, assign_params=params, + get_ids=True) + + +def get_bin_name(i): + if i == -1: + return 'bin_trash' + else: + return 'bin_%d' % i + +print("Redshift edges: ", edges_1) +print("Optimal S/N: ", sn) +n_tot = simps(np.sum(nz_best, axis=0), + x=zz) + +d_out = {'sn': sn, + 'edges': edges_1} +if o.in_bin_free: + d_out['p_in'] = params['p_inbin_thr'] + print("P_in: ", params['p_inbin_thr']) +for (i, a), n in zip(assign, nz_best): + name = get_bin_name(i) + d_out[name+'_groups'] = a + d_out[name+'_nz'] = n + n_here = simps(n, x=zz) + f = n_here / n_tot + frac = "%.1lf" % (100 * f) + print("Bin %d, groups: " % i, a, frac) + +np.savez(o.prefix_out + o.suffix_out + '_bin_info.npz', **d_out) diff --git a/the_atlantics/snc.py b/the_atlantics/snc.py new file mode 100644 index 00000000..4c1d537e --- /dev/null +++ b/the_atlantics/snc.py @@ -0,0 +1,370 @@ +import numpy as np +import pyccl as ccl +from scipy.integrate import simps +import os + + +assign_params_default = {'p_inbin_thr': 0.5, + 'p_outbin_thr': 0.2, + 'use_p_inbin': False, + 'use_p_outbin': False} + + +class SnCalc(object): + edges_large = 3. + + def __init__(self, z_arr, nz_list, fsky=0.4, lmax=2000, n_ell=100, + s_gamma=0.26, use_clustering=False, use_3x2=False, + integrator='qag_quad'):#spline'): + """ S/N calculator + Args: + z_arr (array_like): array of redshifts at which all the N(z)s are + sampled. They should be linearly spaced. + nz_list (list): list of arrays containing the redshift + distributions of all the initial groups (each array in the + list corresponds to one group). The arrays should contain the + redshift distribution sampled at `z_arr`, with its integral + over `z_arr` equal to the number of galaxies in that group. + fsky (float): sky fraction (for SNR calculation and for + transforming number of galaxies into number densities). + lmax (float): maximum ell. + d_ell (float): ell bandpower width. + s_gamma (float): rms ellipticity scatter (relevant if + `use_clustering=False`). + use_clustering (bool): if `True`, SNR will be computed for + clustering instead of lensing. + use_3x2 (bool): if `True`, SNR will be computed for 3x2pt. + integrator (string): CCL integration method. Either 'qag_quad' + or 'spline'. 
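+
+        Example (an illustrative sketch, not taken from the original code;
+        `z_arr` and `nzs` as built in example.py):
+            >>> calc = SnCalc(z_arr, nzs, fsky=0.4)
+            >>> snr = calc.get_sn([np.arange(len(nzs))])  # single-bin S/N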
+        """
+        self.integrator = integrator
+        self.s_gamma = s_gamma
+        self.fsky = fsky
+        self.lmax = lmax
+        self.n_ell = n_ell
+        ell_edges = np.logspace(2, np.log10(lmax), n_ell+1)
+        self.larr = 0.5*(ell_edges[1:]+ell_edges[:-1])
+        self.d_ell = (ell_edges[1:]-ell_edges[:-1])
+        self.n_samples = len(nz_list)
+        self.z_arr = z_arr
+        # Store as an array so that row indexing (e.g. in
+        # `get_nzs_from_edges`) also works when a plain list is passed in.
+        self.nz_list = np.asarray(nz_list)
+        self.n_gals = np.array([simps(nz, x=self.z_arr)
+                                for nz in self.nz_list])
+        self.pz_list = self.nz_list / self.n_gals[:, None]
+        self.z_means = np.array([simps(self.z_arr*nz, x=self.z_arr)
+                                 for nz in self.nz_list]) / self.n_gals
+        self.n_dens = self.n_gals / (4*np.pi*self.fsky)
+        self.cls = None
+        self.use_clustering = use_clustering
+        self.use_3x2 = use_3x2
+        if use_3x2:
+            self.n_tracers = 2*self.n_samples
+        else:
+            self.n_tracers = self.n_samples
+
+    def _bz_model(self, cosmo, z):
+        return 1./ccl.growth_factor(cosmo, 1./(1+z))
+
+    def get_cl_matrix(self, fname_save=None, recompute=False):
+        """ Computes matrix of power spectra between all the initial groups.
+        Args:
+            fname_save (string): if not None, the result will be saved to a
+                file by this name, and if present and `recompute=False`, the
+                matrix will be read from that file.
+            recompute (bool): if `True`, any previous calculations of the
+                C_ell matrix stored on file will be ignored, and the matrix
+                will be recomputed.
+        """
+        if fname_save is not None:
+            if os.path.isfile(fname_save) and not recompute:
+                self.cls = np.load(fname_save)['cls']
+                return
+
+        # Cosmology
+        cosmo = ccl.Cosmology(Omega_c=0.25,
+                              Omega_b=0.05,
+                              h=0.67, n_s=0.96,
+                              sigma8=0.81)
+
+        # Tracers
+        bz = self._bz_model(cosmo, self.z_arr)
+        if self.use_3x2:
+            trs_gc = [ccl.NumberCountsTracer(cosmo, False, (self.z_arr, nz),
+                                             bias=(self.z_arr, bz))
+                      for nz in self.nz_list]
+            trs_wl = [ccl.WeakLensingTracer(cosmo, (self.z_arr, nz))
+                      for nz in self.nz_list]
+            trs = trs_gc + trs_wl
+        else:
+            if self.use_clustering:
+                trs = [ccl.NumberCountsTracer(cosmo, False, (self.z_arr, nz),
+                                              bias=(self.z_arr, bz))
+                       for nz in self.nz_list]
+            else:
+                trs = [ccl.WeakLensingTracer(cosmo, (self.z_arr, nz))
+                       for nz in self.nz_list]
+
+        # Cls
+        self.cls = np.zeros([self.n_ell, self.n_tracers, self.n_tracers])
+        for i in range(self.n_tracers):
+            for j in range(i, self.n_tracers):
+                try:
+                    cl = ccl.angular_cl(cosmo, trs[i], trs[j], self.larr,
+                                        limber_integration_method=self.integrator)
+                except Exception:
+                    cl = np.zeros(len(self.larr))
+                self.cls[:, i, j] = cl
+                if j != i:
+                    self.cls[:, j, i] = cl
+
+        if fname_save is not None:
+            np.savez(fname_save, cls=self.cls)
+
+    def get_resample_metadata(self, assign):
+        """ Transform an assignment list into a weight matrix and the
+        corresponding number densities.
+        Args:
+            assign (list): list of arrays. Each element of the list should
+                be a numpy array of integers, corresponding to the indices
+                of the initial groups that are now resampled into a larger
+                group.
+        """
+        n_resamples = len(assign)
+        weight_res = np.zeros([n_resamples, self.n_samples])
+        n_dens_res = np.zeros(n_resamples)
+        for ia, a in enumerate(assign):
+            ndens = self.n_dens[a]
+            ndens_tot = np.sum(ndens)
+            n_dens_res[ia] = ndens_tot
+            weight_res[ia, a] = ndens / ndens_tot
+        return weight_res, n_dens_res
+
+    def _get_cl_resamples(self, weights):
+        """ Gets C_ell matrix for resampled groups from weights matrix.
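+        Equivalent, for each ell, to the contraction C'_ell = W C_ell W^T
+        (i.e. np.einsum('ai,lij,bj->lab', weights, cls, weights)).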
+        """
+        if self.cls is None:
+            self.get_cl_matrix()
+        if self.use_3x2:
+            w_use = np.tile(weights, (2, 2))
+        else:
+            w_use = weights
+        return np.sum(np.sum(w_use[None, None, :, :] *
+                             self.cls[:, :, None, :],
+                             axis=-1)[:, None, :, :] *
+                      w_use[None, :, :, None],
+                      axis=-2)

+    def _get_nl_resamples(self, n_dens):
+        """ Gets noise contribution to C_ell matrix for resampled groups
+        from list of number densities.
+        """
+        if self.use_3x2:
+            n_diag = np.array(list(1./n_dens) +
+                              list(self.s_gamma**2/n_dens))
+            return np.diag(n_diag)[None, :, :]
+        else:
+            if self.use_clustering:
+                return np.diag(1./n_dens)[None, :, :]
+            else:
+                return np.diag(self.s_gamma**2/n_dens)[None, :, :]
+
+    def get_sn_max(self, full_output=False):
+        """ Compute maximum signal-to-noise ratio given the input
+        groups
+        Args:
+            full_output (bool): if true, a dictionary with additional
+                information will be returned. Otherwise just total S/N.
+        Returns:
+            If `full_output=True`, dictionary containing S/N, power
+            spectra and noise spectra. Otherwise just S/N.
+        """
+        weights = np.eye(self.n_samples)
+        n_dens = self.n_dens
+        return self.get_sn_wn(weights, n_dens, full_output=full_output)
+
+    def get_kltrans(self):
+        """ Compute KL decomposition.
+
+        Returns:
+            Fisher matrix element for each of the KL modes.
+        """
+        nl = self._get_nl_resamples(self.n_dens)
+        nij = nl*np.ones(self.n_ell)[:, None, None]
+        cij = self.cls + nij
+        sij = cij - nij
+        inv_nij = np.linalg.inv(nij)
+        metric = inv_nij
+
+        def change_basis(c, m, ev):
+            return np.array([np.diag(np.dot(ev[i].T,
+                                            np.dot(m[i],
+                                                   np.dot(c[i],
+                                                          np.dot(m[i],
+                                                                 ev[i])))))
+                             for i in range(self.n_ell)])
+
+        def diagonalize(c, m):
+            im = np.linalg.inv(m)
+            ll = np.linalg.cholesky(m)
+            ill = np.linalg.cholesky(im)
+            cl = np.array([np.dot(np.transpose(ll[i]),
+                                  np.dot(c[i], ll[i]))
+                           for i in range(self.n_ell)])
+            c_p, v = np.linalg.eigh(cl)
+            ev = np.array([np.dot(np.transpose(ill[i]), v[i])
+                           for i in range(self.n_ell)])
+            # iden = change_basis(im, m, ev)
+            return ev, c_p
+
+        e_v, c_p = diagonalize(cij, metric)
+        s_p = change_basis(sij, metric, e_v)
+        nmodes_l = (self.fsky * self.d_ell * (self.larr+0.5))
+        fish_kl = nmodes_l[:, None]*(s_p/c_p)**2
+        isort = np.argsort(-np.sum(fish_kl, axis=0))
+        e_o = e_v[:, :, isort]
+        # f_o = np.array([np.dot(inv_nij[l], e_o[l, :, :])
+        #                 for l in range(self.n_ell)])
+        c_p = change_basis(cij, metric, e_o)
+        s_p = change_basis(sij, metric, e_o)
+
+        fish_kl = nmodes_l[:, None]*(s_p/c_p)**2
+        fish_permode = np.sum(fish_kl, axis=0)
+        fish_cumul = np.cumsum(fish_permode)
+        return fish_kl, fish_permode, fish_cumul
+
+    def get_sn_wn(self, weights, n_dens, full_output=False):
+        """ Compute signal-to-noise ratio from weights matrix and number
+        densities of the resampled groups.
+        Args:
+            weights (array_like): weights matrix of shape
+                `(N_resample, N_initial)`, where `N_resample` is the
+                number of new groups, and `N_initial` is the original
+                number of groups. Each entry corresponds to the weight
+                with which a given initial group enters the new set of
+                groups.
+            n_dens (array_like): number density (in sterad^-1) of the
+                new groups.
+            full_output (bool): if true, a dictionary with additional
+                information will be returned. Otherwise just total S/N.
+        Returns:
+            If `full_output=True`, dictionary containing S/N, power
+            spectra and noise spectra. Otherwise just S/N.
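+
+        Example (illustrative): merging four equal-density groups into two
+        pairs corresponds to weights=[[0.5, 0.5, 0, 0], [0, 0, 0.5, 0.5]],
+        with n_dens the summed densities of each pair (see
+        `get_resample_metadata`).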
+        """
+        sl = self._get_cl_resamples(weights)
+        nl = self._get_nl_resamples(n_dens)
+        cl = sl + nl
+        icl = np.linalg.inv(cl)
+        sn2_1pt = np.sum(sl[:, :, :, None] * icl[:, None, :, :], axis=2)
+        sn2_ell = np.sum(sn2_1pt[:, :, :, None] * sn2_1pt[:, None, :, :],
+                         axis=2)
+        trsn2_ell = np.trace(sn2_ell, axis1=1, axis2=2)
+        snr = np.sqrt(np.sum(trsn2_ell * (self.larr + 0.5) *
+                             self.d_ell * self.fsky))
+
+        if full_output:
+            dret = {'snr': snr, 'cl': sl, 'nl': nl, 'ls': self.larr}
+            return dret
+        else:
+            return snr
+
+    def get_sn(self, assign, full_output=False):
+        """ Compute signal-to-noise ratio from a bin assignment list.
+        Args:
+            assign (list): list of arrays. Each element of the list should
+                be a numpy array of integers, corresponding to the indices
+                of the initial groups that are now resampled into a larger
+                group.
+            full_output (bool): if true, a dictionary with additional
+                information will be returned. Otherwise just total S/N.
+        Returns:
+            If `full_output=True`, dictionary containing S/N, power
+            spectra and noise spectra. Otherwise just S/N.
+        """
+        w, ndens = self.get_resample_metadata(assign)
+        return self.get_sn_wn(w, ndens,
+                              full_output=full_output)
+
+    def check_edges(self, edges):
+        """ Returns `True` if there's something wrong with the edges.
+        """
+        swapped = False
+        if np.ndim(edges) > 0:
+            swapped = np.any(np.diff(edges) < 0)
+        return np.any(edges < 0) or \
+            np.any(edges > self.edges_large) or \
+            swapped
+
+    def get_nzs_from_edges(self, edges, assign_params=assign_params_default):
+        assign = self.assign_from_edges(edges, assign_params=assign_params)
+        nzs = np.array([np.sum(self.nz_list[a, :], axis=0)
+                        for a in assign])
+        return nzs
+
+    def assign_from_edges(self, edges, assign_params=assign_params_default,
+                          get_ids=False):
+        """ Get assignment list from edges and assign parameters.
+        Args:
+            edges (array_like): array of bin edges.
+            assign_params (dict): dictionary of assignment parameters
+                (see `assign_params_default`).
+            get_ids (bool): if `True`, output assignment arrays will
+                be accompanied by the associated bin id (with -1 for the
+                trash bin).
+        Returns:
+            List of assignment arrays ready to be used in e.g. `get_sn`.
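+
+        Example (illustrative; `calc` is a hypothetical instance): with
+        `edges=np.array([0.5, 1.0])` and the default parameters, each
+        initial group is placed in bin 0, 1 or 2 by its mean redshift:
+            >>> assign = calc.assign_from_edges(np.array([0.5, 1.0]))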
+        """
+        edges = np.atleast_1d(edges)
+        nbins = len(edges) + 1
+        # Bin IDs based on mean z
+        ids = np.digitize(self.z_means, bins=edges)
+        if assign_params['use_p_inbin'] or assign_params['use_p_outbin']:
+            def integrate_safe(p, z, m):
+                if np.sum(m) == 0:
+                    return 0
+                else:
+                    return simps(p[m], x=z[m])
+            # Calculate probabilities in each bin
+            zgroups = np.digitize(self.z_arr, bins=edges)
+            masks = [zgroups == i for i in range(nbins)]
+            if assign_params['use_p_outbin']:
+                # Matrix of probabilities in each bin
+                pzs = np.array([[integrate_safe(pz, self.z_arr, m)
+                                 for m in masks]
+                                for pz in self.pz_list])
+                pzd = np.array([pzs[j, ids[j]]
+                                for j in range(self.n_samples)])
+            else:
+                # Overlaps in own bin
+                pzd = np.array([integrate_safe(pz, self.z_arr, masks[i])
+                                for pz, i in zip(self.pz_list, ids)])
+            # Throw away based on in-bin probability
+            if assign_params['use_p_inbin']:
+                ids[pzd < assign_params['p_inbin_thr']] = -1
+            # Throw away based on off-bin probability
+            if assign_params['use_p_outbin']:
+                ids[np.array([np.sum(p > assign_params['p_outbin_thr']) > 1
+                              for p in pzs])] = -1
+        # Assignment list
+        if get_ids:
+            return [(i, np.where(ids == i)[0]) for i in np.unique(ids)]
+        else:
+            return [np.where(ids == i)[0] for i in np.unique(ids)]
+
+    def get_sn_from_edges(self, edges, full_output=False,
+                          assign_params=assign_params_default):
+        """ Compute signal-to-noise ratio from a set of edges and assignment
+        parameters.
+        Args:
+            edges (array_like): array of bin edges.
+            assign_params (dict): dictionary of assignment parameters
+                (see `assign_params_default`).
+            full_output (bool): if true, a dictionary with additional
+                information will be returned. Otherwise just total S/N.
+        Returns:
+            If `full_output=True`, dictionary containing S/N, power
+            spectra and noise spectra. Otherwise just S/N.
+        """
+        if self.check_edges(edges):
+            return 0
+        assign = self.assign_from_edges(edges, assign_params=assign_params)
+        return self.get_sn(assign, full_output=full_output)
diff --git a/tomo_challenge/classifiers/ComplexSOM.py b/tomo_challenge/classifiers/ComplexSOM.py
new file mode 100644
index 00000000..f43cdebf
--- /dev/null
+++ b/tomo_challenge/classifiers/ComplexSOM.py
@@ -0,0 +1,470 @@
+"""
+This is an example tomographic bin generator using a Self-Organising Map (SOM).
+
+Every classifier module needs to:
+ - have construction of the type
+       __init__ (self, bands, options) (see examples below)
+ - implement two functions:
+       train (self, training_data, training_z)
+       apply (self, data).
+ - define valid_options class variable.
+
+See Classifier Documentation below.
+"""
+
+import os
+from .base import Tomographer
+import numpy as np
+import rpy2
+import rpy2.robjects as ro
+import rpy2.robjects.packages as rpack
+from rpy2.robjects.vectors import StrVector, IntVector, DataFrame, FloatVector, FactorVector
+import pandas as pd
+from rpy2.robjects import pandas2ri
+from rpy2.robjects.conversion import localconverter
+from ..snc import SnCalc
+from scipy.interpolate import interp1d
+from scipy.optimize import minimize, brentq
+from scipy.integrate import simps
+import pickle
+
+#Check that all the needed packages are installed
+# R package names
+packnames = ('data.table','itertools','foreach','doParallel','RColorBrewer','devtools','matrixStats')
+base=ro.packages.importr("base")
+utils=ro.packages.importr("utils")
+stats=ro.packages.importr("stats")
+gr=ro.packages.importr("graphics")
+dev=ro.packages.importr("grDevices")
+utils.chooseCRANmirror(ind=1)
+# Selectively install what needs to be installed.
+names_to_install = [x for x in packnames if not rpack.isinstalled(x)]
+if len(names_to_install) > 0:
+    utils.install_packages(StrVector(names_to_install))
+base.Sys_setenv(TAR=base.system("which tar",intern=True))
+devtools=ro.packages.importr("devtools")
+#devtools.install_github("AngusWright/helpRfuncs")
+#devtools.install_github("AngusWright/kohonen/kohonen")
+kohonen=ro.packages.importr("kohonen")
+
+class ComplexSOM(Tomographer):
+    """ Complex SOM Classifier with SNR Optimisation """
+
+    # valid parameter -- see below
+    valid_options = ['bins','som_dim','num_groups','num_threads',
+                     'group_type','data_threshold','sparse_frac','plots',
+                     'plot_dir','metric','use_inbin','use_outbin','testing',
+                     'whiten','redshift_propset']
+    # this setting means dictionaries (not arrays) will be sent to
+    # train and apply
+    wants_arrays = False
+
+    def __init__ (self, bands, options):
+        """Constructor
+
+        Parameters:
+        -----------
+        bands: str
+          string containing valid bands, like 'riz' or 'griz'
+        options: dict
+          options come through here. Valid keys are listed as valid_options
+          class variable.
+
+        Note:
+        -----
+        Valid options are:
+            'bins' - number of tomographic bins
+            'som_dim' - dimensions of the SOM
+            'num_groups' - number of SOM groups/associations
+            'num_threads' - number of threads to use
+            'sparse_frac' - the sparse-sampling fraction used for training
+            'group_type' - do we want grouping by colour or redshift
+            'data_threshold' - [min,max] values used to threshold the
+                training data
+
+        """
+        self.bands = bands
+        self.opt = options
+
+    def train (self, training_data, training_z):
+        """Trains the SOM and outputs the resulting bins
+
+        Parameters:
+        -----------
+        training_data: numpy array, size Ngalaxies x Nbands
+          training data, each row is a galaxy, each column is a band as per
+          band defined above
+        training_z: numpy array, size Ngalaxies
+          true redshift for the training sample
+
+        """
+
+        print("Initialising")
+        #Number of tomographic bins
+        n_bin = self.opt['bins']
+        #Dimensions of the SOM
+        som_dim = self.opt['som_dim']
+        #Number of discrete SOM groups
+        num_groups = self.opt['num_groups']
+        #Number of threads
+        num_threads = self.opt['num_threads']
+        #Sparse Frac
+        sparse_frac = self.opt['sparse_frac']
+        #Data Threshold
+        data_threshold = self.opt['data_threshold']
+        #Metric
+        metric = self.opt['metric']
+        #Flag bad bins with inbin probability
+        use_inbin = self.opt['use_inbin']
+        #Flag bad bins with outbin probability
+        use_outbin = self.opt['use_outbin']
+        #Plots
+        plots = self.opt['plots']
+        #redshift properties to use in grouping
+        redshift_propset = self.opt['redshift_propset']
+        #Whiten the redshift data when grouping?
+        whiten = self.opt.get('whiten', False)  # default False if absent from config
+        #Plot Output Directory
+        plot_dir = self.opt['plot_dir']
+        #Group Type
+        group_type = self.opt['group_type']
+        #Check that the group type is a valid choice
+        if group_type != 'colour' and group_type != 'redshift':
+            group_type = 'redshift'
+        #Define the assign_params variable, used in bin optimisation
+        assign_params = {'p_inbin_thr': 0.5,
+                         'p_outbin_thr': 0.2,
+                         'use_p_inbin': use_inbin,
+                         'use_p_outbin': use_outbin}
+
+        #Define the redshift summary statistics (used for making groups in the 'redshift' case)
+        if redshift_propset==1:
+            property_labels = StrVector(("mean_z_true","med_z_true","sd_z_true","mad_z_true","iqr_z_true"))
+            property_expressions = StrVector(("mean(data$redshift_true)","median(data$redshift_true)","sd(data$redshift_true)",
+                                              "mad(data$redshift_true)",
+                                              "diff(quantile(data$redshift_true,probs=pnorm(c(-2,2))))"))
+        elif redshift_propset==2:
+            property_labels = StrVector(("mean_z_true","med_z_true","mad_z_true"))
+            property_expressions = StrVector(("mean(data$redshift_true)","median(data$redshift_true)",
+                                              "mad(data$redshift_true)"))
+        elif redshift_propset==3:
+            property_labels = StrVector(("quan_05","quan_16","quan_84","quan_95"))
+            property_expressions = ("quantile(data$redshift_true,probs=c(5,16,84,95)/100)")
+        else:
+            property_labels = StrVector(("mean_z_true","med_z_true","sd_z_true","mad_z_true","iqr_z_true"))
+            property_expressions = StrVector(("mean(data$redshift_true)","median(data$redshift_true)","sd(data$redshift_true)",
+                                              "mad(data$redshift_true)",
+                                              "diff(quantile(data$redshift_true,probs=pnorm(c(-2,2))))"))
+
+        #Define the SOM variables
+        if self.bands == 'riz':
+            #riz bands
+            if self.opt['sizes'] == True:
+                expressions = ("r-i","r-z","i-z",
+                               "z","r-i-(i-z)","mcal_T")
+            else:
+                expressions = ("r-i","r-z","i-z",
+                               "z","r-i-(i-z)")
+        elif self.bands == 'griz':
+            #griz bands
+            if self.opt['sizes'] == True:
+                expressions = ("g-r","g-i",
+                               "g-z","r-i","r-z","i-z",
+                               "z","g-r-(r-i)",
+                               "r-i-(i-z)","mcal_T")
+            else:
+                expressions = ("g-r","g-i",
+                               "g-z","r-i","r-z","i-z",
+                               "z","g-r-(r-i)",
+                               "r-i-(i-z)")
+        elif self.bands == 'grizy':
+            #grizy bands
+            if self.opt['sizes'] == True:
+                expressions = ("g-r","g-i",
+                               "g-z","g-y","r-i","r-z","r-y","i-z","i-y",
+                               "z-y","z","g-r-(r-i)",
+                               "r-i-(i-z)","i-z-(z-y)","mcal_T")
+            else:
+                expressions = ("g-r","g-i",
+                               "g-z","g-y","r-i","r-z","r-y","i-z","i-y",
+                               "z-y","z","g-r-(r-i)",
+                               "r-i-(i-z)","i-z-(z-y)")
+        elif self.bands == 'ugriz':
+            #ugriz bands
+            if self.opt['sizes'] == True:
+                expressions = ("u-g","u-r","u-i","u-z","g-r","g-i",
+                               "g-z","r-i","r-z","i-z",
+                               "z","u-g-(g-r)","g-r-(r-i)",
+                               "r-i-(i-z)","mcal_T")
+            else:
+                expressions = ("u-g","u-r","u-i","u-z","g-r","g-i",
+                               "g-z","r-i","r-z","i-z",
+                               "z","u-g-(g-r)","g-r-(r-i)",
+                               "r-i-(i-z)")
+        elif self.bands == 'ugrizy':
+            #ugrizy bands
+            if self.opt['sizes'] == True:
+                expressions = ("u-g","u-r","u-i","u-z","u-y","g-r","g-i",
+                               "g-z","g-y","r-i","r-z","r-y","i-z","i-y",
+                               "z-y","z","u-g-(g-r)","g-r-(r-i)",
+                               "r-i-(i-z)","i-z-(z-y)","mcal_T")
+            else:
+                expressions = ("u-g","u-r","u-i","u-z","u-y","g-r","g-i",
+                               "g-z","g-y","r-i","r-z","r-y","i-z","i-y",
+                               "z-y","z","u-g-(g-r)","g-r-(r-i)",
+                               "r-i-(i-z)","i-z-(z-y)")
+
+        #Define the output SOM name
+        if self.opt['sizes'] == True:
+            som_outname = f"SOM_{som_dim}_{self.bands}_withsizelin.pkl"
+        else:
+            som_outname = f"SOM_{som_dim}_{self.bands}.pkl"
+
+        #If Testing, load the pretrained SOM and matching training catalogue
+        if self.opt['testing'] == True:
+            #Use the default training set
+            with open(f"test_{som_outname}", 'rb') as f:
+                som = pickle.load(f)
+            print(base.length(som.rx2['unit.classif']))
+            with open("training_testcat.pkl", 'rb') as f:
+                train_df = pickle.load(f)
+            print(utils.str(train_df))
+        else:
+            #Use the training set provided
+            print("Preparing the data")
+            training_data = pd.DataFrame.from_dict(training_data)
+            #Add the redshift variable to the train data
+            print("Adding redshift info to training data")
+            training_data['redshift_true'] = training_z
+
+            if sparse_frac < 1:
+                print("Sparse Sampling the training data")
+                cut = np.random.uniform(0, 1, training_z.size) < sparse_frac
+                training_data = training_data[cut]
+                training_z = training_z[cut]
+
+            #Construct the training data frame (just a python-to-R data conversion)
+            print("Converting the data to R format")
+            with localconverter(ro.default_converter + pandas2ri.converter):
+                train_df = ro.conversion.py2rpy(training_data)
+
+            #with open("training_testcat.pkl", 'wb') as f:
+            #    pickle.dump(train_df, f)
+
+            #Construct or Load the SOM
+            if not os.path.exists(som_outname):
+                print("Training the SOM using R kohtrain")
+                #Train the SOM using R kohtrain
+                som=kohonen.kohtrain(data=train_df,som_dim=IntVector(som_dim),max_na_frac=0,data_threshold=FloatVector(data_threshold),
+                                     n_cores=num_threads,train_expr=StrVector(expressions),train_sparse=False,sparse_frac=sparse_frac)
+                print(base.length(som.rx2['unit.classif']))
+                #Output the SOM
+                with open(som_outname, 'wb') as f:
+                    pickle.dump(som, f)
+            else:
+                print("Loading the pretrained SOM")
+                with open(som_outname, 'rb') as f:
+                    som = pickle.load(f)
+                som.rx2['unit.classif']=FloatVector([])
+
+        #If grouping by redshift, construct the cell redshift statistics
+        if group_type == 'redshift' or plots == True:
+            print("Constructing cell-based redshift properties")
+            #Construct the Nz properties per SOM cell
+            cell_prop=kohonen.generate_kohgroup_property(som=som,data=train_df,
+                          expression=property_expressions,expr_label=property_labels,returnMatrix=True)
+            som=cell_prop.rx2('som')
+            som.rx2['clust.classif']=FloatVector([])
+            print("Constructing redshift-based hierarchical cluster tree")
+            #Cluster the SOM cells into num_groups groups
+            if whiten==True:
+                props = kohonen.kohwhiten(cell_prop.rx2['property'],train_expr=base.colnames(cell_prop.rx2['property']),
+                                          data_missing='NA',data_threshold=FloatVector([0,12]))
+                props = props.rx2("data.white")
+                props.rx[base.which(base.is_na(props))] = -10
+            else:
+                props = cell_prop.rx2['property']
+                props.rx[base.which(base.is_na(props))] = -1
+
+            print(base.summary(props))
+
+            if group_type=='redshift':
+                print("Groups will be constructed by redshift property")
+                hclust=stats.hclust(stats.dist(props))
+                cell_group=stats.cutree(hclust,k=num_groups)
+                #Assign the cell groups to the SOM structure
+                som.rx2['hclust']=hclust
+                som.rx2['cell_clust']=cell_group
+            else:
+                print("Groups will be constructed by colour property")
+
+        #Construct the Nz properties per SOM group
+        print("Constructing group-based redshift properties")
+        group_prop=kohonen.generate_kohgroup_property(som=som,data=train_df,
+                       expression=property_expressions,expr_label=property_labels,
+                       n_cluster_bins=num_groups,returnMatrix=True)
+
+        #extract the training som (just for convenience)
+        train_som = group_prop.rx2('som')
+
+        if plots == True and redshift_propset==1:
+            #Make the diagnostic plots
+            props = group_prop.rx2('property')
+            cprops = cell_prop.rx2('property')
+            print(base.colnames(props))
+            print("Constructing SOM diagnostic figures")
+            #Initialise the plot device
+            dev.png(file=f'{plot_dir}/SOMfig_%02d.png',height=5,width=5,res=220,unit='in')
+            #Paint the SOM by the training properties
+            for i in range(len(expressions)):
+                gr.plot(train_som,property=i+1,shape='straight',heatkeywidth=som_dim[0]/20,ncolors=1e3,zlim=FloatVector([-3,3]))
+            #Plot the standard diagnostics:
+            #Count
+            gr.plot(train_som,type='count',shape='straight',heatkeywidth=som_dim[0]/20,zlog=True,ncolors=1e3)
+            #Quality
+            gr.plot(train_som,type='quality',shape='straight',heatkeywidth=som_dim[0]/20,zlim=FloatVector([0,0.5]),ncolors=1e3)
+            #UMatrix
+            gr.plot(train_som,type='dist',shape='straight',heatkeywidth=som_dim[0]/20,zlog=True,zlim=FloatVector([0,0.5]),ncolors=1e3)
+            #Sometimes these figures need customised limits
+
+            #Paint by the redshift diagnostics:
+            #mean redshift
+            gr.plot(train_som,property=cprops.rx(True,base.which(cprops.colnames.ro=='mean_z_true')),ncolors=1e3,zlog=False,
+                    type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Cell mean redshift',zlim=FloatVector([0,1.8]))
+            #redshift std
+            gr.plot(train_som,property=cprops.rx(True,base.which(cprops.colnames.ro=='sd_z_true')),ncolors=1e3,zlog=False,
+                    type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Cell redshift stdev',zlim=FloatVector([0,0.2]))
+            #2sigma redshift IQR
+            gr.plot(train_som,property=cprops.rx(True,base.which(cprops.colnames.ro=='iqr_z_true')),ncolors=1e3,zlog=False,
+                    type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Cell 2sigma IQR',zlim=FloatVector([0,0.4]))
+            #mean redshift
+            gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='mean_z_true')),ncolors=1e3,zlog=False,
+                    type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Group mean redshift',zlim=FloatVector([0,1.8]))
+            #redshift std
+            gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='sd_z_true')),ncolors=1e3,zlog=False,
+                    type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Group redshift stdev',zlim=FloatVector([0,0.2]))
+            #2sigma redshift IQR
+            gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='iqr_z_true')),ncolors=1e3,zlog=False,
+                    type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Group 2sigma IQR',zlim=FloatVector([0,0.4]))
+            ##N group
+            #gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='N')),ncolors=1e3,zlog=False,
+            #        type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='N group')
+
+            #Close the plot device
+            dev.dev_off()
+
+        print("Constructing the per-group Nz")
+        #Get the group assignments for the training data
+        train_group = np.array(train_som.rx2['clust.classif'])
+        tab=base.table(FactorVector(train_som.rx2['clust.classif'],levels=base.seq(num_groups)))
+        print(tab)
+        print(np.arange(num_groups)[np.array(tab)==0])
+        #Construct the Nz z-axis
+        z_arr = np.linspace(0, 2, 124)
+        z_cen = z_arr[0:-1]+(z_arr[1]-z_arr[0])/2.
+        #Construct the per-group Nz
+        nzs = [(np.histogram(training_z[train_group == group+1],z_arr)[0])
+               for group in np.arange(num_groups)[np.array(tab)!=0]]
+        #np.save('plots/nzs.npy', nzs)
+
+        #Update the fsky
+        # Renormalize to use the same sample definitions as the
+        # official metric calculators.
+        fsky = 0.25
+        ndens_arcmin = 20.
+        area_arcmin = (180*60/np.pi)**2*4*np.pi*fsky
+        ng_tot_goal = ndens_arcmin * area_arcmin
+        ng_tot_curr = np.sum(np.array(tab))
+        nzs = np.array([nz * ng_tot_goal / ng_tot_curr for nz in nzs])
+        #np.save('plots/nzs_norm.npy', nzs)
+
+        # Now initialize a S/N calculator for these initial groups.
+        os.system('rm wl_nb%d.npz' % n_bin)
+        if metric == 'SNR_ww':
+            c_wl = SnCalc(z_cen, nzs, use_clustering=False, fsky=fsky)
+        else:
+            c_wl = SnCalc(z_cen, nzs, use_clustering=True,fsky=fsky)
+        print("Initializing Cls")
+        c_wl.get_cl_matrix(fname_save='wl_nb%d.npz' % n_bin)
+
+        # Initial edge guess
+        nz_tot = np.sum(nzs, axis=0)
+        cumulative_fraction = np.cumsum(nz_tot) / np.sum(nz_tot)
+        cumul_f = interp1d(z_cen, cumulative_fraction, bounds_error=False,
+                           fill_value=(0, 1))
+        edges_0 = np.array([brentq(lambda z : cumul_f(z) - q, 0, 2)
+                            for q in (np.arange(n_bin-1)+1.)/n_bin])
+
+        # Finally, define the function to minimize and optimize the bin edges.
+        def minus_sn(edges, calc):
+            return -calc.get_sn_from_edges(edges,assign_params=assign_params)
+
+        print("Optimizing WL")
+        res = minimize(minus_sn, edges_0, method='Powell', args=(c_wl,))
+        print("Final edges: ", res.x)
+        print("Maximum S/N: ", c_wl.get_sn_from_edges(res.x,
+                                                      assign_params=assign_params))
+        print(" ")
+
+        #Assign the SOM groups to tomographic bins
+        print("Outputting trained SOM and redshift ordering of SOM groups")
+        groups_in_tomo = c_wl.assign_from_edges(res.x,
+                                                assign_params=assign_params,
+                                                get_ids=True)
+        group_bins = np.zeros(num_groups)
+        for bin_no, groups in groups_in_tomo:
+            print(bin_no, groups)
+            group_bins[groups] = bin_no
+
+        print("Finished")
+        self.train_som = train_som
+        self.group_bins = group_bins
+
+    def apply (self, data):
+        """Applies training to the data.
+
+        Parameters:
+        -----------
+        Data: numpy array, size Ngalaxies x Nbands
+          testing data, each row is a galaxy, each column is a band as per
+          band defined above
+
+        Returns:
+          tomographic_selections: numpy array, int, size Ngalaxies
+            tomographic selection for galaxies returned as bin number for
+            each galaxy.
+        """
+
+        #Number of tomographic bins
+        n_bin = self.opt['bins']
+        #Number of discrete SOM groups
+        num_groups = self.opt['num_groups']
+
+        print("Preparing the data")
+        data = pd.DataFrame.from_dict(data)
+
+        #Construct the validation data frame (just a python-to-R data conversion)
+        print("Converting the data to R format")
+        with localconverter(ro.default_converter + pandas2ri.converter):
+            data_df = ro.conversion.py2rpy(data)
+
+        print("Parsing the validation data into the SOM groupings")
+        #Generate the validation associations/groups
+        group_prop=kohonen.generate_kohgroup_property(som=self.train_som,data=data_df,
+                       expression="nrow(data)",expr_label="N",
+                       n_cluster_bins=num_groups,n_cores=self.opt['num_threads'],returnMatrix=True)
+
+        #extract the validation som (just for convenience)
+        valid_som = group_prop.rx2('som')
+
+        #Assign the sources, by group, to tomographic bins
+        print("Output source tomographic bin assignments")
+        valid_bin = base.unsplit(IntVector(self.group_bins),
+                                 FactorVector(valid_som.rx2['clust.classif'],
+                                              levels=base.seq(num_groups)),
+                                 drop=False)
+        valid_bin = np.array(valid_bin)
+
+        return valid_bin
+
diff --git a/tomo_challenge/classifiers/SimpleSOM.py b/tomo_challenge/classifiers/SimpleSOM.py
new file mode 100644
index 00000000..43d7ed98
--- /dev/null
+++ b/tomo_challenge/classifiers/SimpleSOM.py
@@ -0,0 +1,323 @@
+"""
+This is an example tomographic bin generator using a Self-Organising Map (SOM).
+
+Every classifier module needs to:
+ - have construction of the type
+       __init__ (self, bands, options) (see examples below)
+ - implement two functions:
+       train (self, training_data, training_z)
+       apply (self, data).
+ - define valid_options class variable.
+
+See Classifier Documentation below.
+"""
+
+from .base import Tomographer
+import numpy as np
+import rpy2
+import rpy2.robjects as ro
+import rpy2.robjects.packages as rpack
+from rpy2.robjects.vectors import StrVector, IntVector, DataFrame, FloatVector, FactorVector
+import pandas as pd
+from rpy2.robjects import pandas2ri
+from rpy2.robjects.conversion import localconverter
+
+#Check that all the needed packages are installed
+# R package names
+packnames = ('data.table','itertools','foreach','doParallel','RColorBrewer','devtools','matrixStats')
+base=ro.packages.importr("base")
+utils=ro.packages.importr("utils")
+stats=ro.packages.importr("stats")
+gr=ro.packages.importr("graphics")
+dev=ro.packages.importr("grDevices")
+utils.chooseCRANmirror(ind=1)
+# Selectively install what needs to be installed.
+names_to_install = [x for x in packnames if not rpack.isinstalled(x)]
+if len(names_to_install) > 0:
+    utils.install_packages(StrVector(names_to_install))
+base.Sys_setenv(TAR=base.system("which tar",intern=True))
+devtools=ro.packages.importr("devtools")
+#devtools.install_github("AngusWright/helpRfuncs")
+#devtools.install_github("AngusWright/kohonen/kohonen")
+kohonen=ro.packages.importr("kohonen")
+
+class SimpleSOM(Tomographer):
+    """ Simplistic SOM Classifier """
+
+    # valid parameter -- see below
+    valid_options = ['bins','som_dim','num_groups','num_threads',
+                     'group_type','data_threshold','sparse_frac','plots','plot_dir']
+    # this setting means dictionaries (not arrays) will be sent to
+    # train and apply
+    wants_arrays = False
+
+    def __init__ (self, bands, options):
+        """Constructor
+
+        Parameters:
+        -----------
+        bands: str
+          string containing valid bands, like 'riz' or 'griz'
+        options: dict
+          options come through here. Valid keys are listed as valid_options
+          class variable.
+ + Note: + ----- + Valiad options are: + 'bins' - number of tomographic bins + 'som_dim' - dimensions of the SOM + 'num_groups' - number of SOM groups/associations + 'num_threads' - number of threads to use + 'sparse_frac' - the sparse-sampling fraction used for training + 'group_type' - do we want grouping by colour or redshift + 'data_threshold' - number of threads to use + + """ + self.bands = bands + self.opt = options + + def train (self, training_data, training_z): + """Trains the SOM and outputs the resulting bins + + Parameters: + ----------- + training_data: numpy array, size Ngalaxes x Nbands + training data, each row is a galaxy, each column is a band as per + band defined above + training_z: numpy array, size Ngalaxies + true redshift for the training sample + + """ + + print("Initialising") + #Number of tomographic bins + n_bin = self.opt['bins'] + #Dimensions of the SOM + som_dim = self.opt['som_dim'] + #Number of discrete SOM groups + num_groups = self.opt['num_groups'] + #Number of threads + num_threads = self.opt['num_threads'] + #Sparse Frac + sparse_frac = self.opt['sparse_frac'] + #Data Threshold + data_threshold = self.opt['data_threshold'] + #Plots + plots = self.opt['plots'] + #Plot Output Directory + plot_dir = self.opt['plot_dir'] + #Group Type + group_type = self.opt['group_type'] + #Check that the group type is a valid choice + if group_type != 'colour' and group_type != 'redshift': + group_type = 'redshift' + + #Define the redshift summary statistics (used for making groups in the 'redshift' case + property_labels = ("mean_z_true","med_z_true","sd_z_true","mad_z_true","N","iqr_z_true") + property_expressions = ("mean(data$redshift_true)","median(data$redshift_true)","sd(data$redshift_true)", + "mad(data$redshift_true)","nrow(data)", + "diff(quantile(data$redshift_true,probs=pnorm(c(-2,2))))") + #Define the SOM variables + if self.bands == 'riz': + #riz bands + #expressions = ("r_mag-i_mag","r_mag-z_mag","i_mag-z_mag", + # "z_mag","r_mag-i_mag-(i_mag-z_mag)") + expressions = ("r-i","r-z","i-z", + "z","r-i-(i-z)") + elif self.bands == 'griz': + #griz bands + #expressions = ("g_mag-r_mag","g_mag-i_mag", + # "g_mag-z_mag","r_mag-i_mag","r_mag-z_mag","i_mag-z_mag", + # "z_mag","g_mag-r_mag-(r_mag-i_mag)", + # "r_mag-i_mag-(i_mag-z_mag)") + expressions = ("g-r","g-i", + "g-z","r-i","r-z","i-z", + "z","g-r-(r-i)", + "r-i-(i-z)") + elif self.bands == 'grizy': + #grizy bands + #expressions = ("g_mag-r_mag","g_mag-i_mag", + # "g_mag-z_mag","g_mag-y_mag","r_mag-i_mag","r_mag-z_mag","r_mag-y_mag","i_mag-z_mag","i_mag-y_mag", + # "z_mag-y_mag","z_mag","g_mag-r_mag-(r_mag-i_mag)", + # "r_mag-i_mag-(i_mag-z_mag)","i_mag-z_mag-(z_mag-y_mag)") + expressions = ("g-r","g-i", + "g-z","g-y","r-i","r-z","r-y","i-z","i-y", + "z-y","z","g-r-(r-i)", + "r-i-(i-z)","i-z-(z-y)") + elif self.bands == 'ugriz': + #ugrizy bands + #expressions = ("u_mag-g_mag","u_mag-r_mag","u_mag-i_mag","u_mag-z_mag","g_mag-r_mag","g_mag-i_mag", + # "g_mag-z_mag","r_mag-i_mag","r_mag-z_mag","i_mag-z_mag", + # "z_mag","u_mag-g_mag-(g_mag-r_mag)","g_mag-r_mag-(r_mag-i_mag)", + # "r_mag-i_mag-(i_mag-z_mag)") + expressions = ("u-g","u-r","u-i","u-z","g-r","g-i", + "g-z","r-i","r-z","i-z", + "z","u-g-(g-r)","g-r-(r-i)", + "r-i-(i-z)") + elif self.bands == 'ugrizy': + #ugrizy bands + #expressions = ("u_mag-g_mag","u_mag-r_mag","u_mag-i_mag","u_mag-z_mag","u_mag-y_mag","g_mag-r_mag","g_mag-i_mag", + # "g_mag-z_mag","g_mag-y_mag","r_mag-i_mag","r_mag-z_mag","r_mag-y_mag","i_mag-z_mag","i_mag-y_mag", + # 
"z_mag-y_mag","z_mag","u_mag-g_mag-(g_mag-r_mag)","g_mag-r_mag-(r_mag-i_mag)", + # "r_mag-i_mag-(i_mag-z_mag)","i_mag-z_mag-(z_mag-y_mag)") + expressions = ("u-g","u-r","u-i","u-z","u-y","g-r","g-i", + "g-z","g-y","r-i","r-z","r-y","i-z","i-y", + "z-y","z","u-g-(g-r)","g-r-(r-i)", + "r-i-(i-z)","i-z-(z-y)") + + print("Preparing the data") + training_data = pd.DataFrame.from_dict(training_data) + #Add the redshift variable to the train data + print("Adding redshift info to training data") + training_data['redshift_true'] = training_z + + if sparse_frac < 1: + print("Sparse Sampling the training data") + cut = np.random.uniform(0, 1, training_z.size) < sparse_frac + training_data = training_data[cut] + + #Construct the training data frame (just a python-to-R data conversion) + print("Converting the data to R format") + with localconverter(ro.default_converter + pandas2ri.converter): + #train_df = ro.conversion.py2rpy(train[['u_mag','g_mag','r_mag','i_mag','z_mag','y_mag']]) + train_df = ro.conversion.py2rpy(training_data) + + print("Training the SOM using R kohtrain") + #Train the SOM using R kohtrain + som=kohonen.kohtrain(data=train_df,som_dim=IntVector(som_dim),max_na_frac=0,data_threshold=FloatVector(data_threshold), + n_cores=num_threads,train_expr=StrVector(expressions),train_sparse=False,sparse_frac=sparse_frac) + + #If grouping by redshift, construct the cell redshift statistics + if group_type == 'redshift' or plots == True: + print("Constructing cell-based redshift properties") + #Construct the Nz properties per SOM cell + cell_prop=kohonen.generate_kohgroup_property(som=som,data=train_df, + expression=StrVector(property_expressions),expr_label=StrVector(property_labels)) + print("Constructing redshift-based hierarchical cluster tree") + #Cluster the SOM cells into num_groups groups + hclust=stats.hclust(stats.dist(cell_prop.rx2['property'])) + cell_group=stats.cutree(hclust,k=num_groups) + + #Assign the cell groups to the SOM structure + som.rx2['hclust']=hclust + som.rx2['cell_clust']=cell_group + + #Construct the Nz properties per SOM group + print("Constructing group-based redshift properties") + group_prop=kohonen.generate_kohgroup_property(som=som,data=train_df, + expression=StrVector(property_expressions),expr_label=StrVector(property_labels), + n_cluster_bins=num_groups) + + #extract the training som (just for convenience) + train_som = group_prop.rx2('som') + + if plots == True: + #Make the diagnostic plots + props = group_prop.rx2('property') + print(base.colnames(props)) + print("Constructing SOM diagnostic figures") + #Initialise the plot device + dev.png(file=f'{plot_dir}/SOMfig_%02d.png',height=5,width=5,res=220,unit='in') + #Paint the SOM by the training properties + for i in range(len(expressions)): + gr.plot(train_som,property=i+1,shape='straight',heatkeywidth=som_dim[0]/20,ncolors=1e3,zlim=FloatVector([-3,3])) + #Plot the standard diagnostics: + #Count + gr.plot(train_som,type='count',shape='straight',heatkeywidth=som_dim[0]/20,zlog=True,ncolors=1e3) + #Quality + gr.plot(train_som,type='quality',shape='straight',heatkeywidth=som_dim[0]/20,zlim=FloatVector([0,0.5]),ncolors=1e3) + #UMatrix + gr.plot(train_som,type='dist',shape='straight',heatkeywidth=som_dim[0]/20,zlog=True,zlim=FloatVector([0,0.5]),ncolors=1e3) + #Sometimes these figures need customised limits; the "try" stops bad plots from stopping the code + + #Paint by the redshift diagnostics: + #mean redshift + 
gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='mean_z_true')),ncolors=1e3,zlog=False, + type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Group mean redshift',zlim=FloatVector([0,1.8])) + #redshift std + gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='sd_z_true')),ncolors=1e3,zlog=False, + type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='Group redshift stdev',zlim=FloatVector([0,0.2])) + #2sigma redshift IQR + gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='iqr_z_true')),ncolors=1e3,zlog=False, + type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='group 2sigma IQR',zlim=FloatVector([0,0.4])) + #N group + gr.plot(train_som,property=props.rx(True,base.which(props.colnames.ro=='N')),ncolors=1e3,zlog=False, + type='property',shape='straight',heatkeywidth=som_dim[0]/20,main='N group') + + #Close the plot device + dev.dev_off() + + + + print("Outputting trained SOM and redshift ordering of SOM groups") + #Extract the mean-z per group + group_z = group_prop.rx2('property').rx(True, + base.which(group_prop.rx2('property').colnames.ro=='mean_z_true')) + #Order the groups by mean z + z_order = base.order(group_z) + + print("Finished") + self.train_som = train_som + self.group_z = group_z + self.z_order = z_order + + def apply (self, data): + """Applies training to the data. + + Parameters: + ----------- + Data: numpy array, size Ngalaxes x Nbands + testing data, each row is a galaxy, each column is a band as per + band defined above + + Returns: + tomographic_selections: numpy array, int, size Ngalaxies + tomographic selection for galaxies return as bin number for + each galaxy. + """ + + #Number of tomographic bins + n_bin = self.opt['bins'] + + print("Preparing the data") + data = pd.DataFrame.from_dict(data) + + #Construct the validation data frame (just a python-to-R data conversion) + print("Converting the data to R format") + with localconverter(ro.default_converter + pandas2ri.converter): + #train_df = ro.conversion.py2rpy(train[['u_mag','g_mag','r_mag','i_mag','z_mag','y_mag']]) + data_df = ro.conversion.py2rpy(data) + + print("Parsing the validation data into the SOM groupings") + #Generate the validation associations/groups + group_prop=kohonen.generate_kohgroup_property(som=self.train_som,data=data_df, + expression="nrow(data)",expr_label="N", + n_cluster_bins=self.opt['num_groups'],n_cores=self.opt['num_threads']) + + #Calculate the cumulative count + print("Generate cumulative source counts a.f.o. group mean z") + zcumsum=base.cumsum(group_prop.rx2('property').rx(self.z_order)) + + # Find the edges that split the redshifts into n_z bins of + # equal number counts in each + print("Assign the groups to tomographic bins") + p = np.linspace(0, 100, n_bin + 1) + n_edges = np.percentile(zcumsum, p) + + # Now put the groups into redshift bins. 
+        group_bins = FloatVector(base.cut(FloatVector(zcumsum), FloatVector(n_edges),
+                                 include_lowest=True)).rx(base.order(self.z_order))
+
+        #Extract the validation som (just for convenience)
+        valid_som = group_prop.rx2('som')
+
+        #Assign the sources, by group, to tomographic bins
+        print("Output source tomographic bin assignments")
+        valid_bin = base.unsplit(group_bins, FactorVector(valid_som.rx2['clust.classif'],
+                                 levels=base.seq(self.opt['num_groups'])), drop=False)
+        #Shift from R's 1-based bin labels to 0-based bin indices
+        valid_bin = np.array(valid_bin) - 1
+
+        return valid_bin
+
diff --git a/tomo_challenge/data.py b/tomo_challenge/data.py
index 03eadc5c..5b8a7af4 100644
--- a/tomo_challenge/data.py
+++ b/tomo_challenge/data.py
@@ -124,6 +124,12 @@ def add_colors(data, bands, errors=False):
         if errors:
             data[f'{b}{c}_err'] = np.sqrt(data[f'{b}_err']**2 + data[f'{c}_err']**2)
 
+def add_size(data, filename):
+
+    with h5py.File(filename, 'r') as f:
+        # load the mcal_T (metacal size) column
+        data['mcal_T'] = f['mcal_T'][:]
+
 def dict_to_array(data, bands, errors=False, colors=False):
 
     nobj = data[bands[0]].size
@@ -165,12 +171,15 @@
 
-def load_data(filename, bands, colors=False, errors=False, array=False):
+def load_data(filename, bands, colors=False, errors=False, size=False, array=False):
     data = load_mags(filename, bands, errors=errors)
 
     if colors:
         add_colors(data, bands, errors=errors)
 
+    if size:
+        add_size(data, filename)
+
     if array:
         data = dict_to_array(data, bands, errors=errors, colors=colors)
 
diff --git a/tomo_challenge/snc.py b/tomo_challenge/snc.py
new file mode 100644
index 00000000..4c1d537e
--- /dev/null
+++ b/tomo_challenge/snc.py
@@ -0,0 +1,370 @@
+import numpy as np
+import pyccl as ccl
+from scipy.integrate import simps
+import os
+
+
+assign_params_default = {'p_inbin_thr': 0.5,
+                         'p_outbin_thr': 0.2,
+                         'use_p_inbin': False,
+                         'use_p_outbin': False}
+
+
+class SnCalc(object):
+    edges_large = 3.
+
+    def __init__(self, z_arr, nz_list, fsky=0.4, lmax=2000, n_ell=100,
+                 s_gamma=0.26, use_clustering=False, use_3x2=False,
+                 integrator='qag_quad'):
+        """ S/N calculator
+        Args:
+            z_arr (array_like): array of redshifts at which all the N(z)s are
+                sampled. They should be linearly spaced.
+            nz_list (list): list of arrays containing the redshift
+                distributions of all the initial groups (each array in the
+                list corresponds to one group). The arrays should contain the
+                redshift distribution sampled at `z_arr`, with its integral
+                over `z_arr` equal to the number of galaxies in that group.
+            fsky (float): sky fraction (for SNR calculation and for
+                transforming number of galaxies into number densities).
+            lmax (float): maximum ell.
+            n_ell (int): number of ell bandpowers.
+            s_gamma (float): rms ellipticity scatter (relevant if
+                `use_clustering=False`).
+            use_clustering (bool): if `True`, SNR will be computed for
+                clustering instead of lensing.
+            use_3x2 (bool): if `True`, SNR will be computed for 3x2pt.
+            integrator (string): CCL integration method. Either 'qag_quad'
+                or 'spline'.
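+        Example (illustrative sketch only; the Gaussian N(z)s below are
+        placeholders, not real data):
+            >>> z = np.linspace(0, 2, 512)
+            >>> nzs = [1e4*np.exp(-0.5*((z-zc)/0.1)**2) for zc in (0.5, 0.9)]
+            >>> calc = SnCalc(z, nzs, fsky=0.4)
+            >>> snr = calc.get_sn([np.array([0]), np.array([1])])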
+ """ + self.integrator = integrator + self.s_gamma = s_gamma + self.fsky = fsky + self.lmax = lmax + self.n_ell = n_ell + ell_edges = np.logspace(2, np.log10(lmax), n_ell+1) + self.larr = 0.5*(ell_edges[1:]+ell_edges[:-1]) + self.d_ell = (ell_edges[1:]-ell_edges[:-1]) + self.n_samples = len(nz_list) + self.z_arr = z_arr + self.nz_list = nz_list + self.n_gals = np.array([simps(nz, x=self.z_arr) + for nz in self.nz_list]) + self.pz_list = self.nz_list / self.n_gals[:, None] + self.z_means = np.array([simps(self.z_arr*nz, x=self.z_arr) + for nz in self.nz_list]) / self.n_gals + self.n_dens = self.n_gals / (4*np.pi*self.fsky) + self.cls = None + self.use_clustering = use_clustering + self.use_3x2 = use_3x2 + if use_3x2: + self.n_tracers = 2*self.n_samples + else: + self.n_tracers = self.n_samples + + def _bz_model(self, cosmo, z): + return 1./ccl.growth_factor(cosmo, 1./(1+z)) + + def get_cl_matrix(self, fname_save=None, recompute=False): + """ Computes matrix of power spectra between all the initial groups. + Args: + fname_save (string): if not None, the result will be saved to a + file by this name, and if present and `recompute=False`, the + matrix will be read from that file. + recompute (bool): if `True`, any previous calculations of the + C_ell matrix stored on file will be ignored, and the matrix + will be recomputed. + """ + if fname_save is not None: + if os.path.isfile(fname_save) and not recompute: + self.cls = np.load(fname_save)['cls'] + return + + # Cosmology + cosmo = ccl.Cosmology(Omega_c=0.25, + Omega_b=0.05, + h=0.67, n_s=0.96, + sigma8=0.81) + + # Tracers + bz = self._bz_model(cosmo, self.z_arr) + if self.use_3x2: + trs_gc = [ccl.NumberCountsTracer(cosmo, False, (self.z_arr, nz), + bias=(self.z_arr, bz)) + for nz in self.nz_list] + trs_wl = [ccl.WeakLensingTracer(cosmo, (self.z_arr, nz)) + for nz in self.nz_list] + trs = trs_gc + trs_wl + else: + if self.use_clustering: + trs = [ccl.NumberCountsTracer(cosmo, False, (self.z_arr, nz), + bias=(self.z_arr, bz)) + for nz in self.nz_list] + else: + trs = [ccl.WeakLensingTracer(cosmo, (self.z_arr, nz)) + for nz in self.nz_list] + + # Cls + self.cls = np.zeros([self.n_ell, self.n_tracers, self.n_tracers]) + for i in range(self.n_tracers): + for j in range(i, self.n_tracers): + try: + cl = ccl.angular_cl(cosmo, trs[i], trs[j], self.larr, + limber_integration_method=self.integrator) + except: + cl = np.zeros(len(self.larr)) + self.cls[:, i, j] = cl + if j != i: + self.cls[:, j, i] = cl + + if fname_save is not None: + np.savez(fname_save, cls=self.cls) + + def get_resample_metadata(self, assign): + """ Transform an assignment list into a weight matrix and the + corresponding number densities. + Args: + assign (list): list of arrays. Each element of the list should + be a numpy array of integers, corresponding to the indices + of the initial groups that are now resampled into a larger + group. + """ + n_resamples = len(assign) + weight_res = np.zeros([n_resamples, self.n_samples]) + n_dens_res = np.zeros(n_resamples) + for ia, a in enumerate(assign): + ndens = self.n_dens[a] + ndens_tot = np.sum(ndens) + n_dens_res[ia] = ndens_tot + weight_res[ia, a] = ndens / ndens_tot + return weight_res, n_dens_res + + def _get_cl_resamples(self, weights): + """ Gets C_ell matrix for resampled groups from weights matrix. 
+ """ + if self.cls is None: + self.get_cl_matrix() + if self.use_3x2: + w_use = np.tile(weights, (2, 2)) + else: + w_use = weights + return np.sum(np.sum(w_use[None, None, :, :] * + self.cls[:, :, None, :], + axis=-1)[:, None, :, :] * + w_use[None, :, :, None], + axis=-2) + + def _get_nl_resamples(self, n_dens): + """ Gets noise contribution to C_ell matrix for resampled groups + from list of number densities. + """ + if self.use_3x2: + n_diag = np.array(list(1./n_dens) + + list(self.s_gamma**2/n_dens)) + return np.diag(n_diag)[None, :, :] + else: + if self.use_clustering: + return np.diag(1./n_dens)[None, :, :] + else: + return np.diag(self.s_gamma**2/n_dens)[None, :, :] + + def get_sn_max(self, full_output=False): + """ Compute maximum signal-to-noise ratio given the input + groups + Args: + full_output (bool): if true, a dictionary with additional + information will be returned. Otherwise just total S/N. + Returns: + If `full_output=True`, dictionary containing S/N, power + spectra and noise spectra. Otherwise just S/N. + """ + weights = np.eye(self.n_samples) + n_dens = self.n_dens + return self.get_sn_wn(weights, n_dens, full_output=full_output) + + def get_kltrans(self): + """ Compute KL decomposition. + + Returns: + Fisher matrix element for each of the KL modes. + """ + nl = self._get_nl_resamples(self.n_dens) + nij = nl*np.ones(self.n_ell)[:, None, None] + cij = self.cls + nij + sij = cij - nij + inv_nij = np.linalg.inv(nij) + metric = inv_nij + + def change_basis(c, m, ev): + return np.array([np.diag(np.dot(ev[i].T, + np.dot(m[i], + np.dot(c[i], + np.dot(m[i], + ev[i]))))) + for i in range(self.n_ell)]) + + def diagonalize(c, m): + im = np.linalg.inv(m) + ll = np.linalg.cholesky(m) + ill = np.linalg.cholesky(im) + cl = np.array([np.dot(np.transpose(ll[i]), + np.dot(c[i], ll[i])) + for i in range(self.n_ell)]) + c_p, v = np.linalg.eigh(cl) + ev = np.array([np.dot(np.transpose(ill[i]), v[i]) + for i in range(self.n_ell)]) + # iden = change_basis(im, m, ev) + return ev, c_p + + e_v, c_p = diagonalize(cij, metric) + s_p = change_basis(sij, metric, e_v) + nmodes_l = (self.fsky * self.d_ell * (self.larr+0.5)) + fish_kl = nmodes_l[:, None]*(s_p/c_p)**2 + isort = np.argsort(-np.sum(fish_kl, axis=0)) + e_o = e_v[:, :, isort] + # f_o = np.array([np.dot(inv_nij[l], e_o[l, :, :]) + # for l in range(self.n_ell)]) + c_p = change_basis(cij, metric, e_o) + s_p = change_basis(sij, metric, e_o) + + fish_kl = nmodes_l[:, None]*(s_p/c_p)**2 + fish_permode = np.sum(fish_kl, axis=0) + fish_cumul = np.cumsum(fish_permode) + return fish_kl, fish_permode, fish_cumul + + def get_sn_wn(self, weights, n_dens, full_output=False): + """ Compute signal-to-noise ratio from weights matrix and number + densities of the resampled groups. + Args: + weights (array_like): weights matrix of shape + `(N_resample, N_initial)`, where `N_resample` is the + number of new groups, and `N_initial` is the original + number of groups. Each entry corresponds to the weight + which with a given initial group enters the new set of + groups. + n_dens (array_like): number density (in sterad^-1) of the + new groups. + full_output (bool): if true, a dictionary with additional + information will be returned. Otherwise just total S/N. + Returns: + If `full_output=True`, dictionary containing S/N, power + spectra and noise spectra. Otherwise just S/N. 
+ """ + sl = self._get_cl_resamples(weights) + nl = self._get_nl_resamples(n_dens) + cl = sl + nl + icl = np.linalg.inv(cl) + sn2_1pt = np.sum(sl[:, :, :, None] * icl[:, None, :, :], axis=2) + sn2_ell = np.sum(sn2_1pt[:, :, :, None] * sn2_1pt[:, None, :, :], + axis=2) + trsn2_ell = np.trace(sn2_ell, axis1=1, axis2=2) + snr = np.sqrt(np.sum(trsn2_ell * (self.larr + 0.5) * + self.d_ell * self.fsky)) + + if full_output: + dret = {'snr': snr, 'cl': sl, 'nl': nl, 'ls': self.larr} + return dret + else: + return snr + + def get_sn(self, assign, full_output=False): + """ Compute signal-to-noise ratio from a bin assignment list. + Args: + assign (list): list of arrays. Each element of the list should + be a numpy array of integers, corresponding to the indices + of the initial groups that are now resampled into a larger + group. + full_output (bool): if true, a dictionary with additional + information will be returned. Otherwise just total S/N. + Returns: + If `full_output=True`, dictionary containing S/N, power + spectra and noise spectra. Otherwise just S/N. + """ + w, ndens = self.get_resample_metadata(assign) + return self.get_sn_wn(w, ndens, + full_output=full_output) + + def check_edges(self, edges): + """ Returns `True` if there's something wrong with the edges. + """ + swapped = False + if np.ndim(edges) > 0: + swapped = np.any(np.diff(edges) < 0) + return np.any(edges < 0) or \ + np.any(edges > self.edges_large) or \ + swapped + + def get_nzs_from_edges(self, edges, assign_params=assign_params_default): + assign = self.assign_from_edges(edges, assign_params=assign_params) + nzs = np.array([np.sum(self.nz_list[a, :], axis=0) + for a in assign]) + return nzs + + def assign_from_edges(self, edges, assign_params=assign_params_default, + get_ids=False): + """ Get assignment list from edges and assign parameters. + Args: + edges (array_like): array of bin edges. + assign_params (dict): dictionary of assignment parameters + (see `assign_params_default`). + get_ids (bool): if `True`, output assignment arrays will + be accompanied by the associated bin id (with -1 for the + trash bin). + Returns: + List of assignment arrays ready to be used in e.g. `get_sn`. 
+ """ + edges = np.atleast_1d(edges) + nbins = len(edges) + 1 + # Bin IDs based on mean z + ids = np.digitize(self.z_means, bins=edges) + if assign_params['use_p_inbin'] or assign_params['use_p_outbin']: + def integrate_safe(p, z, m): + if np.sum(m) == 0: + return 0 + else: + return simps(p[m], x=z[m]) + # Calculate probabilities in each bin + zgroups = np.digitize(self.z_arr, bins=edges) + masks = [zgroups == i for i in range(nbins)] + if assign_params['use_p_outbin']: + # Matrix of probabilities in each bin + pzs = np.array([[integrate_safe(pz, self.z_arr, m) + for m in masks] + for pz in self.pz_list]) + pzd = np.array([pzs[j, ids[j]] + for j in range(self.n_samples)]) + else: + # Overlaps in own bin + pzd = np.array([integrate_safe(pz, self.z_arr, masks[i]) + for pz, i in zip(self.pz_list, ids)]) + # Throw away based on in-bin probability + if assign_params['use_p_inbin']: + ids[pzd < assign_params['p_inbin_thr']] = -1 + # Throw away based on off-bin probability + if assign_params['use_p_outbin']: + ids[np.array([np.sum(p > assign_params['p_outbin_thr']) > 1 + for p in pzs])] = -1 + # Assignment list + if get_ids: + return [(i, np.where(ids == i)[0]) for i in np.unique(ids)] + else: + return [np.where(ids == i)[0] for i in np.unique(ids)] + + def get_sn_from_edges(self, edges, full_output=False, + assign_params=assign_params_default): + """ Compute signal-to-noise ratio from a set of edges and assignment + parameters. + Args: + edges (array_like): array of bin edges. + assign_params (dict): dictionary of assignment parameters + (see `assign_params_default`). + full_output (bool): if true, a dictionary with additional + information will be returned. Otherwise just total S/N. + Returns: + If `full_output=True`, dictionary containing S/N, power + spectra and noise spectra. Otherwise just S/N. + """ + if self.check_edges(edges): + return 0 + assign = self.assign_from_edges(edges, assign_params=assign_params) + return self.get_sn(assign, full_output=full_output)