TASBE · jakebeal · Jan 21, 2019 · Jan 17, 2019 · Jan 17, 2019 · Jan 17, 2019
diff --git a/code/@ColorModel/au_to_ERF.m b/code/@ColorModel/au_to_ERF.m
@@ -1,4 +1,4 @@
-% AU_TO_ERF translates arbitrary units of a particular channel in a ColorModel object to ERF
+% AU_TO_ERF translates arbitrary units of a particular channel in a ColorModel object to ERF (or Eum)
 %
 % Copyright (C) 2010-2018, Raytheon BBN Technologies and contributors listed
 % in the AUTHORS file in TASBE analytics package distribution's top directory.
@@ -9,7 +9,15 @@
 % package distribution's top directory.
 
 function data = au_to_ERF(CM,channel,audata)
-    % don't attempt to translate for unprocessed channels
+    % first check if it's the size channel
+    if ~isempty(CM.size_unit_translation)
+        if(find(CM,channel)==find(CM,CM.um_channel))
+            data = um_channel_AU_to_um(CM.size_unit_translation,audata);
+            return;
+        end
+    end
+
+    % otherwise, don't attempt to translate for unprocessed channels
     if(isUnprocessed(channel)), data = audata; return; end
 
     ERF_channel_AU_data = translate(CM.color_translation_model,audata,channel,CM.ERF_channel);

diff --git a/code/@ColorModel/read_filtered_au.m b/code/@ColorModel/read_filtered_au.m
@@ -9,7 +9,7 @@
 % exception, as described in the file LICENSE in the TASBE analytics
 % package distribution's top directory.
 
-function [data fcshdr] = read_filtered_au(CM,filename)
+function [data,fcshdr,n_removed] = read_filtered_au(CM,filename)
     % Get read FCS file and select channels of interest
     [fcsunscaled fcshdr rawfcs] = fca_readfcs(filename);
     if (isempty(fcshdr))
@@ -28,3 +28,7 @@
 
     % if requested to dequantize, add a random value in [-0.5, 0.5]
     if(CM.dequantize), data = data + rand(size(data)) - 0.5; end;
+
+    % count how many have been removed, all told
+    n_removed = size(rawfcs,1) - size(data,1);
+
diff --git a/code/@ColorModel/readfcs_compensated_ERF.m b/code/@ColorModel/readfcs_compensated_ERF.m
@@ -9,11 +9,11 @@
 % exception, as described in the file LICENSE in the TASBE analytics
 % package distribution's top directory.
 
-function data = readfcs_compensated_ERF(CM,filename,with_AF,floor)
+function [data,n_removed] = readfcs_compensated_ERF(CM,filename,with_AF,floor)
     if(CM.initialized<1), TASBESession.error('TASBE:ReadFCS','Unresolved','Cannot read ERF: ColorModel not yet resolved'); end; % ensure initted
 
     % Read to arbitrary units
-    audata = readfcs_compensated_au(CM,filename,with_AF,floor);
+    [audata,n_preremoved] = readfcs_compensated_au(CM,filename,with_AF,floor);
 
     % Translate each (processed) channel to ERF channel 
     ERF_channel_data = zeros(size(audata));
@@ -47,3 +47,5 @@
     if numel(data)<numel(audata)*0.1 % Threshold: at least 10% retained
         TASBESession.warn('TASBE:ReadFCS','TooMuchDataDiscarded','ERF (post)filters may be discarding too much data: only %d%% retained in %s',numel(data)/numel(audata)*100,filename);
     end
+
+    n_removed = n_preremoved + (size(audata,1)-size(data,1));
diff --git a/code/@ColorModel/readfcs_compensated_au.m b/code/@ColorModel/readfcs_compensated_au.m
@@ -10,11 +10,11 @@
 % exception, as described in the file LICENSE in the TASBE analytics
 % package distribution's top directory.
 
-function data = readfcs_compensated_au(CM,filename,with_AF,floor)
+function [data,n_removed] = readfcs_compensated_au(CM,filename,with_AF,floor)
     if(CM.initialized<0.5), TASBESession.error('TASBE:ReadFCS','Unresolved','Cannot read a.u.: ColorModel not yet resolved'); end; % ensure initted
 
     % Acquire initial data, discarding likely contaminated portions
-    [rawfcs, fcshdr] = read_filtered_au(CM,filename);
+    [rawfcs, fcshdr, n_preremoved] = read_filtered_au(CM,filename);
 
     [rawdata, channel_desc] = select_channels(CM.Channels,rawfcs,fcshdr);
 
@@ -58,3 +58,5 @@
     % make sure nothing's below 1, for geometric statistics
     if(floor), data(data<1) = 1; end
 
+    n_removed = n_preremoved + (size(rawfcs,1)-size(data,1));
+
diff --git a/code/compute_sample_statistics.m b/code/compute_sample_statistics.m
@@ -40,9 +40,11 @@
 popcmeans = zeros(n_components,n_channels);
 popcstds = zeros(n_components,n_channels);
 popcweights = zeros(n_components,n_channels);
+drop_thresholds = zeros(n_channels,1);
 for k=1:n_channels
     % compute channel-specific drop threshold:
     drop_threshold = au_to_ERF(colorModel,getChannel(colorModel,k),get_pem_drop_threshold(analysisParams));
+    drop_thresholds(k) = drop_threshold;
 
     % divide into included/excluded set
     pos = data(:,k)>drop_threshold;
@@ -76,7 +78,7 @@
 nonexpressing = sum(nonexpressing_set);
 
 % Create (possibly filtered) subpopulation statistics [note: ignore excluded, as already calculated above]
-[counts means stds] = subpopulation_statistics(getBins(analysisParams), data, c_index, 'geometric');
+[counts,means,stds] = subpopulation_statistics(getBins(analysisParams), data, c_index, 'geometric', drop_thresholds);
 if numel(data)
     plasmid_counts = estimate_plasmids(PEM,means(:,c_index));
     fraction_active = estimate_fraction_active(PEM, means(:,c_index));

diff --git a/code/per_color_constitutive_analysis.m b/code/per_color_constitutive_analysis.m
@@ -13,18 +13,21 @@
 % The 'results' here is not a standard ExperimentResults, but a similar scratch structure
 
 TASBESession.warn('TASBE:Analysis','UpdateNeeded','Need to update per_color_constitutive_analysis to use new samplestatistics');
-batch_size = size(batch_description,1);
+n_conditions = size(batch_description,1);
 
 % check to make sure batch_file has the correct dimensions
 if size(batch_description, 2) ~= 2
     TASBESession.error('TASBE:Analysis', 'DimensionMismatch', 'Batch analysis invoked with incorrect number of columns. Make sure batch_file is a n X 2 matrix.');
 end
 
-for i = 1:batch_size
+data = cell(n_conditions,1);
+n_removed = data;
+for i = 1:n_conditions
     condition_name = batch_description{i,1};
     fileset = batch_description{i,2};
     experiment = Experiment(condition_name,'', {0,fileset});
-    data{i} = read_data(colorModel, experiment, AP);
+    [data{i},n_removed_sub] = read_data(colorModel, experiment, AP);
+    n_removed{i} = [n_removed_sub{1}{:}];
     if exist('cloudNames', 'var')
         filenames = {cloudNames{i}};
     else
@@ -41,7 +44,6 @@
     rawresults{i} = process_constitutive_batch( colorModel, batch_description, AP, data);
 end
 
-n_conditions = size(batch_description,1);
 bincenters = get_bin_centers(getBins(AP));
 
 results = cell(n_conditions,1); sampleresults = results;
@@ -92,5 +94,6 @@
     results{i}.on_fracStd = std(cell2mat(on_fracs));
     results{i}.off_fracMean = mean(cell2mat(off_fracs));
     results{i}.off_fracStd = std(cell2mat(off_fracs));
+    results{i}.n_removed = n_removed{i};
 end
 
diff --git a/code/read_data.m b/code/read_data.m
@@ -9,20 +9,21 @@
 % exception, as described in the file LICENSE in the TASBE analytics
 % package distribution's top directory.
 
-function data = read_data( colorModel, experiment, analysisParams)
+function [data,n_removed] = read_data( colorModel, experiment, analysisParams)
 filenames = getInducerLevelsToFiles(experiment); % array of file names
 n_conditions = numel(filenames);
 
 % Process each file for each condition in turn, computing results
 % incrementally
 data = cell(size(filenames));
+n_removed = data;
 for i=1:n_conditions
     perInducerFiles = filenames{i};
     numberOfPerInducerFiles = numel(perInducerFiles);
     if (numberOfPerInducerFiles == 0), TASBESession.warn('TASBE:Analysis','MissingDataFile','An inducer level is missing a data file'); end;
     for j = 1:numberOfPerInducerFiles
         fileName = perInducerFiles{j};
         % Read data and extract statistics
-        data{i}{j} = readfcs_compensated_ERF(colorModel, fileName, getUseAutoFluorescence(analysisParams), true);
+        [data{i}{j},n_removed{i}{j}] = readfcs_compensated_ERF(colorModel, fileName, getUseAutoFluorescence(analysisParams), true);
     end
 end
diff --git a/code/subpopulation_statistics.m b/code/subpopulation_statistics.m
@@ -9,7 +9,8 @@
 % exception, as described in the file LICENSE in the TASBE analytics
 % package distribution's top directory.
 
-function [counts, means, stds, excluded] = subpopulation_statistics(BSeq,data,selector,mode)
+function [counts, means, stds, excluded] = subpopulation_statistics(BSeq,data,selector,mode,drop_thresholds)
+if nargin<5, drop_thresholds = []; end;
 
 bedges = get_bin_edges(BSeq);
 
@@ -24,7 +25,9 @@
 switch(mode)
     case 'geometric'
         for i=1:n
-            which = find(data(:,selector)>bedges(i) & data(:,selector)<=bedges(i+1));
+            selection = data(:,selector)>bedges(i) & data(:,selector)<=bedges(i+1);
+            if ~isempty(drop_thresholds), selection = selection & data(:,selector)>drop_thresholds(selector); end;
+            which = find(selection);
             counts(i) = numel(which);
 
             for j=1:ncol

diff --git a/tests/test_batchAnalysisOutput.m b/tests/test_batchAnalysisOutput.m
@@ -111,46 +111,46 @@
 % The first five rows should be enough to verify writing the histogram file
 % correctly.
 expected_bincounts = [...
-        6799        2200        1383;
+        6799         637          36; % clipped by the drop threshold
         8012        2732        2696;
         8780        3327        2638;
         8563        4637        2632;
         7622        4623        3741;
         ];
 
 % Means and stddevs tests writing the statistics file correctly.
-expected_means = 1e5 * [...
-    0.2217    2.4948    4.1064
-    0.2219    2.4891    4.0757
-    0.2211    2.5766    4.2599
-    0.2205    2.5874    4.3344
-    0.2216    2.5099    4.3095
-    0.2255    2.4862    4.2764
-    0.2281    2.5457    4.2586
-    0.2539    2.5739    4.4073
-    0.3791    2.4218    4.6213
-    0.4891    2.3266    4.7217
-    0.6924    2.1068    4.6593
-    1.0930    1.7513    5.6729
-    1.5909    1.5451    6.7144
-    1.9472    1.4175    7.4609
+expected_means = [...
+    22170	260800	429100
+    22200	260700	426500
+    22110	269300	444900
+    22050	271400	454400
+    22160	262600	450600
+    22550	260800	448200
+    22820	266800	445000
+    25400	268800	460800
+    37920	253000	482200
+    48930	242800	492200
+    69260	220000	486300
+    109400	182500	592500
+    159100	160100	697900
+    194800	146800	772000
     ];
 
 expected_stds = [...
-    1.6006    6.7653    8.1000
-    1.5990    6.8670    8.1306
-    1.5981    6.8650    8.1230
-    1.6036    6.9155    8.2135
-    1.6035    6.7565    8.1069
-    1.6427    6.8020    8.2742
-    1.7030    6.7618    8.1220
-    1.9914    6.7701    8.2937
-    3.0568    6.4579    8.4052
-    3.6868    6.1704    8.4187
-    4.5068    5.8686    8.2393
-    5.2819    5.2780    8.7369
-    5.6018    4.7061    8.5892
-    5.5773    4.3900    8.4391
+    1.601	6.611	7.921
+    1.600	6.702	7.942
+    1.599	6.710	7.944
+    1.603	6.746	8.014
+    1.604	6.599	7.922
+    1.643	6.641	8.080
+    1.703	6.595	7.936
+    1.992	6.613	8.106
+    3.057	6.308	8.224
+    3.688	6.028	8.242
+    4.508	5.731	8.056
+    5.283	5.163	8.538
+    5.603	4.613	8.392
+    5.578	4.301	8.263
     ];
 
 expected_gmm_means = 10.^[...

diff --git a/tests/test_batch_analysis.m b/tests/test_batch_analysis.m
@@ -62,7 +62,7 @@
 % Check results in results:
 
 result1_expected_bincounts = [...
-        6806        2178        1373;
+        6806         637           0; % clipped by the drop threshold
         8017        2753        2706;
         8782        3323        2637;
         8558        4640        2623;
@@ -125,37 +125,37 @@
            ];
 
 result_expected_means = 1e5 * [...
-    0.2213      2.4796      4.0832
-    0.2217      2.4605      4.0592
-    0.2201      2.5462      4.2334
-    0.2192      2.5524      4.2831
-    0.2202      2.4833      4.2658
-    0.2230      2.4613      4.2593
-    0.2246      2.5221      4.2116
-    0.2488      2.5455      4.3739
-    0.3723      2.3947      4.5774
-    0.4764      2.2973      4.6921
-    0.6808      2.0812      4.6243
-    1.0768      1.7335      5.6471
-    1.5798      1.5283      6.6706
-    1.9350      1.4045      7.4507
+    0.2214    2.5920    4.2576
+    0.2217    2.5773    4.2344
+    0.2201    2.6618    4.4116
+    0.2193    2.6768    4.4791
+    0.2202    2.5979    4.4447
+    0.2230    2.5794    4.4459
+    0.2246    2.6410    4.3857
+    0.2489    2.6600    4.5635
+    0.3723    2.5019    4.7657
+    0.4765    2.3962    4.8756
+    0.6809    2.1736    4.8155
+    1.0768    1.8055    5.8724
+    1.5798    1.5830    6.9178
+    1.9350    1.4546    7.6877
     ];
 
 result_expected_stds = [...
-    1.5964      6.7127      8.0237
-    1.5955      6.7718      8.0481
-    1.5855      6.7734      8.0462
-    1.5881      6.8054      8.0832
-    1.5876      6.6713      7.9815
-    1.6027      6.7166      8.1935
-    1.6348      6.6764      7.9794
-    1.8831      6.6863      8.2016
-    2.9448      6.3741      8.2915
-    3.5238      6.0771      8.3279
-    4.3923      5.7805      8.1472
-    5.1748      5.2045      8.6548
-    5.5516      4.6337      8.5021
-    5.5331      4.3324      8.3937
+    1.5964    6.5596    7.8289
+    1.5955    6.6127    7.8521
+    1.5856    6.6173    7.8504
+    1.5881    6.6379    7.8693
+    1.5876    6.5152    7.7858
+    1.6027    6.5557    7.9894
+    1.6348    6.5152    7.7879
+    1.8831    6.5316    7.9963
+    2.9448    6.2255    8.0919
+    3.5238    5.9371    8.1359
+    4.3923    5.6446    7.9458
+    5.1748    5.0904    8.4391
+    5.5515    4.5412    8.2867
+    5.5330    4.2440    8.2005
     ];
 
 % Blue, Yellow, Red
@@ -207,6 +207,11 @@
 assertElementsAlmostEqual(results{14}.gmm_stds,  result_expected14_gmm_stds,  'relative', 1e-2);
 assertElementsAlmostEqual(results{14}.gmm_weights,  result_expected14_gmm_weights,  'relative', 1e-2);
 
+% raw, filtered
+firstlast_event_counts = [220379 183753; 161222 145854];
+assertElementsAlmostEqual(results{1}.n_removed,  firstlast_event_counts(1,1)-firstlast_event_counts(2,1),  'relative', 1e-2);
+assertElementsAlmostEqual(results{14}.n_removed, firstlast_event_counts(1,2)-firstlast_event_counts(2,2),  'relative', 1e-2);
+
 function test_batch_analysis_plot_warnings
 % Test for warnings in plot_batch_histograms
 CM2 = load_or_make_testing_colormodel2();