diff --git a/regression/run_cluster.sh b/regression/run_cluster.sh index 34159cf..10b407b 100755 --- a/regression/run_cluster.sh +++ b/regression/run_cluster.sh @@ -8,7 +8,7 @@ CLUDB="${RESULTS}/clu" awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="15722" +TARGET="15710" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_cluster_update.sh b/regression/run_cluster_update.sh index 5c1c6c2..8b50906 100755 --- a/regression/run_cluster_update.sh +++ b/regression/run_cluster_update.sh @@ -17,7 +17,7 @@ CLUSTERMEMEBER=$(wc -l "$RESULTS/clu_updated.tsv" | awk '{print $1}') CLUSTER=$(echo $(cut -f1 "$RESULTS/clu_updated.tsv" | sort -u | wc -l)) UPDATEDSEQCNT=$(wc -l "$RESULTS/seqdb_update.index" | awk '{print $1}') -TARGET="32132 24753 32132" +TARGET="32132 24742 32132" ACTUAL="$CLUSTERMEMEBER $CLUSTER $UPDATEDSEQCNT" awk -v actual="$ACTUAL" -v target="$TARGET" 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; \ print "Expected: ", target; \ diff --git a/regression/run_easy_cluster.sh b/regression/run_easy_cluster.sh index 8cc986c..9e97437 100755 --- a/regression/run_easy_cluster.sh +++ b/regression/run_easy_cluster.sh @@ -3,7 +3,7 @@ awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="15722" +TARGET="15710" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_easy_cluster_reassign.sh b/regression/run_easy_cluster_reassign.sh index 67a50ba..691a37c 100755 --- a/regression/run_easy_cluster_reassign.sh +++ b/regression/run_easy_cluster_reassign.sh @@ -4,7 +4,7 @@ cat "${DATADIR}/clu.fasta" | "${MMSEQS}" easy-cluster stdin "$RESULTS/results" awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="17257" +TARGET="17248" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_easy_linclust.sh b/regression/run_easy_linclust.sh index aba13fd..3e5e04a 100755 --- a/regression/run_easy_linclust.sh +++ b/regression/run_easy_linclust.sh @@ -3,7 +3,7 @@ awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="26493" +TARGET="26146" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_linclust.sh b/regression/run_linclust.sh index 7cfdd29..fd10a4c 100755 --- a/regression/run_linclust.sh +++ b/regression/run_linclust.sh @@ -7,7 +7,7 @@ CLUDB= awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="26491" +TARGET="26135" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_linclust_adjacent_disabled.sh b/regression/run_linclust_adjacent_disabled.sh new file mode 100755 index 0000000..4839c0d --- /dev/null +++ b/regression/run_linclust_adjacent_disabled.sh @@ -0,0 +1,13 @@ +#!/bin/sh -e +CLUDB= +"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${RESULTS}/clu" + +"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --match-adjacent-seq false +"${MMSEQS}" createtsv "${RESULTS}/clu" "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv" + +awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" +ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" +TARGET="26491" +awk -v actual="$ACTUAL" -v target="$TARGET" \ + 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ + > "${RESULTS}.report" diff --git a/regression/run_linclust_split.sh b/regression/run_linclust_split.sh index 3d8c340..b78987b 100755 --- a/regression/run_linclust_split.sh +++ b/regression/run_linclust_split.sh @@ -6,7 +6,7 @@ awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="26491" +TARGET="26135" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/run_regression.sh b/run_regression.sh index ac37719..e311e9b 100755 --- a/run_regression.sh +++ b/run_regression.sh @@ -78,6 +78,7 @@ run_test EASY_CLUSTER "run_easy_cluster.sh" run_test EASY_NUCL_CLUSTER "run_easy_nuclcluster.sh" run_test CLUSTER_REASSIGN "run_easy_cluster_reassign.sh" run_test LINCLUST "run_linclust.sh" +run_test LINCLUST_ADJACENT_DISABLED "run_linclust_adjacent_disabled.sh" run_test LINCLUST_SPLIT "run_linclust_split.sh" run_test EASY_LINCLUST "run_easy_linclust.sh" run_test CLUSTHASH "run_clusthash.sh"