diff --git a/.github/workflows/micromamba.yml.old b/.github/workflows/micromamba.yml.old deleted file mode 100644 index f4e7801e..00000000 --- a/.github/workflows/micromamba.yml.old +++ /dev/null @@ -1,69 +0,0 @@ -name: micromamba -# concurrency: -# group: ${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -on: - # pull_request: - workflow_dispatch: - schedule: - - cron: "30 1 * * *" # at 1.30am -## these permissions are only for deployment to gh pages -# permissions: -# id-token: write -# pages: write - -jobs: - run-benchmark-micromamba: - name: run_clustbench_micromamba - ## runs-on: ubuntu-latest - runs-on: self-hosted - strategy: - matrix: - ob_branch: [dev, reduce_install_scope, main] - micromamba-version: ['2.1.1-0', '1.5.12-0'] - fail-fast: false - concurrency: - group: micromamba-${{ matrix.micromamba-version }}-${{ matrix.ob_branch }} - cancel-in-progress: false # true - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Install (with) micromamba - uses: mamba-org/setup-micromamba@v2 - with: - cache-environment: false # true - micromamba-version: ${{ matrix.micromamba-version }} - download-micromamba: true - micromamba-binary-path: ${{ runner.temp }}/bin/micromamba-${{ matrix.micromamba-version }}/micromamba - environment-name: test-env-${{matrix.ob_branch }}-${{ matrix.micromamba-version }} - create-args: >- - python=3.12 - pip - conda - post-cleanup: environment # all - - name: Overwrite omnibenchmark CLI to branch - shell: bash -l {0} - run: | - micromamba --version - pip install git+https://github.com/omnibenchmark/omnibenchmark.git@${{ matrix.ob_branch }} - - # - name: Enable a benchmarking `out` cache - # id: cache-benchmark - # uses: actions/cache@v3 - # with: - # path: out/ - # key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering.yaml') }} - - - name: Run benchmark - shell: bash -l {0} - run: | - env - output=$( echo "y" | ob run benchmark -b Clustering_conda.yml --local --cores 10 2>&1 ) - status=$? 
- if echo "$output" | grep -i 'Benchmark run has finished successfully'; then - status=0 - fi - echo -e $output - sh -c "exit $status" diff --git a/.github/workflows/miniconda_miniforge.yml.old b/.github/workflows/miniconda_miniforge.yml.old deleted file mode 100644 index 069e9311..00000000 --- a/.github/workflows/miniconda_miniforge.yml.old +++ /dev/null @@ -1,78 +0,0 @@ -name: clustbench_miniforge -# concurrency: -# group: ${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -on: - # pull_request: - workflow_dispatch: - schedule: - - cron: "30 1 * * *" # at 1.30am - -## these permissions are only for deployment to gh pages -# permissions: -# id-token: write -# pages: write - -jobs: - run-benchmark-miniforge: - name: run_clustbench_miniforge - ## runs-on: ubuntu-latest - runs-on: self-hosted - strategy: - matrix: - ob_branch: [dev, reduce_install_scope, main] - fail-fast: false - concurrency: - group: mambaforge-${{ matrix.ob_branch }} - cancel-in-progress: false # true - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Install Mambaforge - uses: conda-incubator/setup-miniconda@v3 - with: - miniforge-variant: Miniforge3 - use-mamba: true - activate-environment: test-env-${{matrix.ob_branch }} - python-version: "3.12" - auto-update-conda: true - channels: conda-forge - - - name: Cache environment - id: cache-env - uses: actions/cache@v3 - with: - path: | - ~/.conda/pkgs - ~/.conda/envs/omnibenchmark-env - ~/.cache/pip - key: ${{ runner.os }}-conda-pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - ${{ runner.os }}-conda-pip- - - - name: Install omnibenchmark CLI - shell: bash -l {0} - run: | - mamba install -y pip - pip install git+https://github.com/omnibenchmark/omnibenchmark.git@${{ matrix.ob_branch }} - - # - name: Enable a benchmarking `out` cache - # id: cache-benchmark - # uses: actions/cache@v3 - # with: - # path: out/ - # key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering.yaml') }} - - - name: Run benchmark - shell: bash -l {0} - run: | - env - output=$( echo "y" | ob run benchmark -b Clustering_conda.yml --local --cores 10 2>&1 ) - status=$? 
- if echo "$output" | grep -i 'Benchmark run has finished successfully'; then - status=0 - fi - echo -e $output - sh -c "exit $status" diff --git a/.github/workflows/oras.yml.old b/.github/workflows/oras.yml.old deleted file mode 100644 index 697446d1..00000000 --- a/.github/workflows/oras.yml.old +++ /dev/null @@ -1,60 +0,0 @@ -name: oras -# concurrency: -# group: ${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -on: - # pull_request: - workflow_dispatch: - schedule: - - cron: "30 1 * * *" # at 1.30am -## these permissions are only for deployment to gh pages -# permissions: -# id-token: write -# pages: write - -jobs: - run-benchmark-oras: - name: run_clustbench_oras - runs-on: ubuntu-latest - # runs-on: self-hosted - strategy: - matrix: - ob_branch: [main] - micromamba-version: ['2.1.1-0'] - fail-fast: false - concurrency: - group: oras-${{ matrix.micromamba-version }}-${{ matrix.ob_branch }} - cancel-in-progress: false # true - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - uses: eWaterCycle/setup-apptainer@v2 - with: - apptainer-version: 1.4.0 - - - name: Install ob (with) micromamba - uses: mamba-org/setup-micromamba@v2 - with: - cache-environment: false # true - micromamba-version: ${{ matrix.micromamba-version }} - download-micromamba: true - micromamba-binary-path: ${{ runner.temp }}/bin/micromamba-${{ matrix.micromamba-version }}/micromamba - environment-name: test-env-${{matrix.ob_branch }}-${{ matrix.micromamba-version }} - create-args: >- - python=3.12 - pip - conda - post-cleanup: environment # all - - - name: Overwrite omnibenchmark CLI to branch - shell: bash -l {0} - run: | - micromamba --version - pip install git+https://github.com/omnibenchmark/omnibenchmark.git@${{ matrix.ob_branch }} - - - name: Run benchmark - shell: bash -l {0} - run: | - ob run benchmark -b Clustering_oras.yml --local --cores 10 --yes diff --git a/.github/workflows/run_conda.yml b/.github/workflows/run_conda.yml index 01fc104a..21a69d57 100644 --- a/.github/workflows/run_conda.yml +++ b/.github/workflows/run_conda.yml @@ -1,4 +1,4 @@ -name: run_on_conda +name: run_ob_main_conda on: push: workflow_dispatch: @@ -19,4 +19,4 @@ jobs: with: yaml: Clustering_conda.yml backend: conda - omnibenchmark_branch: 'dev' + omnibenchmark_branch: 'main' diff --git a/.github/workflows/run_conda_dev.yml b/.github/workflows/run_conda_dev.yml new file mode 100644 index 00000000..8e0e683a --- /dev/null +++ b/.github/workflows/run_conda_dev.yml @@ -0,0 +1,22 @@ +name: run_ob_dev_conda +on: + push: + workflow_dispatch: + # schedule: + # - cron: "30 1 * * *" # at 1.30am + +jobs: + run_benchmark_main: + name: Run on conda + runs-on: ubuntu-latest + # runs-on: self-hosted + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Use action + uses: omnibenchmark/run_omnibenchmark@main + with: + yaml: Clustering_conda.yml + backend: conda + omnibenchmark_branch: 'dev' diff --git a/Clustering_conda.yml b/Clustering_conda.yml index f7b4f1ee..9b511e22 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -43,7 +43,7 @@ stages: commit: 366c5a2 parameters: # comments depict the possible cardinalities and the number of curated labelsets - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 + - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2 # - values: 
["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1 @@ -51,7 +51,7 @@ stages: # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1 - - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 + # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6 # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1 # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2 @@ -64,10 +64,10 @@ stages: # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1 # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1 # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1 - - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 + # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1 # - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1 - - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 + # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5 # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2 # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1 @@ -75,7 +75,7 @@ stages: # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3 # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1 # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1 - - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 + # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1 @@ -169,16 +169,16 @@ stages: parameters: # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in conda - values: ["--method", "FCPS_Minimax", "--seed", 2] - - values: ["--method", "FCPS_MinEnergy", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] - - values: ["--method", "FCPS_Diana", "--seed", 2] - - values: ["--method", "FCPS_Fanny", "--seed", 2] - - values: ["--method", "FCPS_Hardcl", "--seed", 2] - - values: ["--method", "FCPS_Softcl", "--seed", 2] - - values: ["--method", "FCPS_Clara", "--seed", 2] - - values: ["--method", "FCPS_PAM", "--seed", 2] + # - values: ["--method", "FCPS_MinEnergy", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] + # - values: ["--method", "FCPS_Diana", "--seed", 2] + # - values: ["--method", "FCPS_Fanny", "--seed", 2] + # - values: ["--method", 
"FCPS_Hardcl", "--seed", 2] + # - values: ["--method", "FCPS_Softcl", "--seed", 2] + # - values: ["--method", "FCPS_Clara", "--seed", 2] + # - values: ["--method", "FCPS_PAM", "--seed", 2] inputs: - entries: - data.matrix @@ -198,10 +198,10 @@ stages: parameters: - values: ["--metric", "normalized_clustering_accuracy"] - values: ["--metric", "adjusted_fm_score"] - - values: ["--metric", "adjusted_mi_score"] - - values: ["--metric", "adjusted_rand_score"] - - values: ["--metric", "fm_score"] - - values: ["--metric", "mi_score"] + # - values: ["--metric", "adjusted_mi_score"] + # - values: ["--metric", "adjusted_rand_score"] + # - values: ["--metric", "fm_score"] + # - values: ["--metric", "mi_score"] # - values: ["--metric", "normalized_clustering_accuracy"] # - values: ["--metric", "normalized_mi_score"] # - values: ["--metric", "normalized_pivoted_accuracy"] @@ -214,89 +214,3 @@ stages: outputs: - id: metrics.scores path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz" - - # ## daniel's data ########################################################################### - - # - id: danielsdata - # modules: - # - id: iris_manual - # name: "Iris Dataset" - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/iris.git - # commit: 47c63f0 - # - id: penguins - # name: "Penguins Dataset" - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/penguins.git - # commit: 9032478 - # outputs: - # - id: data.features - # path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv" - # - id: data.labels - # path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv" - - # ## daniel's distances ######################################################################## - - # - id: distances - # modules: - # - id: D1 - # software_environment: "sklearn" - # parameters: - # - values: ["--measure", "cosine"] - # - values: ["--measure", "euclidean"] - # - values: ["--measure", "manhattan"] - # - values: ["--measure", "chebyshev"] - # repository: - # url: https://github.com/omnibenchmark-example/distance.git - # commit: dd99d4f - # inputs: - # - entries: - # - data.features - # outputs: - # - id: distances - # path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv" - - # ## daniel's methods ################################################################### - - # - id: danielmethods - # modules: - # - id: kmeans - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/kmeans.git - # commit: 049c8b1 - # - id: ward - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/ward.git - # commit: 976e3f3 - # inputs: - # - entries: - # - distances - # outputs: - # - id: methods.clusters - # path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv" - - # ## daniel's metrics ################################################################### - - # - id: danielsmetrics - # modules: - # - id: ari - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/ari.git - # commit: 72708f0 - # - id: accuracy - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/accuracy.git - # commit: e26b32f - # inputs: - # - entries: - # - methods.clusters - # - data.labels - # outputs: - # - id: metrics.mapping - # path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt" diff --git 
a/Clustering_envmodules.yml b/Clustering_envmodules.yml index a08c9331..ed17cda2 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -46,7 +46,7 @@ stages: commit: 366c5a2 parameters: # comments depict the possible cardinalities and the number of curated labelsets - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 + - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2 # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1 @@ -54,7 +54,7 @@ stages: # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1 - - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 + # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6 # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1 # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2 @@ -67,10 +67,10 @@ stages: # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1 # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1 # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1 - - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 + # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1 # - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1 - - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 + # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5 # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2 # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1 @@ -78,7 +78,7 @@ stages: # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3 # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1 # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1 - - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 + # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1 @@ -172,16 +172,16 @@ stages: parameters: # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in conda - values: ["--method", "FCPS_Minimax", "--seed", 2] - - values: ["--method", "FCPS_MinEnergy", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] - - values: ["--method", "FCPS_Diana", "--seed", 2] - - values: 
["--method", "FCPS_Fanny", "--seed", 2] - - values: ["--method", "FCPS_Hardcl", "--seed", 2] - - values: ["--method", "FCPS_Softcl", "--seed", 2] - - values: ["--method", "FCPS_Clara", "--seed", 2] - - values: ["--method", "FCPS_PAM", "--seed", 2] + # - values: ["--method", "FCPS_MinEnergy", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] + # - values: ["--method", "FCPS_Diana", "--seed", 2] + # - values: ["--method", "FCPS_Fanny", "--seed", 2] + # - values: ["--method", "FCPS_Hardcl", "--seed", 2] + # - values: ["--method", "FCPS_Softcl", "--seed", 2] + # - values: ["--method", "FCPS_Clara", "--seed", 2] + # - values: ["--method", "FCPS_PAM", "--seed", 2] inputs: - entries: - data.matrix @@ -201,10 +201,10 @@ stages: parameters: - values: ["--metric", "normalized_clustering_accuracy"] - values: ["--metric", "adjusted_fm_score"] - - values: ["--metric", "adjusted_mi_score"] - - values: ["--metric", "adjusted_rand_score"] - - values: ["--metric", "fm_score"] - - values: ["--metric", "mi_score"] + # - values: ["--metric", "adjusted_mi_score"] + # - values: ["--metric", "adjusted_rand_score"] + # - values: ["--metric", "fm_score"] + # - values: ["--metric", "mi_score"] # - values: ["--metric", "normalized_clustering_accuracy"] # - values: ["--metric", "normalized_mi_score"] # - values: ["--metric", "normalized_pivoted_accuracy"] @@ -217,89 +217,3 @@ stages: outputs: - id: metrics.scores path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz" - - # ## daniel's data ########################################################################### - - # - id: danielsdata - # modules: - # - id: iris_manual - # name: "Iris Dataset" - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/iris.git - # commit: 47c63f0 - # - id: penguins - # name: "Penguins Dataset" - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/penguins.git - # commit: 9032478 - # outputs: - # - id: data.features - # path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv" - # - id: data.labels - # path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv" - - # ## daniel's distances ######################################################################## - - # - id: distances - # modules: - # - id: D1 - # software_environment: "sklearn" - # parameters: - # - values: ["--measure", "cosine"] - # - values: ["--measure", "euclidean"] - # - values: ["--measure", "manhattan"] - # - values: ["--measure", "chebyshev"] - # repository: - # url: https://github.com/omnibenchmark-example/distance.git - # commit: dd99d4f - # inputs: - # - entries: - # - data.features - # outputs: - # - id: distances - # path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv" - - # ## daniel's methods ################################################################### - - # - id: danielmethods - # modules: - # - id: kmeans - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/kmeans.git - # commit: 049c8b1 - # - id: ward - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/ward.git - # commit: 976e3f3 - # inputs: - # - entries: - # - distances - # outputs: - # - id: methods.clusters - # path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv" - - # ## daniel's 
metrics ################################################################### - - # - id: danielsmetrics - # modules: - # - id: ari - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/ari.git - # commit: 72708f0 - # - id: accuracy - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/accuracy.git - # commit: e26b32f - # inputs: - # - entries: - # - methods.clusters - # - data.labels - # outputs: - # - id: metrics.mapping - # path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt" diff --git a/Clustering_oras.yml b/Clustering_oras.yml index ebe9e231..53145934 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -44,7 +44,7 @@ stages: commit: 366c5a2 parameters: # comments depict the possible cardinalities and the number of curated labelsets - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 + - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2 # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1 @@ -52,7 +52,7 @@ stages: # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1 - - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 + # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6 # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1 # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2 @@ -65,10 +65,10 @@ stages: # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1 # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1 # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1 - - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 + # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1 # - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1 - - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 + # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5 # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2 # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1 @@ -76,7 +76,7 @@ stages: # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3 # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1 # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1 - - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 + # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1 # - values: 
["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1 # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1 @@ -170,16 +170,16 @@ stages: parameters: # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in conda - values: ["--method", "FCPS_Minimax", "--seed", 2] - - values: ["--method", "FCPS_MinEnergy", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] - - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] - - values: ["--method", "FCPS_Diana", "--seed", 2] - - values: ["--method", "FCPS_Fanny", "--seed", 2] - - values: ["--method", "FCPS_Hardcl", "--seed", 2] - - values: ["--method", "FCPS_Softcl", "--seed", 2] - - values: ["--method", "FCPS_Clara", "--seed", 2] - - values: ["--method", "FCPS_PAM", "--seed", 2] + # - values: ["--method", "FCPS_MinEnergy", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] + # - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] + # - values: ["--method", "FCPS_Diana", "--seed", 2] + # - values: ["--method", "FCPS_Fanny", "--seed", 2] + # - values: ["--method", "FCPS_Hardcl", "--seed", 2] + # - values: ["--method", "FCPS_Softcl", "--seed", 2] + # - values: ["--method", "FCPS_Clara", "--seed", 2] + # - values: ["--method", "FCPS_PAM", "--seed", 2] inputs: - entries: - data.matrix @@ -199,10 +199,10 @@ stages: parameters: - values: ["--metric", "normalized_clustering_accuracy"] - values: ["--metric", "adjusted_fm_score"] - - values: ["--metric", "adjusted_mi_score"] - - values: ["--metric", "adjusted_rand_score"] - - values: ["--metric", "fm_score"] - - values: ["--metric", "mi_score"] + # - values: ["--metric", "adjusted_mi_score"] + # - values: ["--metric", "adjusted_rand_score"] + # - values: ["--metric", "fm_score"] + # - values: ["--metric", "mi_score"] # - values: ["--metric", "normalized_clustering_accuracy"] # - values: ["--metric", "normalized_mi_score"] # - values: ["--metric", "normalized_pivoted_accuracy"] @@ -215,89 +215,3 @@ stages: outputs: - id: metrics.scores path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz" - - # ## daniel's data ########################################################################### - - # - id: danielsdata - # modules: - # - id: iris_manual - # name: "Iris Dataset" - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/iris.git - # commit: 47c63f0 - # - id: penguins - # name: "Penguins Dataset" - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/penguins.git - # commit: 9032478 - # outputs: - # - id: data.features - # path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv" - # - id: data.labels - # path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv" - - # ## daniel's distances ######################################################################## - - # - id: distances - # modules: - # - id: D1 - # software_environment: "sklearn" - # parameters: - # - values: ["--measure", "cosine"] - # - values: ["--measure", "euclidean"] - # - values: ["--measure", "manhattan"] - # - values: ["--measure", "chebyshev"] - # repository: - # url: https://github.com/omnibenchmark-example/distance.git - # commit: dd99d4f - # inputs: - # - entries: - # - data.features - # outputs: - # - id: distances - # path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv" - - # ## daniel's 
methods ################################################################### - - # - id: danielmethods - # modules: - # - id: kmeans - # software_environment: "sklearn" - # repository: - # url: https://github.com/omnibenchmark-example/kmeans.git - # commit: 049c8b1 - # - id: ward - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/ward.git - # commit: 976e3f3 - # inputs: - # - entries: - # - distances - # outputs: - # - id: methods.clusters - # path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv" - - # ## daniel's metrics ################################################################### - - # - id: danielsmetrics - # modules: - # - id: ari - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/ari.git - # commit: 72708f0 - # - id: accuracy - # software_environment: "R" - # repository: - # url: https://github.com/omnibenchmark-example/accuracy.git - # commit: e26b32f - # inputs: - # - entries: - # - methods.clusters - # - data.labels - # outputs: - # - id: metrics.mapping - # path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt" diff --git a/README.md b/README.md index 5b3dc633..8532c720 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,10 @@ A clustering example for omnibenchmark 3. Move to the cloned repository `cd clustering_example` 4. Run locally, somewhat in parallel `ob run benchmark -b CLUSTERING.YAML --local-storage --cores 6`. Choose `Clustering.yml` specification based on whether running it with conda, easybuild, apptainer, etc. [More details about the available backends](https://github.com/omnibenchmark/clustering_example/blob/main/envs/README.md). +# Disclaimer + +This benchmark does not work on arm64. + # Clustbench attribution by Marek Gagolewski, modified by Izaskun Mallona
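
For reference, the README's local-run steps in the hunk above condense to the following sketch. This is commentary on the diff, not part of it: it assumes the `ob` CLI installed from the omnibenchmark repository (as the workflows above do via `pip install git+...@<branch>`), the clone URL implied by the README's backends link, the `Clustering_conda.yml` spec touched in this PR, and a non-arm64 host per the new disclaimer.

```bash
# Minimal local-run sketch; flags and paths are taken from this
# repository's README and CI workflows, assumed rather than verified here.

# Install the omnibenchmark CLI (the workflows pin a branch the same way).
pip install git+https://github.com/omnibenchmark/omnibenchmark.git@main

# Clone the example and enter it (README steps 1-3).
git clone https://github.com/omnibenchmark/clustering_example.git
cd clustering_example

# Run locally with the conda backend spec; see envs/README.md for the
# apptainer/easybuild/oras variants.
ob run benchmark -b Clustering_conda.yml --local-storage --cores 6
```

The retired `.yml.old` workflows wrapped this same command in a grep for "Benchmark run has finished successfully" to normalize the exit status; the reusable `omnibenchmark/run_omnibenchmark` action adopted here replaces that boilerplate.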