diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..a5e092c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,17 @@
+# Contributing Guidelines
+
+- Fork the repo.
+- Install the environment with `python>=3.7`, e.g., on Linux run:
+
+```bash
+# install Python 3.7.16, e.g., if using pyenv run:
+# pyenv install 3.7.16
+# pyenv local 3.7.16
+$ bash .ci/install-dev.sh
+$ bash .ci/install.sh
+
+# activate the env:
+$ source .venv/bin/activate
+```
+
+- Modify the code, commit the changes, and create a PR.
diff --git a/README.md b/README.md
index 9970d3a..d9964ce 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,11 @@
 
 ## Installation
 
+- python>=3.7
+- torch>=1.12
+- torch_geometric>=2.0
+- dgl>=1.1
+
 ```bash
 $ python -m pip install graph_datasets
 ```
@@ -44,21 +49,6 @@
 $ nohup bash scripts/install.sh > logs/install.log &
 
-## Contribution
-- Fork the repo.
-- Install env with `python>=3.7`. For linux run:
-```bash
-# install python 3.7.16, e.g, if using pyenv run:
-# pyenv install 3.7.16
-# pyenv local 3.7.16
-bash .ci/install-dev.sh
-bash .ci/install.sh
-
-# activate env:-
-activate .venv/bin/activate
-```
-- Modify the code, commit the changes and create a PR.
-
 
 ## Statistics
 |   idx | source   | dataset         | n_nodes   | n_feats   | n_edges     |   n_clusters |
 |------:|:---------|:----------------|:----------|:----------|:------------|-------------:|
diff --git a/doc_repo b/doc_repo
index a464bc3..a6ccb7b 160000
--- a/doc_repo
+++ b/doc_repo
@@ -1 +1 @@
-Subproject commit a464bc35396e507197061170b60007066d673e4c
+Subproject commit a6ccb7be79aaed4d8459c9bcb393c2d13ed57b97
diff --git a/docs/rst/graph_datasets.utils.rst b/docs/rst/graph_datasets.utils.rst
index 1169740..085175b 100644
--- a/docs/rst/graph_datasets.utils.rst
+++ b/docs/rst/graph_datasets.utils.rst
@@ -1,16 +1,13 @@
-utils
-======================
-
 evaluation
-------------
+======================
 .. automodule:: graph_datasets.utils.evaluation.evaluation
     :members:
     :show-inheritance:
-    :exclude-members:
+    :exclude-members: save_to_csv_files
     :no-undoc-members:
 
 statistics
-------------
+======================
 .. automodule:: graph_datasets.utils.statistics
     :members:
     :show-inheritance:
@@ -18,21 +15,21 @@ statistics
     :no-undoc-members:
 
 model management
------------------
+======================
 .. automodule:: graph_datasets.utils.model_management
     :members:
     :show-inheritance:
     :no-undoc-members:
 
 output
--------
+======================
 .. automodule:: graph_datasets.utils.output
     :members:
     :show-inheritance:
     :no-undoc-members:
 
 common
--------
+======================
 .. automodule:: graph_datasets.utils.common
     :members:
     :show-inheritance:
diff --git a/docs/rst/table.rst b/docs/rst/table.rst
index 5dc8f6b..cf3e5d7 100644
--- a/docs/rst/table.rst
+++ b/docs/rst/table.rst
@@ -1,4 +1,4 @@
-statistics
-===========
+dataset cheatsheet
+==================
 +-----+----------+-------------+-----------+---------+------------+------------+
 | idx | source   | dataset     | n_nodes   | n_feats | n_edges    | n_clusters |
diff --git a/graph_datasets/datasets/critical.py b/graph_datasets/datasets/critical.py
index 0cb45a5..d3e5c97 100644
--- a/graph_datasets/datasets/critical.py
+++ b/graph_datasets/datasets/critical.py
@@ -36,7 +36,8 @@ def load_critical_dataset(
     Returns:
         Tuple[dgl.DGLGraph, torch.Tensor, int]: [graph, label, n_clusters]
     """
-    data_file = os.path.join(directory, f"{dataset_name.replace('-','_')}.npz")
+    dataset_name = dataset_name.replace("-", "_")
+    data_file = os.path.join(directory, f"{dataset_name}.npz")
 
     if not os.path.exists(data_file):
         url = f"{CRITICAL_URL}/{dataset_name}.npz?raw=true"
diff --git a/graph_datasets/utils/__init__.py b/graph_datasets/utils/__init__.py
index 2f76e0c..456c011 100644
--- a/graph_datasets/utils/__init__.py
+++ b/graph_datasets/utils/__init__.py
@@ -3,7 +3,6 @@
 # pylint:disable=invalid-name
 from .common import *
 from .evaluation import evaluate_from_embed_file
-from .evaluation import save_to_csv_files
 from .model_management import check_modelfile_exists
 from .model_management import get_modelfile_path
 from .model_management import load_model
@@ -13,6 +12,7 @@
 from .output import csv2file
 from .output import make_parent_dirs
 from .output import refresh_file
+from .output import save_to_csv_files
 from .statistics import edge_homo
 from .statistics import node_homo
 from .statistics import statistics
diff --git a/graph_datasets/utils/evaluation/__init__.py b/graph_datasets/utils/evaluation/__init__.py
index a22713e..cdf11d2 100644
--- a/graph_datasets/utils/evaluation/__init__.py
+++ b/graph_datasets/utils/evaluation/__init__.py
@@ -1,4 +1,3 @@
 """Evalutaion
 """
 from .evaluation import evaluate_from_embed_file
-from .evaluation import save_to_csv_files
diff --git a/graph_datasets/utils/evaluation/evaluation.py b/graph_datasets/utils/evaluation/evaluation.py
index 6ae423f..7816d54 100644
--- a/graph_datasets/utils/evaluation/evaluation.py
+++ b/graph_datasets/utils/evaluation/evaluation.py
@@ -7,7 +7,6 @@
 
 import torch
 
-from ..output import csv2file
 from .eval_tools import evaluate_results_nc
 
 
@@ -44,12 +43,12 @@ def evaluate_from_embed_file(
     Example:
         .. code-block:: python
 
-            from graph_datasets import evaluate_embed_file
+            from graph_datasets import evaluate_from_embed_file
 
             method_name='orderedgnn'
             data_name='texas'
 
-            clustering_res, classification_res = evaluate_embed_file(
+            clustering_res, classification_res = evaluate_from_embed_file(
                 f'{data_name}_{method_name}_embeds.pth',
                 f'{data_name}_data.pth',
                 save_path='./save/',
@@ -102,52 +101,6 @@
     return clustering_results, classification_results
 
 
-def save_to_csv_files(
-    results: dict,
-    add_info: dict,
-    csv_name: str,
-    save_path="./results",
-):
-    """Save the evaluation results to a local csv file.
-
-    Args:
-        results (dict): Evaluation results document.
-        add_info (dict): Additional information, such as data set name, method name.
-        csv_name (str): csv file name to store.
-        save_path (str, optional): Folder path to store. Defaults to './results'.
-
-    Example:
-        .. code-block:: python
-
-            from graph_datasets import evaluate_embed_file
-            from graph_datasets import save_to_csv_files
-
-            method_name='orderedgnn'
-            data_name='texas'
-
-            clustering_res, classification_res = evaluate_embed_file(
-                f'{data_name}_{method_name}_embeds.pth',
-                f'{data_name}_data.pth',
-                save_path='./save/',
-            )
-
-            add_info = {'data': data_name, 'method': method_name,}
-            save_to_csv_files(clustering_res, add_info, 'clutering.csv')
-            save_to_csv_files(classification_res, add_info, 'classification.csv')
-    """
-    # save to csv file
-    results.update(add_info)
-
-    # list of values
-    csv2file(
-        target_path=os.path.join(save_path, csv_name),
-        thead=list(results.keys()),
-        tbody=list(results.values()),
-        refresh=False,
-        is_dict=False,
-    )
-
-
 # if __name__ == "__main__":
 # method_name = 'orderedgnn'  # 'selene' 'greet' 'hgrl' 'nwr-gae' 'orderedgnn'
 # data_name = 'texas'  # 'actor' 'chameleon' 'cornell' 'squirrel' 'texas' 'wisconsin'
@@ -159,7 +112,7 @@
 # save_path='/data/gnn/heter/save/',
 # quiet=True,
 # )
-# from graph_datasets.utils import tab_printer
+# from graph_datasets.utils import tab_printer, save_to_csv_files
 # tab_printer(clu_res, sort=False)
 # tab_printer(cls_res, sort=False)
 
diff --git a/graph_datasets/utils/output.py b/graph_datasets/utils/output.py
index cd4a227..6294ee0 100644
--- a/graph_datasets/utils/output.py
+++ b/graph_datasets/utils/output.py
@@ -104,3 +104,49 @@ def csv2file(
                 dict_writer.writerow(elem)
         else:
             csv_write.writerow(tbody)
+
+
+def save_to_csv_files(
+    results: dict,
+    add_info: dict,
+    csv_name: str,
+    save_path="./results",
+) -> None:
+    """Save the evaluation results to a local csv file.
+
+    Args:
+        results (dict): Evaluation results document.
+        add_info (dict): Additional information, such as the dataset name and method name.
+        csv_name (str): csv file name to store.
+        save_path (str, optional): Folder path to store. Defaults to './results'.
+
+    Example:
+        .. code-block:: python
+
+            from graph_datasets import evaluate_from_embed_file
+            from graph_datasets import save_to_csv_files
+
+            method_name='orderedgnn'
+            data_name='texas'
+
+            clustering_res, classification_res = evaluate_from_embed_file(
+                f'{data_name}_{method_name}_embeds.pth',
+                f'{data_name}_data.pth',
+                save_path='./save/',
+            )
+
+            add_info = {'data': data_name, 'method': method_name,}
+            save_to_csv_files(clustering_res, add_info, 'clustering.csv')
+            save_to_csv_files(classification_res, add_info, 'classification.csv')
+    """
+    # save to csv file
+    results.update(add_info)
+
+    # list of values
+    csv2file(
+        target_path=os.path.join(save_path, csv_name),
+        thead=list(results.keys()),
+        tbody=list(results.values()),
+        refresh=False,
+        is_dict=False,
+    )
diff --git a/pyproject.toml b/pyproject.toml
index b360d76..4d28cba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,8 +17,8 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 dependencies = [
-    "torch>=1.10.2",
-    "torch-geometric>=2.0.3",
+    "torch>=1.12",
+    "torch-geometric>=2.0",
     "torchaudio>=0.10.2",
     "torchvision>=0.11.3",
     "dgl>=1.1.0",
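
For downstream code, the practical effect of the `__init__.py` changes above is that `save_to_csv_files` now lives in `graph_datasets.utils.output` and is re-exported from `graph_datasets.utils` rather than from `graph_datasets.utils.evaluation`. A minimal sketch of the updated import path, using placeholder metric names and values rather than real evaluation output:

```python
# Sketch only: new import location after this change.
# Previously: from graph_datasets.utils.evaluation import save_to_csv_files
from graph_datasets.utils import save_to_csv_files

# Placeholder metrics; in practice these would come from evaluate_from_embed_file.
results = {"ACC": 0.81, "NMI": 0.44}
add_info = {"data": "texas", "method": "orderedgnn"}

# Writes the merged dict as a header row plus a value row to ./results/clustering.csv.
save_to_csv_files(results, add_info, "clustering.csv")
```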