Skip to content

Commit 75df1bc

Browse files
committed
Merge branch 'sparsity'
2 parents 285a8b6 + 6a5d946 commit 75df1bc

File tree

117 files changed

+12966
-2397
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

117 files changed

+12966
-2397
lines changed

Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@ export OFFLOAD_FCXX
1414
all: $(BDIR)/lib/libctf.a
1515

1616
EXAMPLES = dft dft_3D gemm gemm_4D scalar trace weigh_4D subworld_gemm \
17-
permute_multiworld strassen slice_gemm ccsd sparse_permuted_slice
17+
permute_multiworld strassen slice_gemm ccsd sparse_permuted_slice qinformatics endomorphism endomorphism_cust endomorphism_cust_sp \
18+
univar_function univar_accumulator_cust univar_accumulator_cust_sp spmv spmm jacobi apsp fft \
19+
sparse_mp3
1820

1921
TESTS = test_suite pgemm_test nonsq_pgemm_test diag_sym sym3 readwrite_test \
20-
ccsdt_t3_to_t2 ccsdt_map_test multi_tsr_sym diag_ctr readall_test sy_times_ns repack
22+
ccsdt_t3_to_t2 ccsdt_map_test multi_tsr_sym diag_ctr readall_test sy_times_ns repack \
23+
sptensor_sum speye
2124

2225
BENCHMARKS = nonsq_pgemm_bench bench_contraction bench_nosym_transp bench_redistribution
2326

configure

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ elif [ "$NERSC_HOST" = "hopper" -o "$NERSC_HOST" = "edison" ]; then
316316
LDFGLAGS="-lpthread"
317317
realcompiler
318318
defaultflags
319-
DEFS="$DEFS -D`echo $NERSC_HOST | tr a-z A-Z`"
319+
DEFS="$DEFS -DUSE_SP_MKL=1 -D`echo $NERSC_HOST | tr a-z A-Z`"
320320

321321

322322
elif (hostname | grep 'surveyor\|intrepid\|challenger\|udawn'); then
@@ -480,6 +480,7 @@ else
480480
UNDERSCORE=1
481481
echo 'BLAS library was not specified, using -mkl.'
482482
BLASLIBS=-mkl
483+
DEFS="$DEFS -DUSE_SP_MKL=1"
483484
elif testlink -lblas dgemm_ 0; then
484485
UNDERSCORE=1
485486
echo 'BLAS library was not specified, using -lblas (with underscores).'

examples/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
EXAMPLES = dft dft_3D gemm gemm_4D scalar trace weigh_4D subworld_gemm \
22
permute_multiworld strassen slice_gemm ccsd sparse_permuted_slice \
3-
qinformatics
3+
qinformatics endomorphism endomorphism_cust endomorphism_cust_sp \
4+
univar_function univar_accumulator_cust univar_accumulator_cust_sp \
5+
spmv spmm jacobi apsp fft sparse_mp3
46

57
.PHONY:
68
$(EXAMPLES): %: $(BDIR)/bin/%

examples/apsp.cxx

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
/** \addtogroup examples
2+
* @{
3+
* \defgroup apsp apsp
4+
* @{
5+
* \brief All-pairs shortest-paths via path doubling and Tiskin's augmented path doubling
6+
*/
7+
8+
#include <ctf.hpp>
9+
#include <float.h>
10+
using namespace CTF;
11+
struct path {
12+
int w, h;
13+
path(int w_, int h_){ w=w_; h=h_; }
14+
path(){};
15+
};
16+
17+
namespace CTF {
18+
template <>
19+
inline void Set<path>::print(char const * a, FILE * fp) const {
20+
fprintf(fp,"(%d %d)",((path*)a)[0].w,((path*)a)[0].h);
21+
}
22+
}
23+
// calculate APSP on a graph of n nodes distributed on World (communicator) dw
24+
int apsp(int n,
25+
World & dw,
26+
int niter=0){
27+
28+
//tropical semiring, define additive identity to be INT_MAX/2 to prevent integer overflow
29+
Semiring<int> s(INT_MAX/2,
30+
[](int a, int b){ return std::min(a,b); },
31+
MPI_MIN,
32+
0,
33+
[](int a, int b){ return a+b; });
34+
35+
//random adjacency matrix
36+
Matrix<int> A(n, n, dw, s);
37+
srand(dw.rank);
38+
A.fill_random(0, n*n);
39+
//no loops
40+
A["ii"] = 0;
41+
42+
//distance matrix to compute
43+
Matrix<int> D(n, n, dw, s);
44+
45+
//initialize to adjacency graph
46+
D["ij"] = A["ij"];
47+
48+
for (int i=1; i<n; i=i<<1){
49+
//all shortest paths of up 2i hops consist of one or two shortest paths up to i hops
50+
D["ij"] += D["ik"]*D["kj"];
51+
}
52+
53+
//struct for path with w=path weight, h=#hops
54+
MPI_Op opath;
55+
MPI_Op_create(
56+
[](void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
57+
for (int i=0; i<*len; i++){
58+
if (((path*)invec)[i].w <= ((path*)inoutvec)[i].w)
59+
((path*)inoutvec)[i] = ((path*)invec)[i];
60+
}
61+
},
62+
1, &opath);
63+
64+
//tropical semiring with hops carried by winner of min
65+
Semiring<path> p(path(INT_MAX/2,0),
66+
[](path a, path b){ if (a.w<b.w || (a.w == b.w && a.h<b.h)) return a; else return b; },
67+
opath,
68+
path(0,0),
69+
[](path a, path b){ return path(a.w+b.w, a.h+b.h); });
70+
71+
//path matrix to contain distance matrix
72+
Matrix<path> P(n, n, dw, p);
73+
74+
Function<int,path> setw([](int w){ return path(w, 1); });
75+
76+
P["ij"] = setw(A["ij"]);
77+
78+
//sparse path matrix to contain all paths of exactly i hops
79+
Matrix<path> Pi(n, n, SP, dw, p);
80+
81+
for (int i=1; i<n; i=i<<1){
82+
//let Pi be all paths in P consisting of exactly i hops
83+
Pi["ij"] = P["ij"];
84+
Pi.sparsify([=](path p){ return (p.h == i); });
85+
86+
//all shortest paths of up to 2i hops either
87+
// (1) are a shortest path of length up to i
88+
// (2) consist of a shortest path of length up to i and a shortest path of length exactly i
89+
P["ij"] += Pi["ik"]*P["kj"];
90+
}
91+
92+
//check correctness by subtracting the two computed shortest path matrices from one another and checking that no nonzeros are left
93+
Transform<path,int> xtrw([](path p, int & w){ return w-=p.w; });
94+
95+
xtrw(P["ij"], D["ij"]);
96+
97+
Matrix<int> D2(D);
98+
99+
D2.sparsify([](int w){ return w!=0; });
100+
101+
int64_t loc_nnz;
102+
Pair<int> * prs;
103+
D2.read_local_nnz(&loc_nnz, &prs);
104+
105+
int pass = (loc_nnz == 0);
106+
107+
if (dw.rank == 0){
108+
MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
109+
if (pass)
110+
printf("{ APSP by path doubling } passed \n");
111+
else
112+
printf("{ APSP by path doubling } failed \n");
113+
} else
114+
MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
115+
free(prs);
116+
#ifndef TEST_SUITE
117+
if (dw.rank == 0){
118+
printf("Starting %d benchmarking iterations of dense APSP-PD...\n", niter);
119+
}
120+
double min_time = DBL_MAX;
121+
double max_time = 0.0;
122+
double tot_time = 0.0;
123+
double times[niter];
124+
Timer_epoch dapsp("dense APSP-PD");
125+
dapsp.begin();
126+
for (int i=0; i<niter; i++){
127+
D["ij"] = A["ij"];
128+
double start_time = MPI_Wtime();
129+
for (int j=1; j<n; j=j<<1){
130+
D["ij"] += D["ik"]*D["kj"];
131+
}
132+
double end_time = MPI_Wtime();
133+
double iter_time = end_time-start_time;
134+
times[i] = iter_time;
135+
tot_time += iter_time;
136+
if (iter_time < min_time) min_time = iter_time;
137+
if (iter_time > max_time) max_time = iter_time;
138+
}
139+
dapsp.end();
140+
141+
if (dw.rank == 0){
142+
printf("Completed %d benchmarking iterations of dense APSP-PD (n=%d).\n", niter, n);
143+
printf("All iterations times: ");
144+
for (int i=0; i<niter; i++){
145+
printf("%lf ", times[i]);
146+
}
147+
printf("\n");
148+
std::sort(times,times+niter);
149+
printf("Dense APSP (n=%d) Min time=%lf, Avg time = %lf, Med time = %lf, Max time = %lf\n",n,min_time,tot_time/niter, times[niter/2], max_time);
150+
}
151+
if (dw.rank == 0){
152+
printf("Starting %d benchmarking iterations of sparse APSP-PD...\n", niter);
153+
}
154+
min_time = DBL_MAX;
155+
max_time = 0.0;
156+
tot_time = 0.0;
157+
Timer_epoch sapsp("sparse APSP-PD");
158+
sapsp.begin();
159+
for (int i=0; i<niter; i++){
160+
P["ij"] = setw(A["ij"]);
161+
double start_time = MPI_Wtime();
162+
for (int j=1; j<n; j=j<<1){
163+
Pi["ij"] = P["ij"];
164+
Pi.sparsify([=](path p){ return (p.h == j); });
165+
P["ij"] += Pi["ik"]*P["kj"];
166+
}
167+
double end_time = MPI_Wtime();
168+
double iter_time = end_time-start_time;
169+
times[i] = iter_time;
170+
tot_time += iter_time;
171+
if (iter_time < min_time) min_time = iter_time;
172+
if (iter_time > max_time) max_time = iter_time;
173+
}
174+
sapsp.end();
175+
176+
if (dw.rank == 0){
177+
printf("Completed %d benchmarking iterations of sparse APSP-PD (n=%d).\n", niter, n);
178+
printf("All iterations times: ");
179+
for (int i=0; i<niter; i++){
180+
printf("%lf ", times[i]);
181+
}
182+
printf("\n");
183+
std::sort(times,times+niter);
184+
printf("Sparse APSP (n=%d): Min time=%lf, Avg time = %lf, Med time = %lf, Max time = %lf\n",n,min_time,tot_time/niter, times[niter/2], max_time);
185+
}
186+
187+
#endif
188+
return pass;
189+
}
190+
191+
192+
#ifndef TEST_SUITE
193+
char* getCmdOption(char ** begin,
194+
char ** end,
195+
const std::string & option){
196+
char ** itr = std::find(begin, end, option);
197+
if (itr != end && ++itr != end){
198+
return *itr;
199+
}
200+
return 0;
201+
}
202+
203+
204+
int main(int argc, char ** argv){
205+
int rank, np, n, pass, niter;
206+
int const in_num = argc;
207+
char ** input_str = argv;
208+
209+
MPI_Init(&argc, &argv);
210+
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
211+
MPI_Comm_size(MPI_COMM_WORLD, &np);
212+
213+
if (getCmdOption(input_str, input_str+in_num, "-n")){
214+
n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
215+
if (n < 0) n = 7;
216+
} else n = 7;
217+
218+
if (getCmdOption(input_str, input_str+in_num, "-niter")){
219+
niter = atof(getCmdOption(input_str, input_str+in_num, "-niter"));
220+
if (niter < 0) niter = 10;
221+
} else niter = 10;
222+
223+
{
224+
World dw(argc, argv);
225+
226+
if (rank == 0){
227+
printf("Computing APSP of dense graph with %d nodes using dense and sparse path doubling\n",n);
228+
}
229+
pass = apsp(n, dw, niter);
230+
assert(pass);
231+
}
232+
233+
MPI_Finalize();
234+
return 0;
235+
}
236+
/**
237+
* @}
238+
* @}
239+
*/
240+
241+
#endif

examples/ccsd.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ void ccsd(Integrals &V,
275275
Dabij["abij"] -= V["b"];
276276

277277

278-
Bivar_Function<> fctr(&divide);
278+
Function<> fctr(&divide);
279279

280280
T.ai->contract(1.0, *(Zai.parent), "ai", Dai, "ai", 0.0, "ai", fctr);
281281
T.abij->contract(1.0, *(Zabij.parent), "abij", Dabij, "abij", 0.0, "abij", fctr);

examples/dft.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ int test_dft(int64_t n,
1919
std::complex<double> imag(0,1);
2020
MPI_Comm_size(MPI_COMM_WORLD, &numPes);
2121
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
22-
Matrix < std::complex<double>, false >DFT(n, n, SY, wrld, "DFT", 1);
23-
Matrix < std::complex<double>, false >IDFT(n, n, SY, wrld, "IDFT", 0);
22+
Matrix < std::complex<double> >DFT(n, n, SY, wrld, "DFT", 1);
23+
Matrix < std::complex<double> >IDFT(n, n, SY, wrld, "IDFT", 0);
2424

2525
DFT.read_local(&np, &idx, &data);
2626

examples/dft_3D.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ int test_dft_3D(int n,
2727
MPI_Comm_size(MPI_COMM_WORLD, &numPes);
2828
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
2929

30-
CTF::Ring< std::complex<long double>, false > ldr;
30+
CTF::Ring< std::complex<long double> > ldr;
3131

32-
Matrix < std::complex<long double>, false >DFT(n, n, SY, wrld, ldr);
33-
Matrix < std::complex<long double>, false >IDFT(n, n, SY, wrld, ldr);
34-
Tensor < std::complex<long double>, false >MESH(3, len, sym, wrld, ldr);
32+
Matrix < std::complex<long double> >DFT(n, n, SY, wrld, ldr);
33+
Matrix < std::complex<long double> >IDFT(n, n, SY, wrld, ldr);
34+
Tensor < std::complex<long double> >MESH(3, len, sym, wrld, ldr);
3535

3636
DFT.read_local(&np, &idx, &data);
3737

0 commit comments

Comments
 (0)