Skip to content

Commit 9a87361

Browse files
author
Johnathan Rhyne
committed
New panel factorization implemented into dorgx family and pass local tests. Check CI before moving to other precisions
1 parent a644e25 commit 9a87361

File tree

16 files changed

+3555
-181
lines changed

16 files changed

+3555
-181
lines changed

SRC/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,8 @@ set(DLASRC
316316
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrs3.f dlatrz.f dlauu2.f
317317
dlauum.f dopgtr.f dopmtr.f dorg2l.f dorg2r.f
318318
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
319+
dorgkr.f dorgrk.f dorgkl.f dorglk.f
320+
dlumm.f dtrtrm.f dtrmmoop.f
319321
dorgrq.f dorgtr.f dorgtsqr.f dorgtsqr_row.f dorm2l.f dorm2r.f dorm22.f
320322
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
321323
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f

SRC/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@ DLASRC = \
345345
dlasyf.o dlasyf_rook.o dlasyf_rk.o \
346346
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrs3.o dlatrz.o dlauu2.o \
347347
dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
348+
dorgkr.o dorgrk.o dorgkl.o dorglk.o \
349+
dlumm.o dtrtrm.o dtrmmoop.o \
348350
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
349351
dorgrq.o dorgtr.o dorgtsqr.o dorgtsqr_row.o dorm2l.o dorm2r.o dorm22.o \
350352
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \

SRC/dlarfb0c2.f

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
189189
! Array arguments
190190
DOUBLE PRECISION V(LDV,*), C(LDC,*), T(LDT,*)
191191
! Local scalars
192-
LOGICAL QR, LQ, QL, DIRF, COLV, SIDEL, SIDER,
192+
LOGICAL QR, LQ, QL, RQ, DIRF, COLV, SIDEL, SIDER,
193193
$ TRANST
194194
INTEGER I, J
195195
! External functions
@@ -224,10 +224,7 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
224224
225225
! RQ is when we store the reflectors row by row and have the
226226
! 'first' reflector stored in the last row
227-
! RQ = (.NOT.DIRF).AND.(.NOT.COLV)
228-
! Since we have exactly one of these 4 modes, we don't need to actually
229-
! store the value of RQ, instead we assume this is the case if we fail
230-
! the above 3 checks.
227+
RQ = (.NOT.DIRF).AND.(.NOT.COLV)
231228
232229
IF (QR) THEN
233230
! We are computing C = HC = (I - VTV')C
@@ -312,7 +309,7 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
312309
CALL DTRMM('Left', 'Lower', 'No Transpose', 'Unit',
313310
$ K, N, NEG_ONE, V, LDV, C, LDC)
314311
ELSE IF (LQ) THEN
315-
! We are computing C = CH' = C(I-V'T'V)
312+
! We are computing C = C op(H) = C(I-V' op(T) V)
316313
! Where: V = [ V1 V2 ] and C = [ C1 C2 ]
317314
! with the following dimensions:
318315
! V1\in\R^{K\times K}
@@ -324,20 +321,20 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
324321
! without having to allocate anything extra.
325322
! This lets us simplify our above equation to get
326323
!
327-
! C = CH' = [ 0, C2 ](I - [ V1' ]T'[ V1, V2 ])
328-
! [ V2' ]
324+
! C = C op(H) = [ 0, C2 ](I - [ V1' ]op(T)[ V1, V2 ])
325+
! [ V2' ]
329326
!
330-
! = [ 0, C2 ] - [ 0, C2 ][ V1' ]T'[ V1, V2 ]
327+
! = [ 0, C2 ] - [ 0, C2 ][ V1' ]op(T)[ V1, V2 ]
331328
! [ V2' ]
332329
!
333-
! = [ 0, C2 ] - C2*V2'*T'[ V1, V2 ]
330+
! = [ 0, C2 ] - C2*V2'*op(T)[ V1, V2 ]
334331
!
335-
! = [ -C2*V2'*T'*V1, C2 - C2*V2'*T'*V2 ]
332+
! = [ -C2*V2'*op(T)*V1, C2 - C2*V2'*op(T)*V2 ]
336333
!
337334
! So, we can order our computations as follows:
338335
!
339336
! C1 = C2*V2'
340-
! C1 = C1*T'
337+
! C1 = C1*op(T)
341338
! C2 = C2 - C1*V2
342339
! C1 = -C1*V1
343340
!
@@ -348,9 +345,6 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
348345
IF( .NOT.SIDER ) THEN
349346
CALL XERBLA('DLARFB0C2', 2)
350347
RETURN
351-
ELSE IF(.NOT.TRANST) THEN
352-
CALL XERBLA('DLARFB0C2', 3)
353-
RETURN
354348
END IF
355349
!
356350
! C1 = C2*V2'
@@ -369,8 +363,13 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
369363
!
370364
! C1 = C1*T'
371365
!
372-
CALL DTRMM('Right', 'Upper', 'Transpose', 'Non-unit',
373-
$ M, K, ONE, T, LDT, C, LDC)
366+
IF (TRANST) THEN
367+
CALL DTRMM('Right', 'Upper', 'Transpose',
368+
$ 'Non-unit', M, K, ONE, T, LDT, C, LDC)
369+
ELSE
370+
CALL DTRMM('Right', 'Lower', 'No Transpose',
371+
$ 'Non-unit', M, K, ONE, T, LDT, C, LDC)
372+
END IF
374373
!
375374
! C2 = C2 - C1*V2 = -C1*V2 + C2
376375
!
@@ -471,8 +470,8 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
471470
!
472471
CALL DTRMM('Left', 'Upper', 'No Transpose', 'Unit',
473472
$ K, N, NEG_ONE, V(M-K+1,1), LDV, C(M-K+1,1), LDC)
474-
ELSE ! IF (RQ) THEN
475-
! We are computing C = CH' = C(I-V'T'V)
473+
ELSE IF (RQ) THEN
474+
! We are computing C = C op(H) = C(I-V' op(T) V)
476475
! Where: V = [ V2 V1] and C = [ C2 C1 ]
477476
! with the following dimensions:
478477
! V1\in\R^{K\times K}
@@ -484,36 +483,33 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
484483
! without having to allocate anything extra.
485484
! This lets us simplify our above equation to get
486485
!
487-
! C = CH' = [ C2, 0 ] (I - [ V2' ]T'[ V2, V1 ]
488-
! [ V1' ]
486+
! C = C op(H) = [ C2, 0 ] (I - [ V2' ]op(T)[ V2, V1 ]
487+
! [ V1' ]
489488
!
490-
! = [ C2, 0 ] - [ C2, 0 ] [ V2' ]T'[ V2, V1 ]
489+
! = [ C2, 0 ] - [ C2, 0 ] [ V2' ]op(T)[ V2, V1 ]
491490
! [ V1' ]
492491
!
493-
! = [ C2, 0 ] - C2*V2'*T'[ V2, V1 ]
492+
! = [ C2, 0 ] - C2*V2'*op(T)[ V2, V1 ]
494493
!
495-
! = [ C2, 0 ] - [ C2*V2'*T'*V2, C2*V2'*T'*V1 ]
494+
! = [ C2, 0 ] - [ C2*V2'*op(T)*V2, C2*V2'*op(T)*V1 ]
496495
!
497-
! = [ C2 - C2*V2'*T'*V2, -C2*V2'*T'*V1 ]
496+
! = [ C2 - C2*V2'*op(T)*V2, -C2*V2'*op(T)*V1 ]
498497
!
499498
! So, we can order our computations as follows:
500499
!
501500
! C1 = C2*V2'
502-
! C1 = C1*T'
501+
! C1 = C1*op(T)
503502
! C2 = C2 - C1*V2
504503
! C1 = -C1*V1
505504
!
506505
!
507506
! To achieve the same end result
508507
!
509-
! Check to ensure side and trans are the expected values
508+
! Check to ensure side has the expected value
510509
!
511510
IF( .NOT.SIDER ) THEN
512511
CALL XERBLA('DLARFB0C2', 2)
513512
RETURN
514-
ELSE IF(.NOT.TRANST) THEN
515-
CALL XERBLA('DLARFB0C2', 3)
516-
RETURN
517513
END IF
518514
!
519515
! C1 = C2*V2'
@@ -529,10 +525,15 @@ SUBROUTINE DLARFB0C2(C2I, SIDE, TRANS, DIRECT, STOREV, M, N,
529525
$ ONE, C, LDC, V, LDV, ZERO, C(1, N-K+1), LDC)
530526
END IF
531527
!
532-
! C1 = C1*T'
528+
! C1 = C1*op(T)
533529
!
534-
CALL DTRMM('Right', 'Lower', 'Transpose', 'Non-unit',
535-
$ M, K, ONE, T, LDT, C(1, N-K+1), LDC)
530+
IF( TRANST ) THEN
531+
CALL DTRMM('Right', 'Lower', 'Transpose',
532+
$ 'Non-unit', M, K, ONE, T, LDT, C(1, N-K+1), LDC)
533+
ELSE
534+
CALL DTRMM('Right', 'Upper', 'No Transpose',
535+
$ 'Non-unit', M, K, ONE, T, LDT, C(1, N-K+1), LDC)
536+
END IF
536537
!
537538
! C2 = C2 - C1*V2 = -C1*V2 + C2
538539
!

0 commit comments

Comments
 (0)