; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=future -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
; RUN:   -mcpu=future -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

; tdmrz - materialise a 1024-bit value with llvm.ppc.mma.dmsetdmrz and store
; it to %resp. The expected assembly for both RUN lines is one dmsetdmrz on
; dmr0 followed by two dmxxextfdmr512 extractions (wacc0 and wacc_hi0), each
; feeding a pair of stxvp stores that together cover offsets 0..96 of %resp.
; The two RUN lines differ in the order of the extractions and in which
; register pair goes to which offset. %vp1 is not referenced by the body.
define void @tdmrz(ptr nocapture readonly %vp1, ptr nocapture %resp)  {
; CHECK-LABEL: tdmrz:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    dmsetdmrz dmr0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r4)
; CHECK-NEXT:    stxvp vsp36, 64(r4)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r4)
; CHECK-NEXT:    stxvp vsp36, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: tdmrz:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    dmsetdmrz dmr0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r4)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r4)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r4)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r4)
; CHECK-BE-NEXT:    blr
entry:
  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
  store <1024 x i1> %z, ptr %resp, align 32
  ret void
}

; tdmmr - load a 1024-bit value from %vp1, pass it through llvm.ppc.mma.dmmr
; and store the result to %resp. The expected assembly loads the operand with
; four lxvp instructions, assembles it into dmr0 with two dmxxinstdmr512
; instructions, emits "dmmr dmr0, dmr0", and then writes the result back with
; the same extract-and-stxvp sequence used for other 1024-bit stores in this
; file.
define void @tdmmr(ptr nocapture readonly %vp1, ptr nocapture %resp)  {
; CHECK-LABEL: tdmmr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    lxvp vsp36, 32(r3)
; CHECK-NEXT:    lxvp vsp32, 64(r3)
; CHECK-NEXT:    lxvp vsp38, 96(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
; CHECK-NEXT:    dmmr dmr0, dmr0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r4)
; CHECK-NEXT:    stxvp vsp36, 64(r4)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r4)
; CHECK-NEXT:    stxvp vsp36, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: tdmmr:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
; CHECK-BE-NEXT:    dmmr dmr0, dmr0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r4)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r4)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r4)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r4)
; CHECK-BE-NEXT:    blr
entry:
  %l = load <1024 x i1>, ptr %vp1, align 32
  %c = call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %l)
  store <1024 x i1> %c, ptr %resp, align 32
  ret void
}

; tdmxor - load two 1024-bit values from %vp1 and %vp2, combine them with
; llvm.ppc.mma.dmxor and store the result to %resp. The expected assembly
; loads each operand with four lxvp instructions, assembles them into dmr0
; and dmr1 (two dmxxinstdmr512 each), emits "dmxor dmr0, dmr1", and stores
; dmr0 through two dmxxextfdmr512 extractions and four stxvp stores.
define void @tdmxor(ptr nocapture readonly %vp1, ptr %vp2, ptr nocapture %resp)  {
; CHECK-LABEL: tdmxor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    lxvp vsp36, 32(r3)
; CHECK-NEXT:    lxvp vsp32, 64(r3)
; CHECK-NEXT:    lxvp vsp38, 96(r3)
; CHECK-NEXT:    lxvp vsp40, 0(r4)
; CHECK-NEXT:    lxvp vsp42, 32(r4)
; CHECK-NEXT:    lxvp vsp44, 64(r4)
; CHECK-NEXT:    lxvp vsp46, 96(r4)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp42, vsp40, 1
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp46, vsp44, 0
; CHECK-NEXT:    dmxor dmr0, dmr1
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r5)
; CHECK-NEXT:    stxvp vsp36, 64(r5)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r5)
; CHECK-NEXT:    stxvp vsp36, 0(r5)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: tdmxor:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
; CHECK-BE-NEXT:    lxvp vsp40, 96(r4)
; CHECK-BE-NEXT:    lxvp vsp42, 64(r4)
; CHECK-BE-NEXT:    lxvp vsp44, 32(r4)
; CHECK-BE-NEXT:    lxvp vsp46, 0(r4)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi1, vsp42, vsp40, 1
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc1, vsp46, vsp44, 0
; CHECK-BE-NEXT:    dmxor dmr0, dmr1
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
; CHECK-BE-NEXT:    blr
entry:
  %l = load <1024 x i1>, ptr %vp1, align 32
  %r = load <1024 x i1>, ptr %vp2, align 32
  %x = call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %l, <1024 x i1> %r)
  store <1024 x i1> %x, ptr %resp, align 32
  ret void
}

; text512 - zero a 1024-bit value with dmsetdmrz, then call
; llvm.ppc.mma.dmxxextfdmr512 on it with immediate 0 and with immediate 1.
; Each call returns a pair of 256-bit values; the body stores element 0 of
; the first pair to %rp1 and element 0 of the second pair to %rp3.
; The expected assembly is one dmsetdmrz, then for each immediate one
; dmxxextfdmr512 followed by two stxv stores (offsets 0 and 16).
; %vp1, %rp2 and %rp4 are not referenced by the body.
; NOTE(review): element 1 of each returned pair is never stored, so the second
; output register pair of each extraction is not covered by the assertions.
; Presumably %rp2/%rp4 were meant to receive those halves - confirm; changing
; the IR requires regenerating the assertions.
define void @text512(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
; CHECK-LABEL: text512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    dmsetdmrz dmr0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxv v2, 16(r4)
; CHECK-NEXT:    stxv v3, 0(r4)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxv v2, 16(r6)
; CHECK-NEXT:    stxv v3, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: text512:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    dmsetdmrz dmr0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxv v3, 16(r4)
; CHECK-BE-NEXT:    stxv v2, 0(r4)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxv v3, 16(r6)
; CHECK-BE-NEXT:    stxv v2, 0(r6)
; CHECK-BE-NEXT:    blr
entry:
  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
  %x = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 0)
  %p = extractvalue { <256 x i1>, <256 x i1 > } %x, 0
  store <256 x i1> %p, ptr %rp1, align 16
  %y = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 1)
  %q = extractvalue { <256 x i1>, <256 x i1 > } %y, 0
  store <256 x i1> %q, ptr %rp3, align 16
  ret void
}

; text256 - zero a 1024-bit value with dmsetdmrz, then extract a 256-bit value
; from it four times with llvm.ppc.mma.dmxxextfdmr256, using immediates 0, 1,
; 2 and 3, storing the results to %rp1, %rp2, %rp3 and %rp4 respectively.
; The expected assembly is one dmsetdmrz and, per immediate, one
; dmxxextfdmr256 reading dmr0 followed by two stxv stores (offsets 0 and 16).
; %vp1 is not referenced by the body.
define void @text256(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
; CHECK-LABEL: text256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    dmsetdmrz dmr0
; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 0
; CHECK-NEXT:    stxv v2, 16(r4)
; CHECK-NEXT:    stxv v3, 0(r4)
; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 1
; CHECK-NEXT:    stxv v2, 16(r5)
; CHECK-NEXT:    stxv v3, 0(r5)
; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 2
; CHECK-NEXT:    stxv v2, 16(r6)
; CHECK-NEXT:    stxv v3, 0(r6)
; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 3
; CHECK-NEXT:    stxv v2, 16(r7)
; CHECK-NEXT:    stxv v3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: text256:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    dmsetdmrz dmr0
; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 0
; CHECK-BE-NEXT:    stxv v3, 16(r4)
; CHECK-BE-NEXT:    stxv v2, 0(r4)
; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 1
; CHECK-BE-NEXT:    stxv v3, 16(r5)
; CHECK-BE-NEXT:    stxv v2, 0(r5)
; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 2
; CHECK-BE-NEXT:    stxv v3, 16(r6)
; CHECK-BE-NEXT:    stxv v2, 0(r6)
; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 3
; CHECK-BE-NEXT:    stxv v3, 16(r7)
; CHECK-BE-NEXT:    stxv v2, 0(r7)
; CHECK-BE-NEXT:    blr
entry:
  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
  %x = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 0)
  store <256 x i1> %x, ptr %rp1, align 16
  %q = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 1)
  store <256 x i1> %q, ptr %rp2, align 16
  %w = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 2)
  store <256 x i1> %w, ptr %rp3, align 16
  %y = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 3)
  store <256 x i1> %y, ptr %rp4, align 16
  ret void
}

; tins512 - exercise llvm.ppc.mma.dmxxinstdmr512 with both immediates.
; Starting from a value produced by dmsetdmrz, the body inserts the pair of
; 256-bit values loaded from %vp1/%vp2 with immediate 0 and stores the
; resulting 1024-bit value to %rp1; it then inserts the pair loaded from
; %vp3/%vp4 with immediate 1 into that result and stores it to %rp2.
; In the expected assembly the immediate-0 insert targets wacc0 and the
; immediate-1 insert targets wacc_hi0; each 1024-bit store is emitted as two
; dmxxextfdmr512 extractions plus four stxvp stores.
define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)  {
; CHECK-LABEL: tins512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 16(r3)
; CHECK-NEXT:    lxv v3, 0(r3)
; CHECK-NEXT:    lxv v4, 16(r4)
; CHECK-NEXT:    lxv v5, 0(r4)
; CHECK-NEXT:    dmsetdmrz dmr0
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r7)
; CHECK-NEXT:    stxvp vsp36, 64(r7)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r7)
; CHECK-NEXT:    stxvp vsp36, 0(r7)
; CHECK-NEXT:    lxv v2, 16(r5)
; CHECK-NEXT:    lxv v4, 16(r6)
; CHECK-NEXT:    lxv v3, 0(r5)
; CHECK-NEXT:    lxv v5, 0(r6)
; CHECK-NEXT:    dmxxextfdmr512 vsp32, vsp38, wacc0, 0
; CHECK-NEXT:    stxvp vsp32, 96(r8)
; CHECK-NEXT:    stxvp vsp38, 64(r8)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r8)
; CHECK-NEXT:    stxvp vsp36, 0(r8)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: tins512:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r3)
; CHECK-BE-NEXT:    lxv v4, 0(r4)
; CHECK-BE-NEXT:    lxv v3, 16(r3)
; CHECK-BE-NEXT:    lxv v5, 16(r4)
; CHECK-BE-NEXT:    dmsetdmrz dmr0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp32, vsp38, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp38, 96(r7)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
; CHECK-BE-NEXT:    stxvp vsp32, 64(r7)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r7)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r7)
; CHECK-BE-NEXT:    lxv v2, 0(r5)
; CHECK-BE-NEXT:    lxv v4, 0(r6)
; CHECK-BE-NEXT:    lxv v3, 16(r5)
; CHECK-BE-NEXT:    lxv v5, 16(r6)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r8)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r8)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r8)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r8)
; CHECK-BE-NEXT:    blr
entry:
  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
  %l1 = load <256 x i1>, ptr %vp1, align 16
  %r1 = load <256 x i1>, ptr %vp2, align 16
  %a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %z, <256 x i1> %l1, <256 x i1> %r1, i32 0)
  store <1024 x i1> %a, ptr %rp1, align 16
  %l2 = load <256 x i1>, ptr %vp3, align 16
  %r2 = load <256 x i1>, ptr %vp4, align 16
  %b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %a, <256 x i1> %l2, <256 x i1> %r2, i32 1)
  store <1024 x i1> %b, ptr %rp2, align 16
  ret void
}

; tins256 - exercise llvm.ppc.mma.dmxxinstdmr256 with immediates 0 through 3.
; Starting from a value produced by dmsetdmrz, each step inserts a 256-bit
; value into the previous step's 1024-bit result and stores the new result to
; the next output pointer (%rp1, %rp2, %rp3, %rp4).
; The expected assembly shows four dmxxinstdmr256 instructions on dmr0 with
; immediates 0..3, all sourcing vsp34, and only two groups of lxv loads (from
; r3 and r4); each 1024-bit store is emitted as two dmxxextfdmr512
; extractions plus four stxvp stores.
; NOTE(review): the inserts with immediates 2 and 3 pass %l2 again, so %l3
; and %l4 (loaded from %vp3 and %vp4) are dead and no loads from r5/r6 appear
; in the expected assembly. This looks like a copy-paste slip - confirm the
; intent; switching to %l3/%l4 requires regenerating the assertions.
define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
; CHECK-LABEL: tins256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 16(r3)
; CHECK-NEXT:    lxv v3, 0(r3)
; CHECK-NEXT:    dmsetdmrz dmr0
; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r7)
; CHECK-NEXT:    stxvp vsp36, 64(r7)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r7)
; CHECK-NEXT:    stxvp vsp36, 0(r7)
; CHECK-NEXT:    lxv v2, 16(r4)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 1
; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
; CHECK-NEXT:    stxvp vsp36, 96(r8)
; CHECK-NEXT:    stxvp vsp32, 64(r8)
; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 2
; CHECK-NEXT:    stxvp vsp36, 32(r8)
; CHECK-NEXT:    stxvp vsp32, 0(r8)
; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
; CHECK-NEXT:    stxvp vsp36, 96(r9)
; CHECK-NEXT:    stxvp vsp32, 64(r9)
; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 3
; CHECK-NEXT:    stxvp vsp36, 32(r9)
; CHECK-NEXT:    stxvp vsp32, 0(r9)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r10)
; CHECK-NEXT:    stxvp vsp36, 64(r10)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r10)
; CHECK-NEXT:    stxvp vsp36, 0(r10)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: tins256:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r3)
; CHECK-BE-NEXT:    lxv v3, 16(r3)
; CHECK-BE-NEXT:    dmsetdmrz dmr0
; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r7)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r7)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r7)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r7)
; CHECK-BE-NEXT:    lxv v2, 0(r4)
; CHECK-BE-NEXT:    lxv v3, 16(r4)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp32, 96(r8)
; CHECK-BE-NEXT:    stxvp vsp36, 64(r8)
; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 1
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 2
; CHECK-BE-NEXT:    stxvp vsp32, 32(r8)
; CHECK-BE-NEXT:    stxvp vsp36, 0(r8)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp32, 96(r9)
; CHECK-BE-NEXT:    stxvp vsp36, 64(r9)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 3
; CHECK-BE-NEXT:    stxvp vsp32, 32(r9)
; CHECK-BE-NEXT:    stxvp vsp36, 0(r9)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r10)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r10)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r10)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r10)
; CHECK-BE-NEXT:    blr
entry:
  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
  %l1 = load <256 x i1>, ptr %vp1, align 16
  %a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %z, <256 x i1> %l1, i32 0)
  store <1024 x i1> %a, ptr %rp1, align 16
  %l2 = load <256 x i1>, ptr %vp2, align 16
  %b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %a, <256 x i1> %l2, i32 1)
  store <1024 x i1> %b, ptr %rp2, align 16
  ; %l3 is loaded here but the insert below passes %l2.
  %l3 = load <256 x i1>, ptr %vp3, align 16
  %c = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %b, <256 x i1> %l2, i32 2)
  store <1024 x i1> %c, ptr %rp3, align 16
  ; %l4 is loaded here but the insert below passes %l2.
  %l4 = load <256 x i1>, ptr %vp4, align 16
  %d = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %c, <256 x i1> %l2, i32 3)
  store <1024 x i1> %d, ptr %rp4, align 16
  ret void
}

; tbuild - exercise the build/disassemble intrinsics.
; First half: load one <16 x i8> vector from %v, pass it as all eight operands
; of llvm.ppc.mma.build.dmr, and store the 1024-bit result to %res2.
; Second half: load a 1024-bit value from %p1 and hand it, together with the
; destination pointer %res1, to llvm.ppc.mma.disassemble.dmr.
; In the expected assembly the build is emitted as one lxv, a vmr copy and two
; dmxxinstdmr512 instructions that use vsp34 for both sources; the disassemble
; is emitted as two dmxxextfdmr512 extractions plus four stxvp stores to r5.
; %p2 is not referenced by the body.
define void @tbuild(ptr %p1, ptr %p2, ptr %res1, ptr %res2, ptr %v) {
; CHECK-LABEL: tbuild:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v3, 0(r7)
; CHECK-NEXT:    vmr v2, v3
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp34, 0
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r6)
; CHECK-NEXT:    stxvp vsp36, 64(r6)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r6)
; CHECK-NEXT:    stxvp vsp36, 0(r6)
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    lxvp vsp36, 32(r3)
; CHECK-NEXT:    lxvp vsp32, 64(r3)
; CHECK-NEXT:    lxvp vsp38, 96(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r5)
; CHECK-NEXT:    stxvp vsp36, 64(r5)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r5)
; CHECK-NEXT:    stxvp vsp36, 0(r5)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: tbuild:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v3, 0(r7)
; CHECK-BE-NEXT:    vmr v2, v3
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp34, 0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r6)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r6)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r6)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r6)
; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <16 x i8>, ptr %v, align 16
  %1 = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0)
  store <1024 x i1> %1, ptr %res2, align 128
  %2 = load <1024 x i1>, ptr %p1, align 128
  tail call void @llvm.ppc.mma.disassemble.dmr(ptr %res1, <1024 x i1> %2)
  ret void
}

; Declarations of the llvm.ppc.mma.* intrinsics called by the test functions
; in this file. <1024 x i1> is the type used for whole-DMR values, <256 x i1>
; for the values inserted into or extracted from them, and the trailing i32 of
; the insert/extract intrinsics is the immediate selecting which part is
; affected (the tests pass 0..1 for the 512-bit forms and 0..3 for the 256-bit
; forms).
declare <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare void @llvm.ppc.mma.disassemble.dmr(ptr, <1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1>, <256 x i1>, <256 x i1>, i32)
declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1>, <256 x i1>, i32)
declare { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1>, i32)
declare <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1>, i32)
