Merge pull request #403 from vchuravy/vc/lvvm9-missing

vchuravy · web-flow · commit 45f8e6f68975 · 2020-01-20T12:23:13.000-05:00
Add missing LLVM patches
diff --git a/L/LLVM/v8/bundled/llvm_patches/0030-llvm-8.0-D66657-codegen-degenerate.patch b/L/LLVM/v8/bundled/llvm_patches/0030-llvm-8.0-D66657-codegen-degenerate.patch
@@ -0,0 +1,65 @@
+From 4c7e1defbddafcfcfe1211b041d43a36114a8f48 Mon Sep 17 00:00:00 2001
+From: Valentin Churavy <v.churavy@gmail.com>
+Date: Sat, 14 Dec 2019 10:33:30 -0500
+Subject: [PATCH 2/2] [CodegenPrepare] Guard against degenerate branches
+
+Summary:
+Guard against a potential crash observed in https://github.com/JuliaLang/julia/issues/32994#issuecomment-524249628
+If two branches are collapsed, we can encounter a degenerate conditional branch (`TBB == FBB`).
+The subsequent code assumes that the two targets differ, so we skip such branches.
+
+Reviewers: ributzka, spatel
+
+Subscribers: loladiro, dexonsmith, hiraditya, llvm-commits
+
+Tags: #llvm
+
+Differential Revision: https://reviews.llvm.org/D66657
+---
+ llvm/lib/CodeGen/CodeGenPrepare.cpp            |  4 ++++
+ .../CodeGen/X86/codegen-prepare-collapse.ll    | 18 ++++++++++++++++++
+ 2 files changed, 22 insertions(+)
+ create mode 100644 llvm/test/CodeGen/X86/codegen-prepare-collapse.ll
+
+diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
+index c35f8666fa3..3647641c594 100644
+--- a/lib/CodeGen/CodeGenPrepare.cpp
++++ b/lib/CodeGen/CodeGenPrepare.cpp
+@@ -6929,6 +6929,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
+     if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+       continue;
+ 
++    // The merging of mostly empty BB can cause a degenerate branch.
++    if (TBB == FBB)
++      continue;
++
+     unsigned Opc;
+     Value *Cond1, *Cond2;
+     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
+diff --git a/test/CodeGen/X86/codegen-prepare-collapse.ll b/test/CodeGen/X86/codegen-prepare-collapse.ll
+new file mode 100644
+index 00000000000..18e3ef7afbd
+--- /dev/null
++++ b/test/CodeGen/X86/codegen-prepare-collapse.ll
+@@ -0,0 +1,18 @@
++; RUN: llc -fast-isel=true -O1 -mtriple=x86_64-unkown-linux-gnu -start-before=codegenprepare -stop-after=codegenprepare -o - < %s | FileCheck %s
++
++; CHECK-LABEL: @foo
++define void @foo() {
++top:
++; CHECK: br label %L34
++  br label %L34
++
++L34:                                              ; preds = %L34, %L34, %top
++  %.sroa.075.0 = phi i64 [ undef, %top ], [ undef, %L34 ], [ undef, %L34 ]
++  %0 = icmp sgt i8 undef, -1
++  %cond5896 = icmp eq i8 0, 2
++  %cond58 = and i1 %cond5896, %0
++; During codegenprepare such degenerate branches can occur and should not
++; lead to crashes.
++; CHECK: br label %L34
++  br i1 %cond58, label %L34, label %L34
++}
+-- 
+2.24.1
+
diff --git a/L/LLVM/v8/bundled/llvm_patches/0031-llvm-8.0-D71495-vectorize-freduce.patch b/L/LLVM/v8/bundled/llvm_patches/0031-llvm-8.0-D71495-vectorize-freduce.patch
@@ -0,0 +1,94 @@
+From 7c30e23f115ae285b497ef11af0153703111dff2 Mon Sep 17 00:00:00 2001
+From: Valentin Churavy <v.churavy@gmail.com>
+Date: Sun, 22 Dec 2019 14:25:50 -0500
+Subject: [PATCH 1/2] [SelectionDAG] Copy FP flags when visiting a binary
+ instruction.
+
+Summary:
+We noticed in Julia that the sequence below no longer turned into
+a sequence of FMA instructions in LLVM 7+, but it did in LLVM 6.
+
+```
+    %29 = fmul contract <4 x double> %wide.load, %wide.load16
+    %30 = fmul contract <4 x double> %wide.load13, %wide.load17
+    %31 = fmul contract <4 x double> %wide.load14, %wide.load18
+    %32 = fmul contract <4 x double> %wide.load15, %wide.load19
+    %33 = fadd fast <4 x double> %vec.phi, %29
+    %34 = fadd fast <4 x double> %vec.phi10, %30
+    %35 = fadd fast <4 x double> %vec.phi11, %31
+    %36 = fadd fast <4 x double> %vec.phi12, %32
+```
+
+Unlike Clang, Julia doesn't set the `unsafe-fp-math=true` function
+attribute, but rather emits more local instruction flags.
+
+This partially undoes https://reviews.llvm.org/D46854. If required, I can try to minimize the test further.
+
+Reviewers: spatel, mcberg2017
+
+Reviewed By: spatel
+
+Subscribers: chriselrod, merge_guards_bot, hiraditya, llvm-commits
+
+Tags: #llvm
+
+Differential Revision: https://reviews.llvm.org/D71495
+---
+ .../SelectionDAG/SelectionDAGBuilder.cpp      |  7 +++++
+ llvm/test/CodeGen/X86/fmf-reduction.ll        | 26 +++++++++++++++++++
+ 2 files changed, 33 insertions(+)
+ create mode 100644 llvm/test/CodeGen/X86/fmf-reduction.ll
+
+diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+index bfeb3d1bc2b..e6362c19691 100644
+--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+@@ -2833,6 +2833,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
+   if (isVectorReductionOp(&I)) {
+     Flags.setVectorReduction(true);
+     LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
++
++    // If no flags are set, the incoming flags are propagated. If any flags
++    // are set, they are intersected with the incoming flags, so we need to
++    // copy the FMF flags here.
++    if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
++      Flags.copyFMF(*FPOp);
++    }
+   }
+ 
+   SDValue Op1 = getValue(I.getOperand(0));
+diff --git a/test/CodeGen/X86/fmf-reduction.ll b/test/CodeGen/X86/fmf-reduction.ll
+new file mode 100644
+index 00000000000..1d669d2a924
+--- /dev/null
++++ b/test/CodeGen/X86/fmf-reduction.ll
+@@ -0,0 +1,26 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
++; RUN: llc < %s -mtriple=x86_64-- -mattr=fma | FileCheck %s
++
++; Propagation of IR FMF should not drop flags when adding the DAG reduction flag.
++; This should include an FMA instruction, not separate FMUL/FADD.
++
++define double @julia_dotf(<4 x double> %x, <4 x double> %y, <4 x double> %z, i1 %t3) {
++; CHECK-LABEL: julia_dotf:
++; CHECK:       # %bb.0:
++; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
++; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
++; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
++; CHECK-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
++; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
++; CHECK-NEXT:    vzeroupper
++; CHECK-NEXT:    retq
++  %t1 = fmul contract <4 x double> %x, %y
++  %t2 = fadd fast <4 x double> %z, %t1
++  %rdx.shuf = shufflevector <4 x double> %t2, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
++  %bin.rdx22 = fadd fast <4 x double> %t2, %rdx.shuf
++  %rdx.shuf23 = shufflevector <4 x double> %bin.rdx22, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
++  %bin.rdx24 = fadd fast <4 x double> %bin.rdx22, %rdx.shuf23
++  %t4 = extractelement <4 x double> %bin.rdx24, i32 0
++  ret double %t4
++}
++
+-- 
+2.24.1
+
diff --git a/L/LLVM/v9/build_tarballs.jl b/L/LLVM/v9/build_tarballs.jl
@@ -2,6 +2,7 @@ version = v"9.0.1"
 
 include("../common.jl")
 
+
 platforms = expand_cxxstring_abis(supported_platforms())
 sources, script, products = configure(version, assert=false)
 build_tarballs(ARGS, "LLVM", version, sources, script,
diff --git a/L/LLVM/v9/bundled/llvm_patches/0030-llvm-8.0-D66657-codegen-degenerate.patch b/L/LLVM/v9/bundled/llvm_patches/0030-llvm-8.0-D66657-codegen-degenerate.patch
@@ -0,0 +1,65 @@
+From 4c7e1defbddafcfcfe1211b041d43a36114a8f48 Mon Sep 17 00:00:00 2001
+From: Valentin Churavy <v.churavy@gmail.com>
+Date: Sat, 14 Dec 2019 10:33:30 -0500
+Subject: [PATCH 2/2] [CodegenPrepare] Guard against degenerate branches
+
+Summary:
+Guard against a potential crash observed in https://github.com/JuliaLang/julia/issues/32994#issuecomment-524249628
+If two branches are collapsed, we can encounter a degenerate conditional branch (`TBB == FBB`).
+The subsequent code assumes that the two targets differ, so we skip such branches.
+
+Reviewers: ributzka, spatel
+
+Subscribers: loladiro, dexonsmith, hiraditya, llvm-commits
+
+Tags: #llvm
+
+Differential Revision: https://reviews.llvm.org/D66657
+---
+ llvm/lib/CodeGen/CodeGenPrepare.cpp            |  4 ++++
+ .../CodeGen/X86/codegen-prepare-collapse.ll    | 18 ++++++++++++++++++
+ 2 files changed, 22 insertions(+)
+ create mode 100644 llvm/test/CodeGen/X86/codegen-prepare-collapse.ll
+
+diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
+index c35f8666fa3..3647641c594 100644
+--- a/lib/CodeGen/CodeGenPrepare.cpp
++++ b/lib/CodeGen/CodeGenPrepare.cpp
+@@ -6929,6 +6929,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
+     if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+       continue;
+ 
++    // The merging of mostly empty BB can cause a degenerate branch.
++    if (TBB == FBB)
++      continue;
++
+     unsigned Opc;
+     Value *Cond1, *Cond2;
+     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
+diff --git a/test/CodeGen/X86/codegen-prepare-collapse.ll b/test/CodeGen/X86/codegen-prepare-collapse.ll
+new file mode 100644
+index 00000000000..18e3ef7afbd
+--- /dev/null
++++ b/test/CodeGen/X86/codegen-prepare-collapse.ll
+@@ -0,0 +1,18 @@
++; RUN: llc -fast-isel=true -O1 -mtriple=x86_64-unkown-linux-gnu -start-before=codegenprepare -stop-after=codegenprepare -o - < %s | FileCheck %s
++
++; CHECK-LABEL: @foo
++define void @foo() {
++top:
++; CHECK: br label %L34
++  br label %L34
++
++L34:                                              ; preds = %L34, %L34, %top
++  %.sroa.075.0 = phi i64 [ undef, %top ], [ undef, %L34 ], [ undef, %L34 ]
++  %0 = icmp sgt i8 undef, -1
++  %cond5896 = icmp eq i8 0, 2
++  %cond58 = and i1 %cond5896, %0
++; During codegenprepare such degenerate branches can occur and should not
++; lead to crashes.
++; CHECK: br label %L34
++  br i1 %cond58, label %L34, label %L34
++}
+-- 
+2.24.1
+
diff --git a/L/LLVM/v9/bundled/llvm_patches/0031-llvm-8.0-D71495-vectorize-freduce.patch b/L/LLVM/v9/bundled/llvm_patches/0031-llvm-8.0-D71495-vectorize-freduce.patch
@@ -0,0 +1,94 @@
+From 7c30e23f115ae285b497ef11af0153703111dff2 Mon Sep 17 00:00:00 2001
+From: Valentin Churavy <v.churavy@gmail.com>
+Date: Sun, 22 Dec 2019 14:25:50 -0500
+Subject: [PATCH 1/2] [SelectionDAG] Copy FP flags when visiting a binary
+ instruction.
+
+Summary:
+We noticed in Julia that the sequence below no longer turned into
+a sequence of FMA instructions in LLVM 7+, but it did in LLVM 6.
+
+```
+    %29 = fmul contract <4 x double> %wide.load, %wide.load16
+    %30 = fmul contract <4 x double> %wide.load13, %wide.load17
+    %31 = fmul contract <4 x double> %wide.load14, %wide.load18
+    %32 = fmul contract <4 x double> %wide.load15, %wide.load19
+    %33 = fadd fast <4 x double> %vec.phi, %29
+    %34 = fadd fast <4 x double> %vec.phi10, %30
+    %35 = fadd fast <4 x double> %vec.phi11, %31
+    %36 = fadd fast <4 x double> %vec.phi12, %32
+```
+
+Unlike Clang, Julia doesn't set the `unsafe-fp-math=true` function
+attribute, but rather emits more local instruction flags.
+
+This partially undoes https://reviews.llvm.org/D46854. If required, I can try to minimize the test further.
+
+Reviewers: spatel, mcberg2017
+
+Reviewed By: spatel
+
+Subscribers: chriselrod, merge_guards_bot, hiraditya, llvm-commits
+
+Tags: #llvm
+
+Differential Revision: https://reviews.llvm.org/D71495
+---
+ .../SelectionDAG/SelectionDAGBuilder.cpp      |  7 +++++
+ llvm/test/CodeGen/X86/fmf-reduction.ll        | 26 +++++++++++++++++++
+ 2 files changed, 33 insertions(+)
+ create mode 100644 llvm/test/CodeGen/X86/fmf-reduction.ll
+
+diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+index bfeb3d1bc2b..e6362c19691 100644
+--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+@@ -2833,6 +2833,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
+   if (isVectorReductionOp(&I)) {
+     Flags.setVectorReduction(true);
+     LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
++
++    // If no flags are set, the incoming flags are propagated. If any flags
++    // are set, they are intersected with the incoming flags, so we need to
++    // copy the FMF flags here.
++    if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
++      Flags.copyFMF(*FPOp);
++    }
+   }
+ 
+   SDValue Op1 = getValue(I.getOperand(0));
+diff --git a/test/CodeGen/X86/fmf-reduction.ll b/test/CodeGen/X86/fmf-reduction.ll
+new file mode 100644
+index 00000000000..1d669d2a924
+--- /dev/null
++++ b/test/CodeGen/X86/fmf-reduction.ll
+@@ -0,0 +1,26 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
++; RUN: llc < %s -mtriple=x86_64-- -mattr=fma | FileCheck %s
++
++; Propagation of IR FMF should not drop flags when adding the DAG reduction flag.
++; This should include an FMA instruction, not separate FMUL/FADD.
++
++define double @julia_dotf(<4 x double> %x, <4 x double> %y, <4 x double> %z, i1 %t3) {
++; CHECK-LABEL: julia_dotf:
++; CHECK:       # %bb.0:
++; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
++; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
++; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
++; CHECK-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
++; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
++; CHECK-NEXT:    vzeroupper
++; CHECK-NEXT:    retq
++  %t1 = fmul contract <4 x double> %x, %y
++  %t2 = fadd fast <4 x double> %z, %t1
++  %rdx.shuf = shufflevector <4 x double> %t2, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
++  %bin.rdx22 = fadd fast <4 x double> %t2, %rdx.shuf
++  %rdx.shuf23 = shufflevector <4 x double> %bin.rdx22, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
++  %bin.rdx24 = fadd fast <4 x double> %bin.rdx22, %rdx.shuf23
++  %t4 = extractelement <4 x double> %bin.rdx24, i32 0
++  ret double %t4
++}
++
+-- 
+2.24.1
+
diff --git a/L/LLVM/v9/bundled/patches/0006-D50010-VNCoercion-ni.patch b/L/LLVM/v9/bundled/patches/0006-D50010-VNCoercion-ni.patch
@@ -0,0 +1,64 @@
+diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
+--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
++++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
+@@ -34,17 +34,22 @@
+   if (StoreSize < DL.getTypeSizeInBits(LoadTy))
+     return false;
+ 
++  bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
++  bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
+   // Don't coerce non-integral pointers to integers or vice versa.
+-  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
+-      DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
++  if (StoredNI != LoadNI) {
+     // As a special case, allow coercion of memset used to initialize
+     // an array w/null.  Despite non-integral pointers not generally having a
+     // specific bit pattern, we do assume null is zero.
+     if (auto *CI = dyn_cast<Constant>(StoredVal))
+       return CI->isNullValue();
+     return false;
++  } else if (StoredNI && LoadNI &&
++             cast<PointerType>(StoredTy)->getAddressSpace() !=
++                 cast<PointerType>(LoadTy)->getAddressSpace()) {
++    return false;
+   }
+-  
++
+   return true;
+ }
+ 
+diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll
+--- a/llvm/test/Transforms/GVN/non-integral-pointers.ll
++++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll
+@@ -1,7 +1,7 @@
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+ ; RUN: opt -gvn -S < %s | FileCheck %s
+ 
+-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
+ target triple = "x86_64-unknown-linux-gnu"
+ 
+ define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
+@@ -285,3 +285,21 @@
+   %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off
+   ret i8 addrspace(4)* %ref
+ }
++
++ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
++ ; CHECK-LABEL: @multini(
++ ; CHECK-NOT: inttoptr
++ ; CHECK-NOT: ptrtoint
++ ; CHECK-NOT: addrspacecast
++  entry:
++   store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
++   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
++
++  neverTaken:
++   %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
++   %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
++   ret i8 addrspace(5)* %differentas
++
++  alwaysTaken:
++   ret i8 addrspace(5)* null
++ }
+