Source/JavaScriptCore/ChangeLog

 12015-10-31 Filip Pizlo <fpizlo@apple.com>
 2
 3 B3::reduceStrength's DCE should be more agro and less wrong
 4 https://bugs.webkit.org/show_bug.cgi?id=150748
 5
 6 Reviewed by NOBODY (OOPS!).
 7
 8 First of all, our DCE had a bug where it would keep Upsilons after it deleted the Phis that
 9 they referenced. But our B3 DCE was also not aggressive enough. It would not eliminate
 10 cycles. It was also probably slower than it needed to be, since it would eliminate all
 11 never-referenced things on each fixpoint.
 12
 13 This adds a presume-everyone-is-dead-and-find-live-things style DCE. This is very natural to
 14 write, except for Upsilons. For everything but Upsilons, it's just a worklist algorithm. For
 15 Upsilons, it's a fixpoint. It works fine in the end.
 16
 17 I kept finding bugs in this algorithm when I tested it against my "Complex" test that I was
 18 writing as a compile time benchmark. So, I include that test in this change. I also include
 19 the small lowering extensions that it needed - shifting and zero extending.
 20
 21 This change also adds an LLVM version of the Complex test. Though the LLVM version feels
 22 more natural to write because LLVM has traditional Phi's rather than our quirky Phi's, in
 23 the end LLVM ends up performing very badly - 10x to 20x worse than B3. Some of that gap will
 24 close once we give B3 a register allocator, but still, that's pretty good news for our B3
 25 strategy.
 26
 27 * assembler/MacroAssemblerX86_64.h:
 28 (JSC::MacroAssemblerX86_64::lshift64):
 29 (JSC::MacroAssemblerX86_64::rshift64):
 30 * assembler/X86Assembler.h:
 31 (JSC::X86Assembler::shlq_i8r):
 32 (JSC::X86Assembler::shlq_CLr):
 33 (JSC::X86Assembler::imull_rr):
 34 * b3/B3BasicBlock.cpp:
 35 (JSC::B3::BasicBlock::replacePredecessor):
 36 (JSC::B3::BasicBlock::dump):
 37 (JSC::B3::BasicBlock::removeNops): Deleted.
 38 * b3/B3BasicBlock.h:
 39 (JSC::B3::BasicBlock::frequency):
 40 * b3/B3LowerToAir.cpp:
 41 (JSC::B3::Air::LowerToAir::tryAnd):
 42 (JSC::B3::Air::LowerToAir::tryShl):
 43 (JSC::B3::Air::LowerToAir::tryStoreAddLoad):
 44 (JSC::B3::Air::LowerToAir::tryTrunc):
 45 (JSC::B3::Air::LowerToAir::tryZExt32):
 46 (JSC::B3::Air::LowerToAir::tryArgumentReg):
 47 * b3/B3LoweringMatcher.patterns:
 48 * b3/B3ReduceStrength.cpp:
 49 * b3/B3Validate.cpp:
 50 * b3/air/AirAllocateStack.cpp:
 51 (JSC::B3::Air::allocateStack):
 52 * b3/air/AirInstInlines.h:
 53 (JSC::B3::Air::ForEach<Arg>::forEach):
 54 (JSC::B3::Air::Inst::forEach):
 55 (JSC::B3::Air::isLshift32Valid):
 56 (JSC::B3::Air::isLshift64Valid):
 57 * b3/air/AirLiveness.h:
 58 (JSC::B3::Air::Liveness::isAlive):
 59 (JSC::B3::Air::Liveness::Liveness):
 60 (JSC::B3::Air::Liveness::LocalCalc::execute):
 61 * b3/air/AirOpcode.opcodes:
 62 * b3/testb3.cpp:
 63 (JSC::B3::testBranchEqualFoldPtr):
 64 (JSC::B3::testComplex):
 65 (JSC::B3::run):
 66
1672015-10-31 Brian Burg <bburg@apple.com>
268
369 Builtins generator should put WebCore-only wrappers in the per-builtin header
191844

Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h

@@public:
329329 m_assembler.shlq_i8r(imm.m_value, dest);
330330 }
331331
 332 void lshift64(RegisterID src, RegisterID dest)
 333 {
 334 ASSERT(src != dest);
 335
 336 if (src == X86Registers::ecx)
 337 m_assembler.shlq_CLr(dest);
 338 else {
 339 // Can only shift by ecx, so we do some swapping of we see anything else.
 340 swap(src, X86Registers::ecx);
 341 m_assembler.shlq_CLr(dest);
 342 swap(src, X86Registers::ecx);
 343 }
 344 }
 345
332346 void rshift64(TrustedImm32 imm, RegisterID dest)
333347 {
334348 m_assembler.sarq_i8r(imm.m_value, dest);
191844

Source/JavaScriptCore/assembler/X86Assembler.h

@@public:
910910 m_formatter.immediate8(imm);
911911 }
912912 }
 913
 914 void shlq_CLr(RegisterID dst)
 915 {
 916 m_formatter.oneByteOp64(OP_GROUP2_EvCL, GROUP2_OP_SHL, dst);
 917 }
913918#endif // CPU(X86_64)
914919
915920 void imull_rr(RegisterID src, RegisterID dst)
191844

Source/JavaScriptCore/b3/B3BasicBlock.cpp

@@bool BasicBlock::replacePredecessor(Basi
6767 return B3::replacePredecessor(this, from, to);
6868}
6969
70 void BasicBlock::removeNops(Procedure& procedure)
71 {
72  unsigned sourceIndex = 0;
73  unsigned targetIndex = 0;
74  while (sourceIndex < size()) {
75  Value* value = m_values[sourceIndex++];
76  if (value->opcode() == Nop)
77  procedure.deleteValue(value);
78  else
79  m_values[targetIndex++] = value;
80  }
81  m_values.resize(targetIndex);
82 }
83 
8470void BasicBlock::dump(PrintStream& out) const
8571{
8672 out.print(dumpPrefix, m_index);
191844

Source/JavaScriptCore/b3/B3BasicBlock.h

@@public:
9595
9696 double frequency() const { return m_frequency; }
9797
98  void removeNops(Procedure&);
99 
10098 void dump(PrintStream&) const;
10199 void deepDump(PrintStream&) const;
102100
191844

Source/JavaScriptCore/b3/B3LowerToAir.cpp

@@public:
577577 return false;
578578 }
579579 }
 580
 581 bool tryShl(Value* value, Value* amount)
 582 {
 583 Air::Opcode opcode = value->type() == Int32 ? Lshift32 : Lshift64;
 584
 585 if (imm(amount)) {
 586 append(Move, tmp(value), tmp(currentValue));
 587 append(opcode, imm(amount), tmp(currentValue));
 588 return true;
 589 }
 590
 591 append(Move, tmp(value), tmp(currentValue));
 592 append(Move, tmp(amount), Tmp(X86Registers::ecx));
 593 append(opcode, Tmp(X86Registers::ecx), tmp(currentValue));
 594 return true;
 595 }
580596
581597 bool tryStoreAddLoad(Value* left, Value* right, Value*)
582598 {

@@public:
640656 return true;
641657 }
642658
 659 bool tryZExt32(Value* value)
 660 {
 661 append(Move32, tmp(value), tmp(currentValue));
 662 return true;
 663 }
 664
643665 bool tryArgumentReg()
644666 {
645667 prologue.append(Inst(
191844

Source/JavaScriptCore/b3/B3LoweringMatcher.patterns

@@Add = Add(left, right)
3434Sub = Sub(left, right)
3535And = BitAnd(left, right)
3636
 37Shl = Shl(value, amount)
 38
3739StoreAddLoad = Store(Add(left, right), address)
3840StoreSubLoad = Store(Add(left, right), address)
3941StoreAndLoad = Store(BitAnd(left, right), address)

@@Store = Store(value, address)
4244TruncArgumentReg = Trunc(argumentReg = ArgumentReg())
4345Trunc = Trunc(value)
4446
 47ZExt32 = ZExt32(value)
 48
4549ArgumentReg = ArgumentReg()
4650
4751Const32 = Const32()
191844

Source/JavaScriptCore/b3/B3ReduceStrength.cpp

2929#if ENABLE(B3_JIT)
3030
3131#include "B3ControlValue.h"
 32#include "B3IndexSet.h"
3233#include "B3InsertionSetInlines.h"
3334#include "B3MemoryValue.h"
3435#include "B3PhaseScope.h"
3536#include "B3ProcedureInlines.h"
 37#include "B3UpsilonValue.h"
3638#include "B3UseCounts.h"
3739#include "B3ValueInlines.h"
 40#include <wtf/GraphNodeWorklist.h>
3841
3942namespace JSC { namespace B3 {
4043

@@public:
6164 for (m_index = 0; m_index < block->size(); ++m_index)
6265 process();
6366 m_insertionSet.execute(m_block);
64 
65  block->removeNops(m_proc);
6667 }
6768
6869 if (m_changedCFG) {

@@public:
7071 m_changed = true;
7172 }
7273
73  UseCounts useCounts(m_proc);
74  for (Value* value : m_proc.values()) {
75  if (!useCounts[value] && !value->effects().mustExecute()) {
76  value->replaceWithNop();
77  m_changed = true;
78  }
79  }
 74 killDeadCode();
8075
8176 result |= m_changed;
8277 } while (m_changed);

@@private:
337332 m_changed = true;
338333 }
339334
 335 void killDeadCode()
 336 {
 337 GraphNodeWorklist<Value*, IndexSet<Value>> worklist;
 338 Vector<UpsilonValue*, 64> upsilons;
 339 for (BasicBlock* block : m_proc) {
 340 for (Value* value : *block) {
 341 Effects effects = value->effects();
 342 // We don't care about SSA Effects, since we model them more accurately than the
 343 // effects() method does.
 344 effects.writesSSAState = false;
 345 effects.readsSSAState = false;
 346 if (effects.mustExecute())
 347 worklist.push(value);
 348 if (UpsilonValue* upsilon = value->as<UpsilonValue>())
 349 upsilons.append(upsilon);
 350 }
 351 }
 352 for (;;) {
 353 while (Value* value = worklist.pop()) {
 354 for (Value* child : value->children())
 355 worklist.push(child);
 356 }
 357
 358 bool didPush = false;
 359 for (size_t upsilonIndex = 0; upsilonIndex < upsilons.size(); ++upsilonIndex) {
 360 UpsilonValue* upsilon = upsilons[upsilonIndex];
 361 if (worklist.saw(upsilon->phi())) {
 362 worklist.push(upsilon);
 363 upsilons[upsilonIndex--] = upsilons.last();
 364 upsilons.takeLast();
 365 didPush = true;
 366 }
 367 }
 368 if (!didPush)
 369 break;
 370 }
 371
 372 for (BasicBlock* block : m_proc) {
 373 size_t sourceIndex = 0;
 374 size_t targetIndex = 0;
 375 while (sourceIndex < block->size()) {
 376 Value* value = block->at(sourceIndex++);
 377 if (worklist.saw(value))
 378 block->at(targetIndex++) = value;
 379 else {
 380 m_proc.deleteValue(value);
 381
 382 // It's not entirely clear if this is needed. I think it makes sense to have
 383 // this force a rerun of the fixpoint for now, since that will make it easier
 384 // to do peephole optimizations: removing dead code will make the peephole
 385 // easier to spot.
 386 m_changed = true;
 387 }
 388 }
 389 block->values().resize(targetIndex);
 390 }
 391 }
 392
340393 Procedure& m_proc;
341394 InsertionSet m_insertionSet;
342395 BasicBlock* m_block;
191844

Source/JavaScriptCore/b3/B3Validate.cpp

@@public:
6161 {
6262 HashSet<BasicBlock*> blocks;
6363 HashSet<Value*> valueInProc;
64  HashSet<Value*> valueInBlock;
 64 HashMap<Value*, unsigned> valueInBlock;
6565
6666 for (BasicBlock* block : m_procedure) {
6767 blocks.add(block);
6868 for (Value* value : *block)
69  valueInBlock.add(value);
 69 valueInBlock.add(value, 0).iterator->value++;
7070 }
7171
7272 for (Value* value : m_procedure.values())

@@public:
7474
7575 for (Value* value : valueInProc)
7676 VALIDATE(valueInBlock.contains(value), ("At ", *value));
77  for (Value* value : valueInBlock)
78  VALIDATE(valueInProc.contains(value), ("At ", *value));
 77 for (auto& entry : valueInBlock) {
 78 VALIDATE(valueInProc.contains(entry.key), ("At ", *entry.key));
 79 VALIDATE(entry.value == 1, ("At ", *entry.key));
 80 }
7981
8082 for (Value* value : valueInProc) {
8183 for (Value* child : value->children())
191844

Source/JavaScriptCore/b3/testb3.cpp

@@void testBranchEqualFoldPtr(intptr_t val
10661066 CHECK(compileAndRun<int>(proc) == !value);
10671067}
10681068
 1069void testComplex(unsigned numVars, unsigned numConstructs)
 1070{
 1071 double before = monotonicallyIncreasingTimeMS();
 1072
 1073 Procedure proc;
 1074 BasicBlock* current = proc.addBlock();
 1075
 1076 Const32Value* one = current->appendNew<Const32Value>(proc, Origin(), 1);
 1077
 1078 Vector<int32_t> varSlots;
 1079 for (unsigned i = numVars; i--;)
 1080 varSlots.append(i);
 1081
 1082 Vector<Value*> vars;
 1083 for (int32_t& varSlot : varSlots) {
 1084 Value* varSlotPtr = current->appendNew<ConstPtrValue>(proc, Origin(), &varSlot);
 1085 vars.append(current->appendNew<MemoryValue>(proc, Load, Int32, Origin(), varSlotPtr));
 1086 }
 1087
 1088 for (unsigned i = 0; i < numConstructs; ++i) {
 1089 if (i & 1) {
 1090 // Control flow diamond.
 1091 unsigned predicateVarIndex = (i >> 1) % numVars;
 1092 unsigned thenIncVarIndex = ((i >> 1) + 1) % numVars;
 1093 unsigned elseIncVarIndex = ((i >> 1) + 2) % numVars;
 1094
 1095 BasicBlock* thenBlock = proc.addBlock();
 1096 BasicBlock* elseBlock = proc.addBlock();
 1097 BasicBlock* continuation = proc.addBlock();
 1098
 1099 current->appendNew<ControlValue>(
 1100 proc, Branch, Origin(), vars[predicateVarIndex],
 1101 FrequentedBlock(thenBlock), FrequentedBlock(elseBlock));
 1102
 1103 UpsilonValue* thenThenResult = thenBlock->appendNew<UpsilonValue>(
 1104 proc, Origin(),
 1105 thenBlock->appendNew<Value>(proc, Add, Origin(), vars[thenIncVarIndex], one));
 1106 UpsilonValue* thenElseResult = thenBlock->appendNew<UpsilonValue>(
 1107 proc, Origin(), vars[elseIncVarIndex]);
 1108 thenBlock->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(continuation));
 1109
 1110 UpsilonValue* elseElseResult = elseBlock->appendNew<UpsilonValue>(
 1111 proc, Origin(),
 1112 elseBlock->appendNew<Value>(proc, Add, Origin(), vars[elseIncVarIndex], one));
 1113 UpsilonValue* elseThenResult = elseBlock->appendNew<UpsilonValue>(
 1114 proc, Origin(), vars[thenIncVarIndex]);
 1115 elseBlock->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(continuation));
 1116
 1117 Value* thenPhi = continuation->appendNew<Value>(proc, Phi, Int32, Origin());
 1118 thenThenResult->setPhi(thenPhi);
 1119 elseThenResult->setPhi(thenPhi);
 1120 vars[thenIncVarIndex] = thenPhi;
 1121
 1122 Value* elsePhi = continuation->appendNew<Value>(proc, Phi, Int32, Origin());
 1123 thenElseResult->setPhi(elsePhi);
 1124 elseElseResult->setPhi(elsePhi);
 1125 vars[elseIncVarIndex] = thenPhi;
 1126
 1127 current = continuation;
 1128 } else {
 1129 // Loop.
 1130
 1131 BasicBlock* loopEntry = proc.addBlock();
 1132 BasicBlock* loopReentry = proc.addBlock();
 1133 BasicBlock* loopBody = proc.addBlock();
 1134 BasicBlock* loopExit = proc.addBlock();
 1135 BasicBlock* loopSkip = proc.addBlock();
 1136 BasicBlock* continuation = proc.addBlock();
 1137
 1138 Value* startIndex = vars[(i >> 1) % numVars];
 1139 Value* startSum = current->appendNew<Const32Value>(proc, Origin(), 0);
 1140 current->appendNew<ControlValue>(
 1141 proc, Branch, Origin(), startIndex,
 1142 FrequentedBlock(loopEntry), FrequentedBlock(loopSkip));
 1143
 1144 UpsilonValue* startIndexForBody = loopEntry->appendNew<UpsilonValue>(
 1145 proc, Origin(), startIndex);
 1146 UpsilonValue* startSumForBody = loopEntry->appendNew<UpsilonValue>(
 1147 proc, Origin(), startSum);
 1148 loopEntry->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(loopBody));
 1149
 1150 Value* bodyIndex = loopBody->appendNew<Value>(proc, Phi, Int32, Origin());
 1151 startIndexForBody->setPhi(bodyIndex);
 1152 Value* bodySum = loopBody->appendNew<Value>(proc, Phi, Int32, Origin());
 1153 startSumForBody->setPhi(bodySum);
 1154 Value* newBodyIndex = loopBody->appendNew<Value>(proc, Sub, Origin(), bodyIndex, one);
 1155 Value* newBodySum = loopBody->appendNew<Value>(
 1156 proc, Add, Origin(),
 1157 bodySum,
 1158 loopBody->appendNew<MemoryValue>(
 1159 proc, Load, Int32, Origin(),
 1160 loopBody->appendNew<Value>(
 1161 proc, Add, Origin(),
 1162 loopBody->appendNew<ConstPtrValue>(proc, Origin(), varSlots.data()),
 1163 loopBody->appendNew<Value>(
 1164 proc, Shl, Origin(),
 1165 loopBody->appendNew<Value>(
 1166 proc, ZExt32, Origin(),
 1167 loopBody->appendNew<Value>(
 1168 proc, BitAnd, Origin(),
 1169 newBodyIndex,
 1170 loopBody->appendNew<Const32Value>(
 1171 proc, Origin(), numVars - 1))),
 1172 loopBody->appendNew<Const32Value>(proc, Origin(), 2)))));
 1173 loopBody->appendNew<ControlValue>(
 1174 proc, Branch, Origin(), newBodyIndex,
 1175 FrequentedBlock(loopReentry), FrequentedBlock(loopExit));
 1176
 1177 loopReentry->appendNew<UpsilonValue>(proc, Origin(), newBodyIndex, bodyIndex);
 1178 loopReentry->appendNew<UpsilonValue>(proc, Origin(), newBodySum, bodySum);
 1179 loopReentry->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(loopBody));
 1180
 1181 UpsilonValue* exitSum = loopExit->appendNew<UpsilonValue>(proc, Origin(), newBodySum);
 1182 loopExit->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(continuation));
 1183
 1184 UpsilonValue* skipSum = loopSkip->appendNew<UpsilonValue>(proc, Origin(), startSum);
 1185 loopSkip->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(continuation));
 1186
 1187 Value* finalSum = continuation->appendNew<Value>(proc, Phi, Int32, Origin());
 1188 exitSum->setPhi(finalSum);
 1189 skipSum->setPhi(finalSum);
 1190
 1191 current = continuation;
 1192 vars[((i >> 1) + 1) % numVars] = finalSum;
 1193 }
 1194 }
 1195
 1196 current->appendNew<ControlValue>(proc, Return, Origin(), vars[0]);
 1197
 1198 compile(proc);
 1199
 1200 double after = monotonicallyIncreasingTimeMS();
 1201 dataLog(" That took ", after - before, " ms.\n");
 1202}
 1203
10691204#define RUN(test) do { \
10701205 if (!shouldRun(#test)) \
10711206 break; \

@@void run(const char* filter)
11681303 RUN(testBranchEqualFoldPtr(42));
11691304 RUN(testBranchEqualFoldPtr(0));
11701305
 1306 RUN(testComplex(64, 128));
 1307 RUN(testComplex(64, 256));
 1308 RUN(testComplex(64, 384));
 1309
11711310 if (!didRun)
11721311 usage();
11731312}
191844

Source/JavaScriptCore/b3/air/AirAllocateStack.cpp

@@namespace {
4141
4242const bool verbose = false;
4343
44 bool argumentIsOnStack(const Arg& arg)
45 {
46  return arg.isStack() && arg.stackSlot()->kind() == StackSlotKind::Anonymous;
47 }
48 
4944bool attemptAssignment(
5045 StackSlot* slot, intptr_t offsetFromFP, const Vector<StackSlot*>& otherSlots)
5146{

@@void allocateStack(Code& code)
141136 }
142137
143138 // Now we handle the anonymous slots.
144  Liveness<Arg> liveness(code);
 139 Liveness<StackSlot*> liveness(code);
145140 IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
146141 Vector<StackSlot*> slots;
147142
148143 for (BasicBlock* block : code) {
149  Liveness<Arg>::LocalCalc localCalc(liveness, block);
 144 Liveness<StackSlot*>::LocalCalc localCalc(liveness, block);
150145
151146 auto interfere = [&] (Inst& inst) {
152147 if (verbose)
153  dataLog("Interfering: ", listDump(localCalc.live()), "\n");
 148 dataLog("Interfering: ", pointerListDump(localCalc.live()), "\n");
154149
155150 // Form a clique of stack slots that interfere. First find the list of stack slots
156151 // that are live right now.
157152 slots.resize(0);
158  for (Arg arg : localCalc.live()) {
159  if (argumentIsOnStack(arg))
160  slots.append(arg.stackSlot());
 153 for (StackSlot* slot : localCalc.live()) {
 154 if (slot->kind() == StackSlotKind::Anonymous)
 155 slots.append(slot);
161156 }
162157
163158 // We mustn't mandate that the input code is optimal. Therefore, it may have dead stores
164159 // to the stack. We need to treat these as interfering.
165160 inst.forEachArg(
166161 [&] (Arg& arg, Arg::Role role, Arg::Type) {
167  if (Arg::isDef(role) && argumentIsOnStack(arg)
168  && !localCalc.live().contains(arg))
169  slots.append(arg.stackSlot());
 162 if (Arg::isDef(role) && arg.isStack()) {
 163 StackSlot* slot = arg.stackSlot();
 164 if (slot->kind() == StackSlotKind::Anonymous
 165 && !localCalc.live().contains(slot))
 166 slots.append(slot);
 167 }
170168 });
171169
172170 if (verbose)
191844

Source/JavaScriptCore/b3/air/AirInstInlines.h

@@template<> struct ForEach<Arg> {
5151 inst.forEachArg(functor);
5252 }
5353};
 54template<> struct ForEach<StackSlot*> {
 55 template<typename Functor>
 56 static void forEach(Inst& inst, const Functor& functor)
 57 {
 58 inst.forEachArg(
 59 [&] (Arg& arg, Arg::Role role, Arg::Type type) {
 60 if (!arg.isStack())
 61 return;
 62 StackSlot* stackSlot = arg.stackSlot();
 63 functor(stackSlot, role, type);
 64 arg = Arg::stack(stackSlot);
 65 });
 66 }
 67};
5468
5569template<typename Thing, typename Functor>
5670void Inst::forEach(const Functor& functor)

@@inline bool isLshift32Valid(const Inst&
90104 return isShiftValid(inst);
91105}
92106
 107inline bool isLshift64Valid(const Inst& inst)
 108{
 109 return isShiftValid(inst);
 110}
 111
93112} } } // namespace JSC::B3::Air
94113
95114#endif // ENABLE(B3_JIT)
191844

Source/JavaScriptCore/b3/air/AirLiveness.h

@@namespace JSC { namespace B3 { namespace
3939template<typename Thing>
4040class Liveness {
4141public:
 42 template<typename T>
 43 static bool isAlive(const T& thing) { return thing.isAlive(); }
 44
 45 static bool isAlive(StackSlot* slot) { return slot->kind() == StackSlotKind::Anonymous; }
 46
4247 Liveness(Code& code)
4348 {
4449 m_liveAtHead.resize(code.size());

@@public:
9499 // First handle def's.
95100 inst.forEach<Thing>(
96101 [this] (Thing& arg, Arg::Role role, Arg::Type) {
97  if (!arg.isAlive())
 102 if (!isAlive(arg))
98103 return;
99104 if (Arg::isDef(role))
100105 m_live.remove(arg);
101106 });
102107
103  // Next handle clobbered registers.
104  if (inst.hasSpecial()) {
105  inst.extraClobberedRegs().forEach(
106  [this] (Reg reg) {
107  m_live.remove(Thing(Tmp(reg)));
108  });
109  }
110 
111108 // Finally handle use's.
112109 inst.forEach<Thing>(
113110 [this] (Thing& arg, Arg::Role role, Arg::Type) {
114  if (!arg.isAlive())
 111 if (!isAlive(arg))
115112 return;
116113 if (Arg::isUse(role))
117114 m_live.add(arg);
191844

Source/JavaScriptCore/b3/air/AirOpcode.opcodes

@@Lshift32 U:G, UD:G
107107 Tmp*, Tmp
108108 Imm, Tmp
109109
 110Lshift64 U:G, UD:G
 111 Tmp*, Tmp
 112 Imm, Tmp
 113
110114Mul32 U:G, UD:G
111115 Tmp, Tmp
112116 Addr, Tmp
191844

Source/WTF/ChangeLog

 12015-10-31 Filip Pizlo <fpizlo@apple.com>
 2
 3 B3::reduceStrength's DCE should be more agro and less wrong
 4 https://bugs.webkit.org/show_bug.cgi?id=150748
 5
 6 Reviewed by NOBODY (OOPS!).
 7
 8 * wtf/GraphNodeWorklist.h:
 9 (WTF::GraphNodeWorklist::saw): This method is super useful.
 10
 112015-10-31 Filip Pizlo <fpizlo@apple.com>
 12
 13 B3::reduceStrength's DCE should be more agro
 14 https://bugs.webkit.org/show_bug.cgi?id=150748
 15
 16 Reviewed by NOBODY (OOPS!).
 17
 18 * wtf/GraphNodeWorklist.h:
 19 (WTF::GraphNodeWorklist::saw): This is going to be useful.
 20
1212015-10-30 Chris Dumez <cdumez@apple.com>
222
323 Regression(r191673): Crash in RunLoopTimer::schedule()
191844

Source/WTF/wtf/GraphNodeWorklist.h

@@public:
5454 return m_stack.takeLast();
5555 }
5656
 57 bool saw(Node node) { return m_seen.contains(node); }
 58
5759private:
5860 Set m_seen;
5961 Vector<Node, 16> m_stack;
191844

Tools/ChangeLog

 12015-10-31 Filip Pizlo <fpizlo@apple.com>
 2
 3 B3::reduceStrength's DCE should be more agro and less wrong
 4 https://bugs.webkit.org/show_bug.cgi?id=150748
 5
 6 Reviewed by NOBODY (OOPS!).
 7
 8 Add an LLVM version of testb3's "testComplex".
 9
 10 * ReducedFTL/ComplexTest.cpp: Added.
 11
1122015-10-31 Lucas Forschler <lforschler@apple.com>
213
314 Teach the CompileWebKit step to look for additional arguments.
191844

Tools/ReducedFTL/ComplexTest.cpp

 1/*
 2 * Copyright (C) 2015 Apple Inc. All rights reserved.
 3 *
 4 * Redistribution and use in source and binary forms, with or without
 5 * modification, are permitted provided that the following conditions
 6 * are met:
 7 * 1. Redistributions of source code must retain the above copyright
 8 * notice, this list of conditions and the following disclaimer.
 9 * 2. Redistributions in binary form must reproduce the above copyright
 10 * notice, this list of conditions and the following disclaimer in the
 11 * documentation and/or other materials provided with the distribution.
 12 *
 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 */
 25
 26// Compile like so:
 27// clang++ -O3 -o ComplexTest ComplexTest.cpp -std=c++14 `~/Downloads/clang+llvm-3.7.0-x86_64-apple-darwin/bin/llvm-config --cppflags --cxxflags --ldflags --libs` -lcurses -g -lz
 28
 29#include <vector>
 30#include <memory>
 31#include <chrono>
 32
 33#include <sys/time.h>
 34
 35#include <llvm/Support/raw_ostream.h>
 36#include <llvm/IR/LLVMContext.h>
 37#include <llvm/IR/DataLayout.h>
 38#include <llvm/IR/IRBuilder.h>
 39#include <llvm/IR/Constants.h>
 40#include <llvm/IR/Function.h>
 41#include <llvm/IR/Instructions.h>
 42#include <llvm/IR/Module.h>
 43#include <llvm/IR/Verifier.h>
 44#include <llvm/ExecutionEngine/ExecutionEngine.h>
 45#include <llvm/Transforms/Utils/BasicBlockUtils.h>
 46#include <llvm/Pass.h>
 47#include <llvm/IR/LegacyPassManager.h>
 48#include <llvm/Support/Host.h>
 49#include <llvm/Analysis/Passes.h>
 50#include <llvm/Transforms/Scalar.h>
 51#include <llvm/Transforms/IPO.h>
 52#include <llvm/Transforms/IPO/PassManagerBuilder.h>
 53#include <llvm/ExecutionEngine/MCJIT.h>
 54#include <llvm/Support/ErrorHandling.h>
 55#include <llvm/Support/TargetSelect.h>
 56
 57using namespace llvm;
 58using namespace std;
 59
 60namespace {
 61
 62template<typename ToType, typename FromType>
 63inline ToType bitwise_cast(FromType from)
 64{
 65 static_assert(sizeof(FromType) == sizeof(ToType), "bitwise_cast size of FromType and ToType must be equal!");
 66 union {
 67 FromType from;
 68 ToType to;
 69 } u;
 70 u.from = from;
 71 return u.to;
 72}
 73
 74double monotonicallyIncreasingTimeMS()
 75{
 76 return chrono::duration_cast<chrono::microseconds>(
 77 chrono::steady_clock::now().time_since_epoch()).count() / 1000.;
 78}
 79
 80void run(unsigned numVars, unsigned numConstructs)
 81{
 82 LLVMContext context;
 83 Type* int32Ty = Type::getInt32Ty(context);
 84 Module* module = new Module("complexModule", context);
 85 Function* function = Function::Create(
 86 FunctionType::get(int32Ty, false), GlobalValue::ExternalLinkage, "complexFunction", module);
 87
 88 vector<int32_t> varSlots;
 89 for (unsigned i = numVars; i--;)
 90 varSlots.push_back(i);
 91
 92 BasicBlock* current = BasicBlock::Create(context, "", function);
 93 unique_ptr<IRBuilder<>> builder = make_unique<IRBuilder<>>(current);
 94
 95 vector<Value*> vars;
 96 for (int32_t& varSlot : varSlots) {
 97 Value* ptr = builder->CreateIntToPtr(builder->getInt64(bitwise_cast<intptr_t>(&varSlot)),
 98 int32Ty->getPointerTo());
 99 vars.push_back(builder->CreateLoad(ptr));
 100 }
 101
 102 for (unsigned i = 0; i < numConstructs; ++i) {
 103 if (i & 1) {
 104 // Control flow diamond.
 105 unsigned predicateVarIndex = (i >> 1) % numVars;
 106 unsigned thenIncVarIndex = ((i >> 1) + 1) % numVars;
 107 unsigned elseIncVarIndex = ((i >> 1) + 2) % numVars;
 108
 109 BasicBlock* thenBlock = BasicBlock::Create(context, "", function);
 110 BasicBlock* elseBlock = BasicBlock::Create(context, "", function);
 111 BasicBlock* continuation = BasicBlock::Create(context, "", function);
 112
 113 builder->CreateCondBr(
 114 builder->CreateICmpNE(vars[predicateVarIndex], builder->getInt32(0)),
 115 thenBlock, elseBlock);
 116
 117 builder = make_unique<IRBuilder<>>(thenBlock);
 118 Value* thenValue = builder->CreateAdd(vars[thenIncVarIndex], builder->getInt32(1));
 119 builder->CreateBr(continuation);
 120
 121 builder = make_unique<IRBuilder<>>(elseBlock);
 122 Value* elseValue = builder->CreateAdd(vars[elseIncVarIndex], builder->getInt32(1));
 123 builder->CreateBr(continuation);
 124
 125 builder = make_unique<IRBuilder<>>(current = continuation);
 126 PHINode* thenPhi = builder->CreatePHI(int32Ty, 2);
 127 thenPhi->addIncoming(thenValue, thenBlock);
 128 thenPhi->addIncoming(vars[thenIncVarIndex], elseBlock);
 129 PHINode* elsePhi = builder->CreatePHI(int32Ty, 2);
 130 elsePhi->addIncoming(elseValue, elseBlock);
 131 elsePhi->addIncoming(vars[elseIncVarIndex], thenBlock);
 132
 133 vars[thenIncVarIndex] = thenPhi;
 134 vars[elseIncVarIndex] = elsePhi;
 135 } else {
 136 // Loop.
 137
 138 BasicBlock* loopBody = BasicBlock::Create(context, "", function);
 139 BasicBlock* continuation = BasicBlock::Create(context, "", function);
 140
 141 Value* startIndex = vars[(i >> 1) % numVars];
 142 Value* startSum = builder->getInt32(0);
 143 builder->CreateCondBr(
 144 builder->CreateICmpNE(startIndex, builder->getInt32(0)),
 145 loopBody, continuation);
 146
 147 builder = make_unique<IRBuilder<>>(loopBody);
 148 PHINode* bodyIndex = builder->CreatePHI(int32Ty, 2);
 149 PHINode* bodySum = builder->CreatePHI(int32Ty, 2);
 150 bodyIndex->addIncoming(startIndex, current);
 151 bodySum->addIncoming(startSum, current);
 152
 153 Value* newBodyIndex = builder->CreateSub(bodyIndex, builder->getInt32(1));
 154 Value* newBodySum = builder->CreateAdd(
 155 bodySum,
 156 builder->CreateLoad(
 157 builder->CreateIntToPtr(
 158 builder->CreateAdd(
 159 builder->getInt64(bitwise_cast<intptr_t>(&varSlots[0])),
 160 builder->CreateShl(
 161 builder->CreateZExt(
 162 builder->CreateAnd(
 163 newBodyIndex,
 164 builder->getInt32(numVars - 1)),
 165 Type::getInt64Ty(context)),
 166 builder->getInt64(2))),
 167 int32Ty->getPointerTo())));
 168 bodyIndex->addIncoming(newBodyIndex, loopBody);
 169 bodySum->addIncoming(newBodySum, loopBody);
 170 builder->CreateCondBr(
 171 builder->CreateICmpNE(newBodyIndex, builder->getInt32(0)),
 172 loopBody, continuation);
 173
 174 builder = make_unique<IRBuilder<>>(continuation);
 175 PHINode* finalSum = builder->CreatePHI(int32Ty, 2);
 176 finalSum->addIncoming(startSum, current);
 177 finalSum->addIncoming(newBodySum, loopBody);
 178 current = continuation;
 179 }
 180 }
 181
 182 builder->CreateRet(vars[0]);
 183 builder = nullptr;
 184
 185 unique_ptr<ExecutionEngine> EE;
 186 {
 187 std::string errorMessage;
 188 EngineBuilder builder((std::unique_ptr<Module>(module)));
 189 builder.setMArch("");
 190 builder.setMCPU(sys::getHostCPUName());
 191 builder.setMAttrs(std::vector<std::string>());
 192 builder.setRelocationModel(Reloc::Default);
 193 builder.setCodeModel(CodeModel::JITDefault);
 194 builder.setErrorStr(&errorMessage);
 195 builder.setEngineKind(EngineKind::JIT);
 196
 197 builder.setOptLevel(CodeGenOpt::Default);
 198
 199 {
 200 TargetOptions Options;
 201 Options.FloatABIType = FloatABI::Default;
 202
 203 builder.setTargetOptions(Options);
 204 }
 205
 206 EE = unique_ptr<ExecutionEngine>(builder.create());
 207 }
 208
 209 legacy::PassManager passManager;
 210 //passManager.add(new DataLayout(*EE->getDataLayout()));
 211 passManager.add(createPromoteMemoryToRegisterPass());
 212 passManager.add(createConstantPropagationPass());
 213 passManager.add(createInstructionCombiningPass());
 214 passManager.add(createBasicAliasAnalysisPass());
 215 passManager.add(createTypeBasedAliasAnalysisPass());
 216 passManager.add(createGVNPass());
 217 passManager.add(createCFGSimplificationPass());
 218 passManager.run(*module);
 219
 220 EE->getFunctionAddress(function->getName());
 221}
 222
 223} // anonymous namespace
 224
 225int main(int c, char** v)
 226{
 227 InitializeAllTargets();
 228 InitializeAllTargetMCs();
 229 InitializeAllAsmPrinters();
 230 InitializeAllAsmParsers();
 231 for (unsigned i = 1; i <= 3; ++i) {
 232 printf("Doing: run(64, %u)\n", i * 128);
 233 double before = monotonicallyIncreasingTimeMS();
 234 run(64, 128 * i);
 235 double after = monotonicallyIncreasingTimeMS();
 236 printf("That took %lf ms.\n", after - before);
 237 }
 238 return 0;
 239}
 240
0