[dev.ssa] cmd/compile: handle floating point on ARM

The machine supports (or the runtime simulates in soft float mode)
(u)int32<->float conversions. The frontend rewrites int64<->float
conversions to calls to runtime functions.

For int64->float32 conversion, the frontend generates

.   .   AS u(100) l(10) tc(1)
.   .   .   NAME-main.~r1 u(1) a(true) g(1) l(9) x(8+0) class(PPARAMOUT) f(1) float32
.   .   .   CALLFUNC u(100) l(10) tc(1) float32
.   .   .   .   NAME-runtime.int64tofloat64 u(1) a(true) x(0+0) class(PFUNC) tc(1) used(true) FUNC-func(int64) float64

The CALLFUNC node has type float32, whereas runtime.int64tofloat64
returns float64. The legacy backend implicitly makes a float64->float32
conversion. The SSA backend does not do implicit conversion, so we
insert an explicit CONV here.

All cmd/compile/internal/gc/testdata/*_ssa.go tests passed.

Progress on SSA for ARM. Still not complete.

Update #15365.

Change-Id: I30937c8ff977271246b068f48224693776804339
Reviewed-on: https://go-review.googlesource.com/23652
Reviewed-by: Keith Randall <[email protected]>
golang · Jun 6, 2016 · 59e11d7 · 59e11d7
1 parent e78d90b
commit 59e11d7
Show file tree

Hide file tree

Showing 9 changed files with 1,507 additions and 76 deletions.
diff --git a/src/cmd/compile/internal/arm/ssa.go b/src/cmd/compile/internal/arm/ssa.go
@@ -5,6 +5,8 @@
 package arm
 
 import (
+	"math"
+
 	"cmd/compile/internal/gc"
 	"cmd/compile/internal/ssa"
 	"cmd/internal/obj"
@@ -29,14 +31,36 @@ var ssaRegToReg = []int16{
 	arm.REG_R14,
 	arm.REG_R15,
 
+	arm.REG_F0,
+	arm.REG_F1,
+	arm.REG_F2,
+	arm.REG_F3,
+	arm.REG_F4,
+	arm.REG_F5,
+	arm.REG_F6,
+	arm.REG_F7,
+	arm.REG_F8,
+	arm.REG_F9,
+	arm.REG_F10,
+	arm.REG_F11,
+	arm.REG_F12,
+	arm.REG_F13,
+	arm.REG_F14,
+	arm.REG_F15,
+
 	arm.REG_CPSR, // flag
 	0,            // SB isn't a real register.  We fill an Addr.Reg field with 0 in this case.
 }
 
 // loadByType returns the load instruction of the given type.
 func loadByType(t ssa.Type) obj.As {
 	if t.IsFloat() {
-		panic("load floating point register is not implemented")
+		switch t.Size() {
+		case 4:
+			return arm.AMOVF
+		case 8:
+			return arm.AMOVD
+		}
 	} else {
 		switch t.Size() {
 		case 1:
@@ -61,7 +85,12 @@ func loadByType(t ssa.Type) obj.As {
 // storeByType returns the store instruction of the given type.
 func storeByType(t ssa.Type) obj.As {
 	if t.IsFloat() {
-		panic("store floating point register is not implemented")
+		switch t.Size() {
+		case 4:
+			return arm.AMOVF
+		case 8:
+			return arm.AMOVD
+		}
 	} else {
 		switch t.Size() {
 		case 1:
@@ -93,7 +122,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		if x == y {
 			return
 		}
-		p := gc.Prog(arm.AMOVW)
+		as := arm.AMOVW
+		if v.Type.IsFloat() {
+			switch v.Type.Size() {
+			case 4:
+				as = arm.AMOVF
+			case 8:
+				as = arm.AMOVD
+			default:
+				panic("bad float size")
+			}
+		}
+		p := gc.Prog(as)
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = x
 		p.To.Type = obj.TYPE_REG
@@ -172,7 +212,15 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		ssa.OpARMOR,
 		ssa.OpARMXOR,
 		ssa.OpARMBIC,
-		ssa.OpARMMUL:
+		ssa.OpARMMUL,
+		ssa.OpARMADDF,
+		ssa.OpARMADDD,
+		ssa.OpARMSUBF,
+		ssa.OpARMSUBD,
+		ssa.OpARMMULF,
+		ssa.OpARMMULD,
+		ssa.OpARMDIVF,
+		ssa.OpARMDIVD:
 		r := gc.SSARegNum(v)
 		r1 := gc.SSARegNum(v.Args[0])
 		r2 := gc.SSARegNum(v.Args[1])
@@ -331,10 +379,19 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.From.Offset = v.AuxInt
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
+	case ssa.OpARMMOVFconst,
+		ssa.OpARMMOVDconst:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_FCONST
+		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum(v)
 	case ssa.OpARMCMP,
 		ssa.OpARMCMN,
 		ssa.OpARMTST,
-		ssa.OpARMTEQ:
+		ssa.OpARMTEQ,
+		ssa.OpARMCMPF,
+		ssa.OpARMCMPD:
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
 		// Special layout in ARM assembly
@@ -354,7 +411,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		ssa.OpARMMOVBUload,
 		ssa.OpARMMOVHload,
 		ssa.OpARMMOVHUload,
-		ssa.OpARMMOVWload:
+		ssa.OpARMMOVWload,
+		ssa.OpARMMOVFload,
+		ssa.OpARMMOVDload:
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = gc.SSARegNum(v.Args[0])
@@ -363,7 +422,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Reg = gc.SSARegNum(v)
 	case ssa.OpARMMOVBstore,
 		ssa.OpARMMOVHstore,
-		ssa.OpARMMOVWstore:
+		ssa.OpARMMOVWstore,
+		ssa.OpARMMOVFstore,
+		ssa.OpARMMOVDstore:
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = gc.SSARegNum(v.Args[1])
@@ -374,11 +435,25 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		ssa.OpARMMOVBUreg,
 		ssa.OpARMMOVHreg,
 		ssa.OpARMMOVHUreg,
-		ssa.OpARMMVN:
-		if v.Type.IsMemory() {
-			v.Fatalf("memory operand for %s", v.LongString())
-		}
+		ssa.OpARMMVN,
+		ssa.OpARMSQRTD,
+		ssa.OpARMMOVWF,
+		ssa.OpARMMOVWD,
+		ssa.OpARMMOVFW,
+		ssa.OpARMMOVDW,
+		ssa.OpARMMOVFD,
+		ssa.OpARMMOVDF:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = gc.SSARegNum(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum(v)
+	case ssa.OpARMMOVWUF,
+		ssa.OpARMMOVWUD,
+		ssa.OpARMMOVFWU,
+		ssa.OpARMMOVDWU:
 		p := gc.Prog(v.Op.Asm())
+		p.Scond = arm.C_UBIT
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = gc.SSARegNum(v.Args[0])
 		p.To.Type = obj.TYPE_REG

diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
@@ -1323,6 +1323,15 @@ var fpConvOpToSSA = map[twoTypes]twoOpsAndType{
 	twoTypes{TFLOAT32, TFLOAT64}: twoOpsAndType{ssa.OpCvt32Fto64F, ssa.OpCopy, TFLOAT64},
 }
 
+// fpConvOpToSSA32 is used only on 32-bit architectures and holds only the entries that
+// differ there: uint32<->float conversions do not go through int64<->float conversion.
+var fpConvOpToSSA32 = map[twoTypes]twoOpsAndType{
+	twoTypes{TUINT32, TFLOAT32}: twoOpsAndType{ssa.OpCopy, ssa.OpCvt32Uto32F, TUINT32},
+	twoTypes{TUINT32, TFLOAT64}: twoOpsAndType{ssa.OpCopy, ssa.OpCvt32Uto64F, TUINT32},
+	twoTypes{TFLOAT32, TUINT32}: twoOpsAndType{ssa.OpCvt32Fto32U, ssa.OpCopy, TUINT32},
+	twoTypes{TFLOAT64, TUINT32}: twoOpsAndType{ssa.OpCvt64Fto32U, ssa.OpCopy, TUINT32},
+}
+
 var shiftOpToSSA = map[opAndTwoTypes]ssa.Op{
 	opAndTwoTypes{OLSH, TINT8, TUINT8}:   ssa.OpLsh8x8,
 	opAndTwoTypes{OLSH, TUINT8, TUINT8}:  ssa.OpLsh8x8,
@@ -1639,6 +1648,11 @@ func (s *state) expr(n *Node) *ssa.Value {
 
 		if ft.IsFloat() || tt.IsFloat() {
 			conv, ok := fpConvOpToSSA[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]
+			if s.config.IntSize == 4 {
+				if conv1, ok1 := fpConvOpToSSA32[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]; ok1 {
+					conv = conv1
+				}
+			}
 			if !ok {
 				s.Fatalf("weird float conversion %s -> %s", ft, tt)
 			}

diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
@@ -1094,12 +1094,12 @@ opswitch:
 
 			if n.Type.IsFloat() {
 				if n.Left.Type.Etype == TINT64 {
-					n = mkcall("int64tofloat64", n.Type, init, conv(n.Left, Types[TINT64]))
+					n = conv(mkcall("int64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TINT64])), n.Type)
 					break
 				}
 
 				if n.Left.Type.Etype == TUINT64 {
-					n = mkcall("uint64tofloat64", n.Type, init, conv(n.Left, Types[TUINT64]))
+					n = conv(mkcall("uint64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TUINT64])), n.Type)
 					break
 				}
 			}

diff --git a/src/cmd/compile/internal/ssa/decompose.go b/src/cmd/compile/internal/ssa/decompose.go
@@ -94,6 +94,8 @@ func decomposeBuiltIn(f *Func) {
 				f.NamedValues[dataName] = append(f.NamedValues[dataName], data)
 			}
 			delete(f.NamedValues, name)
+		case t.IsFloat():
+			// floats are never decomposed, even ones bigger than IntSize
 		case t.Size() > f.Config.IntSize:
 			f.Unimplementedf("undecomposed named type %s %s", name, t)
 		default:
@@ -115,6 +117,8 @@ func decomposeBuiltInPhi(v *Value) {
 		decomposeSlicePhi(v)
 	case v.Type.IsInterface():
 		decomposeInterfacePhi(v)
+	case v.Type.IsFloat():
+		// floats are never decomposed, even ones bigger than IntSize
 	case v.Type.Size() > v.Block.Func.Config.IntSize:
 		v.Unimplementedf("undecomposed type %s", v.Type)
 	}

diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -6,6 +6,8 @@
 (Add32 x y) -> (ADD x y)
 (Add16 x y) -> (ADD x y)
 (Add8 x y) -> (ADD x y)
+(Add32F x y) -> (ADDF x y)
+(Add64F x y) -> (ADDD x y)
 
 (Add32carry x y) -> (ADDS x y)
 (Add32withcarry x y c) -> (ADC x y c)
@@ -14,13 +16,17 @@
 (Sub32 x y) -> (SUB x y)
 (Sub16 x y) -> (SUB x y)
 (Sub8 x y) -> (SUB x y)
+(Sub32F x y) -> (SUBF x y)
+(Sub64F x y) -> (SUBD x y)
 
 (Sub32carry x y) -> (SUBS x y)
 (Sub32withcarry x y c) -> (SBC x y c)
 
 (Mul32 x y) -> (MUL x y)
 (Mul16 x y) -> (MUL x y)
 (Mul8 x y) -> (MUL x y)
+(Mul32F x y) -> (MULF x y)
+(Mul64F x y) -> (MULD x y)
 
 (Hmul32 x y) -> (HMUL x y)
 (Hmul32u x y) -> (HMULU x y)
@@ -37,6 +43,8 @@
 (Div16u x y) -> (DIVU (ZeroExt16to32 x) (ZeroExt16to32 y))
 (Div8 x y) -> (DIV (SignExt8to32 x) (SignExt8to32 y))
 (Div8u x y) -> (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Div32F x y) -> (DIVF x y)
+(Div64F x y) -> (DIVD x y)
 
 (Mod32 x y) -> (MOD x y)
 (Mod32u x y) -> (MODU x y)
@@ -61,11 +69,16 @@
 (Neg32 x) -> (RSBconst [0] x)
 (Neg16 x) -> (RSBconst [0] x)
 (Neg8 x) -> (RSBconst [0] x)
+//TODO: implement NEGF, NEGD in assembler and soft float simulator, and use them.
+(Neg32F x) -> (MULF (MOVFconst [int64(math.Float64bits(-1))]) x)
+(Neg64F x) -> (MULD (MOVDconst [int64(math.Float64bits(-1))]) x)
 
 (Com32 x) -> (MVN x)
 (Com16 x) -> (MVN x)
 (Com8 x) -> (MVN x)
 
+(Sqrt x) -> (SQRTD x)
+
 // boolean ops -- booleans are represented with 0=false, 1=true
 (AndB x y) -> (AND x y)
 (OrB x y) -> (OR x y)
@@ -143,6 +156,8 @@
 (Const8 [val]) -> (MOVWconst [val])
 (Const16 [val]) -> (MOVWconst [val])
 (Const32 [val]) -> (MOVWconst [val])
+(Const32F [val]) -> (MOVFconst [val])
+(Const64F [val]) -> (MOVDconst [val])
 (ConstNil) -> (MOVWconst [0])
 (ConstBool [b]) -> (MOVWconst [b])
 
@@ -164,20 +179,38 @@
 (Signmask x) -> (SRAconst x [31])
 (Zeromask x) -> (LoweredZeromask x)
 
+// float <-> int conversion
+(Cvt32to32F x) -> (MOVWF x)
+(Cvt32to64F x) -> (MOVWD x)
+(Cvt32Uto32F x) -> (MOVWUF x)
+(Cvt32Uto64F x) -> (MOVWUD x)
+(Cvt32Fto32 x) -> (MOVFW x)
+(Cvt64Fto32 x) -> (MOVDW x)
+(Cvt32Fto32U x) -> (MOVFWU x)
+(Cvt64Fto32U x) -> (MOVDWU x)
+(Cvt32Fto64F x) -> (MOVFD x)
+(Cvt64Fto32F x) -> (MOVDF x)
+
 // comparisons
 (Eq8 x y)  -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
 (Eq32 x y) -> (Equal (CMP x y))
 (EqPtr x y) -> (Equal (CMP x y))
+(Eq32F x y) -> (Equal (CMPF x y))
+(Eq64F x y) -> (Equal (CMPD x y))
 
 (Neq8 x y)  -> (NotEqual (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Neq16 x y) -> (NotEqual (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
 (Neq32 x y) -> (NotEqual (CMP x y))
 (NeqPtr x y) -> (NotEqual (CMP x y))
+(Neq32F x y) -> (NotEqual (CMPF x y))
+(Neq64F x y) -> (NotEqual (CMPD x y))
 
 (Less8 x y)  -> (LessThan (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Less16 x y) -> (LessThan (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Less32 x y) -> (LessThan (CMP x y))
+(Less32F x y) -> (GreaterThan (CMPF y x)) // reverse operands to work around NaN
+(Less64F x y) -> (GreaterThan (CMPD y x)) // reverse operands to work around NaN
 
 (Less8U x y)  -> (LessThanU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Less16U x y) -> (LessThanU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -186,6 +219,8 @@
 (Leq8 x y)  -> (LessEqual (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Leq16 x y) -> (LessEqual (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Leq32 x y) -> (LessEqual (CMP x y))
+(Leq32F x y) -> (GreaterEqual (CMPF y x)) // reverse operands to work around NaN
+(Leq64F x y) -> (GreaterEqual (CMPD y x)) // reverse operands to work around NaN
 
 (Leq8U x y)  -> (LessEqualU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Leq16U x y) -> (LessEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -194,6 +229,8 @@
 (Greater8 x y)  -> (GreaterThan (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Greater16 x y) -> (GreaterThan (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Greater32 x y) -> (GreaterThan (CMP x y))
+(Greater32F x y) -> (GreaterThan (CMPF x y))
+(Greater64F x y) -> (GreaterThan (CMPD x y))
 
 (Greater8U x y)  -> (GreaterThanU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Greater16U x y) -> (GreaterThanU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -202,6 +239,8 @@
 (Geq8 x y)  -> (GreaterEqual (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Geq16 x y) -> (GreaterEqual (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Geq32 x y) -> (GreaterEqual (CMP x y))
+(Geq32F x y) -> (GreaterEqual (CMPF x y))
+(Geq64F x y) -> (GreaterEqual (CMPD x y))
 
 (Geq8U x y)  -> (GreaterEqualU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Geq16U x y) -> (GreaterEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -218,11 +257,15 @@
 (Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) -> (MOVHload ptr mem)
 (Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) -> (MOVHUload ptr mem)
 (Load <t> ptr mem) && (is32BitInt(t) || isPtr(t)) -> (MOVWload ptr mem)
+(Load <t> ptr mem) && is32BitFloat(t) -> (MOVFload ptr mem)
+(Load <t> ptr mem) && is64BitFloat(t) -> (MOVDload ptr mem)
 
 // stores
 (Store [1] ptr val mem) -> (MOVBstore ptr val mem)
 (Store [2] ptr val mem) -> (MOVHstore ptr val mem)
-(Store [4] ptr val mem) -> (MOVWstore ptr val mem)
+(Store [4] ptr val mem) && !is32BitFloat(val.Type) -> (MOVWstore ptr val mem)
+(Store [4] ptr val mem) && is32BitFloat(val.Type) -> (MOVFstore ptr val mem)
+(Store [8] ptr val mem) && is64BitFloat(val.Type) -> (MOVDstore ptr val mem)
 
 // zero instructions
 //TODO: check alignment?
@@ -336,13 +379,21 @@
   (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVWload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
   (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVFload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+  (MOVFload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVDload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+  (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 
 (MOVBstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
   (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (MOVHstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
   (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (MOVWstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
   (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(MOVFstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
+  (MOVFstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(MOVDstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
+  (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 
 (ADD (MUL x y) a) -> (MULA x y a)
 (ADD a (MUL x y)) -> (MULA x y a)