Skip to content

Commit 38181b4

Browse files
claziss authored and
stephanosio committed
arc: Add ARCHS release 310a tune variant.
Add mtune and mcpu options for ARCHS release 310a type CPU. The mtune=release31a is designed to be used as an alternative to the mcpu=hs4x_rel31 option. ARCHS4x release 31a uses DSP instructions which are implemented a bit differently than mpy9. Hence, use the safer mpy2 option. gcc/ * config/arc/arc-arch.h (arc_tune_attr): Add ARC_TUNE_ARCHS4X_REL31A variant. * config/arc/arc.cc (arc_override_options): Tune options for release 310a. (arc_sched_issue_rate): Use correct enum. (arc600_corereg_hazard): Textual change. (arc_hazard): Add release 310a tuning. * config/arc/arc.md (tune): Update and take into consideration new tune option. (tune_dspmpy): Likewise. (tune_store): New attribute. * config/arc/arc.opt (mtune): New tune option. * config/arc/arcHS4x.md (hs4x_brcc0, hs4x_brcc1): New cpu units. (hs4x_brcc_op): New instruction reservation. (hs4x_data_store_1_op): Likewise. * config/arc/arc-cpus.def (hs4x_rel31): New cpu variant. * config/arc/arc-tables.opt: Regenerate. * config/arc/t-multilib: Likewise. * doc/invoke.texi (ARC): Update mcpu and tune sections. Signed-off-by: Claudiu Zissulescu <claziss@gmail.com>
1 parent c62ec7a commit 38181b4

File tree

9 files changed

+181
-90
lines changed

9 files changed

+181
-90
lines changed

gcc/config/arc/arc-arch.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ enum arc_tune_attr
7777
ARC_TUNE_CORE_3,
7878
ARC_TUNE_ARCHS4X,
7979
ARC_TUNE_ARCHS4XD,
80-
ARC_TUNE_ARCHS4XD_SLOW
80+
ARC_TUNE_ARCHS4XD_SLOW,
81+
ARC_TUNE_ARCHS4X_REL31A
8182
};
8283

8384
/* Extra options for a processor template to hold any CPU specific

gcc/config/arc/arc-cpus.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, NONE)
6464
ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE, NONE)
6565
ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4X)
6666
ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4XD)
67+
ARC_CPU (hs4x_rel31, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE, ARCHS4X_REL31A)
6768

6869
ARC_CPU (arc600, 6xx, FL_BS, NONE, ARC600)
6970
ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, NONE, ARC600)

gcc/config/arc/arc-tables.opt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ Enum(processor_type) String(hs4x) Value(PROCESSOR_hs4x)
6969
EnumValue
7070
Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd)
7171

72+
EnumValue
73+
Enum(processor_type) String(hs4x_rel31) Value(PROCESSOR_hs4x_rel31)
74+
7275
EnumValue
7376
Enum(processor_type) String(arc600) Value(PROCESSOR_arc600)
7477

gcc/config/arc/arc.cc

Lines changed: 119 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,8 @@ arc_sched_issue_rate (void)
646646
{
647647
switch (arc_tune)
648648
{
649-
case TUNE_ARCHS4X:
650-
case TUNE_ARCHS4XD:
649+
case ARC_TUNE_ARCHS4X:
650+
case ARC_TUNE_ARCHS4XD:
651651
return 3;
652652
default:
653653
break;
@@ -1458,6 +1458,12 @@ arc_override_options (void)
14581458
if (!OPTION_SET_P (unaligned_access) && TARGET_HS)
14591459
unaligned_access = 1;
14601460

1461+
if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A))
1462+
{
1463+
TARGET_CODE_DENSITY_FRAME = 0;
1464+
flag_delayed_branch = 0;
1465+
}
1466+
14611467
/* These need to be done at start up. It's convenient to do them here. */
14621468
arc_init ();
14631469
}
@@ -7817,6 +7823,115 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
78177823
return arc_store_addr_hazard_internal_p (producer, consumer);
78187824
}
78197825

7826+
/* Return length adjustment for INSN.
7827+
For ARC600:
7828+
A write to a core reg greater or equal to 32 must not be immediately
7829+
followed by a use. Anticipate the length requirement to insert a nop
7830+
between PRED and SUCC to prevent a hazard. */
7831+
7832+
static int
7833+
arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
7834+
{
7835+
if (!TARGET_ARC600)
7836+
return 0;
7837+
if (GET_CODE (PATTERN (pred)) == SEQUENCE)
7838+
pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
7839+
if (GET_CODE (PATTERN (succ)) == SEQUENCE)
7840+
succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
7841+
if (recog_memoized (pred) == CODE_FOR_mulsi_600
7842+
|| recog_memoized (pred) == CODE_FOR_umul_600
7843+
|| recog_memoized (pred) == CODE_FOR_mac_600
7844+
|| recog_memoized (pred) == CODE_FOR_mul64_600
7845+
|| recog_memoized (pred) == CODE_FOR_mac64_600
7846+
|| recog_memoized (pred) == CODE_FOR_umul64_600
7847+
|| recog_memoized (pred) == CODE_FOR_umac64_600)
7848+
return 0;
7849+
subrtx_iterator::array_type array;
7850+
FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
7851+
{
7852+
const_rtx x = *iter;
7853+
switch (GET_CODE (x))
7854+
{
7855+
case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
7856+
break;
7857+
default:
7858+
/* This is also fine for PRE/POST_MODIFY, because they
7859+
contain a SET. */
7860+
continue;
7861+
}
7862+
rtx dest = XEXP (x, 0);
7863+
/* Check if this sets an extension register. N.B. we use 61 for the
7864+
condition codes, which is definitely not an extension register. */
7865+
if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
7866+
/* Check if the same register is used by the PAT. */
7867+
&& (refers_to_regno_p
7868+
(REGNO (dest),
7869+
REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
7870+
PATTERN (succ), 0)))
7871+
return 4;
7872+
}
7873+
return 0;
7874+
}
7875+
7876+
/* For ARC600:
7877+
A write to a core reg greater or equal to 32 must not be immediately
7878+
followed by a use. Anticipate the length requirement to insert a nop
7879+
between PRED and SUCC to prevent a hazard. */
7880+
7881+
int
7882+
arc_hazard (rtx_insn *pred, rtx_insn *succ)
7883+
{
7884+
if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
7885+
return 0;
7886+
7887+
if (TARGET_ARC600)
7888+
return arc600_corereg_hazard (pred, succ);
7889+
7890+
return 0;
7891+
}
7892+
7893+
/* When compiling for release 310a, insert a nop between a store and an
7894+
adjacent conditional branch or loop end, in either order. */
7895+
7896+
static int
7897+
arc_check_release31a (rtx_insn *pred, rtx_insn *succ)
7898+
{
7899+
if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
7900+
return 0;
7901+
7902+
if (!JUMP_P (pred) && !single_set (pred))
7903+
return 0;
7904+
7905+
if (!JUMP_P (succ) && !single_set (succ))
7906+
return 0;
7907+
7908+
if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A))
7909+
switch (get_attr_type (pred))
7910+
{
7911+
case TYPE_STORE:
7912+
switch (get_attr_type (succ))
7913+
{
7914+
case TYPE_BRCC:
7915+
case TYPE_BRCC_NO_DELAY_SLOT:
7916+
case TYPE_LOOP_END:
7917+
return 1;
7918+
default:
7919+
break;
7920+
}
7921+
break;
7922+
case TYPE_BRCC:
7923+
case TYPE_BRCC_NO_DELAY_SLOT:
7924+
case TYPE_LOOP_END:
7925+
if (get_attr_type (succ) == TYPE_STORE)
7926+
return 1;
7927+
break;
7928+
default:
7929+
break;
7930+
}
7931+
7932+
return 0;
7933+
}
7934+
78207935
/* The same functionality as arc_hazard. It is called in machine
78217936
reorg before any other optimization. Hence, the NOP size is taken
78227937
into account when doing branch shortening. */
@@ -7830,10 +7945,8 @@ workaround_arc_anomaly (void)
78307945
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
78317946
{
78327947
succ0 = next_real_insn (insn);
7833-
if (arc_hazard (insn, succ0))
7834-
{
7835-
emit_insn_before (gen_nopv (), succ0);
7836-
}
7948+
if (arc_hazard (insn, succ0) || arc_check_release31a (insn, succ0))
7949+
emit_insn_before (gen_nopv (), succ0);
78377950
}
78387951

78397952
if (!TARGET_ARC700)
@@ -9324,56 +9437,6 @@ disi_highpart (rtx in)
93249437
return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
93259438
}
93269439

9327-
/* Return length adjustment for INSN.
9328-
For ARC600:
9329-
A write to a core reg greater or equal to 32 must not be immediately
9330-
followed by a use. Anticipate the length requirement to insert a nop
9331-
between PRED and SUCC to prevent a hazard. */
9332-
9333-
static int
9334-
arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
9335-
{
9336-
if (!TARGET_ARC600)
9337-
return 0;
9338-
if (GET_CODE (PATTERN (pred)) == SEQUENCE)
9339-
pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
9340-
if (GET_CODE (PATTERN (succ)) == SEQUENCE)
9341-
succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
9342-
if (recog_memoized (pred) == CODE_FOR_mulsi_600
9343-
|| recog_memoized (pred) == CODE_FOR_umul_600
9344-
|| recog_memoized (pred) == CODE_FOR_mac_600
9345-
|| recog_memoized (pred) == CODE_FOR_mul64_600
9346-
|| recog_memoized (pred) == CODE_FOR_mac64_600
9347-
|| recog_memoized (pred) == CODE_FOR_umul64_600
9348-
|| recog_memoized (pred) == CODE_FOR_umac64_600)
9349-
return 0;
9350-
subrtx_iterator::array_type array;
9351-
FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
9352-
{
9353-
const_rtx x = *iter;
9354-
switch (GET_CODE (x))
9355-
{
9356-
case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
9357-
break;
9358-
default:
9359-
/* This is also fine for PRE/POST_MODIFY, because they
9360-
contain a SET. */
9361-
continue;
9362-
}
9363-
rtx dest = XEXP (x, 0);
9364-
/* Check if this sets an extension register. N.B. we use 61 for the
9365-
condition codes, which is definitely not an extension register. */
9366-
if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
9367-
/* Check if the same register is used by the PAT. */
9368-
&& (refers_to_regno_p
9369-
(REGNO (dest),
9370-
REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
9371-
PATTERN (succ), 0)))
9372-
return 4;
9373-
}
9374-
return 0;
9375-
}
9376-
93779440
/* Given a rtx, check if it is an assembly instruction or not. */
93789441

93799442
static int
@@ -9408,23 +9471,6 @@ arc_asm_insn_p (rtx x)
94089471
return 0;
94099472
}
94109473

9411-
/* For ARC600:
9412-
A write to a core reg greater or equal to 32 must not be immediately
9413-
followed by a use. Anticipate the length requirement to insert a nop
9414-
between PRED and SUCC to prevent a hazard. */
9415-
9416-
int
9417-
arc_hazard (rtx_insn *pred, rtx_insn *succ)
9418-
{
9419-
if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
9420-
return 0;
9421-
9422-
if (TARGET_ARC600)
9423-
return arc600_corereg_hazard (pred, succ);
9424-
9425-
return 0;
9426-
}
9427-
94289474
/* Return length adjustment for INSN. */
94299475

94309476
int

gcc/config/arc/arc.md

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -645,22 +645,21 @@
645645
;; is made that makes conditional execution required.
646646

647647
(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \
648-
core_3, archs4x, archs4xd, archs4xd_slow"
648+
archs4x, archs4xd"
649649
(const
650-
(cond [(symbol_ref "arc_tune == TUNE_ARC600")
650+
(cond [(symbol_ref "arc_tune == ARC_TUNE_ARC600")
651651
(const_string "arc600")
652652
(symbol_ref "arc_tune == ARC_TUNE_ARC7XX")
653653
(const_string "arc7xx")
654-
(symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
654+
(symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_STD")
655655
(const_string "arc700_4_2_std")
656-
(symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
656+
(symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_XMAC")
657657
(const_string "arc700_4_2_xmac")
658-
(symbol_ref "arc_tune == ARC_TUNE_CORE_3")
659-
(const_string "core_3")
660-
(symbol_ref "arc_tune == TUNE_ARCHS4X")
658+
(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
659+
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A"))
661660
(const_string "archs4x")
662-
(ior (symbol_ref "arc_tune == TUNE_ARCHS4XD")
663-
(symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW"))
661+
(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")
662+
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW"))
664663
(const_string "archs4xd")]
665664
(const_string "none"))))
666665

@@ -671,13 +670,22 @@ core_3, archs4x, archs4xd, archs4xd_slow"
671670

672671
(define_attr "tune_dspmpy" "none, slow, fast"
673672
(const
674-
(cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X")
675-
(symbol_ref "arc_tune == TUNE_ARCHS4XD"))
673+
(cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
674+
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD"))
676675
(const_string "fast")
677-
(symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")
676+
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW")
678677
(const_string "slow")]
679678
(const_string "none"))))
680679

680+
(define_attr "tune_store" "none, normal, rel31a"
681+
(const
682+
(cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
683+
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD"))
684+
(const_string "normal")
685+
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A")
686+
(const_string "rel31a")]
687+
(const_string "none"))))
688+
681689
;; Move instructions.
682690
(define_expand "movqi"
683691
[(set (match_operand:QI 0 "move_dest_operand" "")

gcc/config/arc/arc.opt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ Enum(arc_tune_attr) String(arc750d) Value(ARC_TUNE_ARC700_4_2_XMAC)
276276
EnumValue
277277
Enum(arc_tune_attr) String(core3) Value(ARC_TUNE_CORE_3)
278278

279+
EnumValue
280+
Enum(arc_tune_attr) String(release31a) Value(ARC_TUNE_ARCHS4X_REL31A)
281+
279282
mindexed-loads
280283
Target Var(TARGET_INDEXED_LOADS) Init(TARGET_INDEXED_LOADS_DEFAULT)
281284
Enable the use of indexed loads.

gcc/config/arc/arcHS4x.md

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,21 @@
2727
(define_cpu_unit "hs4x_mult" "ARCHS4x")
2828
(define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x")
2929
(define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x")
30+
(define_cpu_unit "hs4x_brcc0, hs4x_brcc1" "ARCHS4x")
3031

3132
(define_insn_reservation "hs4x_brj_op" 1
3233
(and (match_test "TARGET_HS")
3334
(eq_attr "tune" "archs4x, archs4xd")
3435
(eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \
35-
branch, brcc,brcc_no_delay_slot, sfunc"))
36+
branch, sfunc"))
3637
"hs4x_issue0")
3738

39+
(define_insn_reservation "hs4x_brcc_op" 1
40+
(and (match_test "TARGET_HS")
41+
(eq_attr "tune" "archs4x, archs4xd")
42+
(eq_attr "type" "brcc,brcc_no_delay_slot,loop_end"))
43+
"hs4x_issue0 + hs4x_brcc0 + hs4x_brcc1")
44+
3845
(define_insn_reservation "hs4x_data_load_op" 4
3946
(and (match_test "TARGET_HS")
4047
(eq_attr "tune" "archs4x, archs4xd")
@@ -43,10 +50,16 @@ branch, brcc,brcc_no_delay_slot, sfunc"))
4350

4451
(define_insn_reservation "hs4x_data_store_op" 1
4552
(and (match_test "TARGET_HS")
46-
(eq_attr "tune" "archs4x, archs4xd")
53+
(eq_attr "tune_store" "normal")
4754
(eq_attr "type" "store"))
4855
"hs4x_issue1 + hs4x_ld_st")
4956

57+
(define_insn_reservation "hs4x_data_store_1_op" 2
58+
(and (match_test "TARGET_HS")
59+
(eq_attr "tune_store" "rel31a")
60+
(eq_attr "type" "store"))
61+
"hs4x_issue1 + hs4x_ld_st + hs4x_brcc0, hs4x_brcc1")
62+
5063
;; Advanced ALU
5164
(define_insn_reservation "hs4x_adv_alue_op" 4
5265
(and (match_test "TARGET_HS")

gcc/config/arc/t-multilib

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
# along with GCC; see the file COPYING3. If not see
2222
# <http://www.gnu.org/licenses/>.
2323

24-
MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
24+
MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=hs4x_rel31/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
2525

26-
MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
26+
MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd hs4x_rel31 arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
2727

2828
# Aliases:
2929
MULTILIB_MATCHES = mcpu?arc600=mcpu?ARC600

0 commit comments

Comments
 (0)