summaryrefslogtreecommitdiff
path: root/packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch
diff options
context:
space:
mode:
Diffstat (limited to 'packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch')
-rw-r--r--packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch402
1 files changed, 0 insertions, 402 deletions
diff --git a/packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch b/packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch
deleted file mode 100644
index e876ed4..0000000
--- a/packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch
+++ /dev/null
@@ -1,402 +0,0 @@
-From cc11539ff23bd2c3136f826a18d8010ac34dc518 Mon Sep 17 00:00:00 2001
-From: Claudiu Zissulescu <claziss@synopsys.com>
-Date: Fri, 24 Mar 2017 11:55:54 +0100
-Subject: [PATCH] [ARC] Reimplement return padding operation for ARC700.
-
-For ARC700, adding padding if necessary to avoid a mispredict. A
-return could happen immediately after the function start. A
-call/return and return/return must be 6 bytes apart to avoid
-mispredict.
-
-The old implementation was doing this operation very late in the
-compilation process, and the additional nop instructions and/or
-forcing some other instruction to take their long form was not taken
-into account when generating brcc instructions. Thus, wrong code could
-be generated.
-
-gcc/
-2017-03-24 Claudiu Zissulescu <claziss@synopsys.com>
-
- * config/arc/arc-protos.h (arc_pad_return): Remove.
- * config/arc/arc.c (machine_function): Remove force_short_suffix
- and size_reason.
- (arc_print_operand): Adjust printing of '&'.
- (arc_verify_short): Remove conditional printing of short suffix.
- (arc_final_prescan_insn): Remove reference to size_reason.
- (pad_return): New function.
- (arc_reorg): Call pad_return.
- (arc_pad_return): Remove.
- (arc_init_machine_status): Remove reference to force_short_suffix.
- * config/arc/arc.md (vunspec): Add VUNSPEC_ARC_BLOCKAGE.
- (attr length): When attribute iscompact is true force to 2
- regardless; in the case of maybe check if we want to force the
- instruction to have 4 bytes length.
- (nopv): Change it to generate 4 byte long nop as well.
- (blockage): New pattern.
- (simple_return): Remove call to arc_pad_return.
- (p_return_i): Likewise.
-
-gcc/testsuite/
-2017-03-24 Claudiu Zissulescu <claziss@synopsys.com>
-
- * gcc.target/arc/pr9001107555.c: New file.
----
- gcc/config/arc/arc-protos.h | 1 -
- gcc/config/arc/arc.c | 156 +++++++++-----------
- gcc/config/arc/arc.md | 26 +++-
- gcc/testsuite/gcc.target/arc/pr9001107555.c | 38 +++++
- 4 files changed, 128 insertions(+), 93 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/arc/pr9001107555.c
-
-diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
-index 0ba6871628ad..cb5909564eac 100644
---- a/gcc/config/arc/arc-protos.h
-+++ b/gcc/config/arc/arc-protos.h
-@@ -93,7 +93,6 @@ extern void arc_clear_unalign (void);
- extern void arc_toggle_unalign (void);
- extern void split_addsi (rtx *);
- extern void split_subsi (rtx *);
--extern void arc_pad_return (void);
- extern void arc_split_move (rtx *);
- extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
- extern rtx arc_regno_use_in (unsigned int, rtx);
-diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
-index 2e6fbcb70c6c..79c2d72b4cb3 100644
---- a/gcc/config/arc/arc.c
-+++ b/gcc/config/arc/arc.c
-@@ -2564,8 +2564,6 @@ typedef struct GTY (()) machine_function
- struct arc_frame_info frame_info;
- /* To keep track of unalignment caused by short insns. */
- int unalign;
-- int force_short_suffix; /* Used when disgorging return delay slot insns. */
-- const char *size_reason;
- struct arc_ccfsm ccfsm_current;
- /* Map from uid to ccfsm state during branch shortening. */
- rtx ccfsm_current_insn;
-@@ -4220,7 +4218,7 @@ arc_print_operand (FILE *file, rtx x, int code)
- }
- break;
- case '&':
-- if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
-+ if (TARGET_ANNOTATE_ALIGN)
- fprintf (file, "; unalign: %d", cfun->machine->unalign);
- return;
- case '+':
-@@ -4906,7 +4904,6 @@ static int
- arc_verify_short (rtx_insn *insn, int, int check_attr)
- {
- enum attr_iscompact iscompact;
-- struct machine_function *machine;
-
- if (check_attr > 0)
- {
-@@ -4914,10 +4911,6 @@ arc_verify_short (rtx_insn *insn, int, int check_attr)
- if (iscompact == ISCOMPACT_FALSE)
- return 0;
- }
-- machine = cfun->machine;
--
-- if (machine->force_short_suffix >= 0)
-- return machine->force_short_suffix;
-
- return (get_attr_length (insn) & 2) != 0;
- }
-@@ -4956,8 +4949,6 @@ arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
- cfun->machine->prescan_initialized = 1;
- }
- arc_ccfsm_advance (insn, &arc_ccfsm_current);
--
-- cfun->machine->size_reason = 0;
- }
-
- /* Given FROM and TO register numbers, say whether this elimination is allowed.
-@@ -7599,6 +7590,76 @@ jli_call_scan (void)
- }
- }
-
-+/* Add padding if necessary to avoid a mispredict. A return could
-+ happen immediately after the function start. A call/return and
-+ return/return must be 6 bytes apart to avoid mispredict. */
-+
-+static void
-+pad_return (void)
-+{
-+ rtx_insn *insn;
-+ long offset;
-+
-+ if (!TARGET_PAD_RETURN)
-+ return;
-+
-+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
-+ {
-+ rtx_insn *prev0 = prev_active_insn (insn);
-+ bool wantlong = false;
-+
-+ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
-+ continue;
-+
-+ if (!prev0)
-+ {
-+ prev0 = emit_insn_before (gen_nopv (), insn);
-+ /* REG_SAVE_NOTE is used by Haifa scheduler, we are in reorg
-+ so it is safe to reuse it for forcing a particular length
-+ for an instruction. */
-+ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
-+ emit_insn_before (gen_nopv (), insn);
-+ continue;
-+ }
-+ offset = get_attr_length (prev0);
-+
-+ if (get_attr_length (prev0) == 2
-+ && get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
-+ {
-+ /* Force long version of the insn. */
-+ wantlong = true;
-+ offset += 2;
-+ }
-+
-+ rtx_insn *prev = prev_active_insn (prev0);
-+ if (prev)
-+ offset += get_attr_length (prev);
-+
-+ prev = prev_active_insn (prev);
-+ if (prev)
-+ offset += get_attr_length (prev);
-+
-+ switch (offset)
-+ {
-+ case 2:
-+ prev = emit_insn_before (gen_nopv (), insn);
-+ add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
-+ break;
-+ case 4:
-+ emit_insn_before (gen_nopv (), insn);
-+ break;
-+ default:
-+ continue;
-+ }
-+
-+ if (wantlong)
-+ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
-+
-+ /* Emit a blockage to avoid delay slot scheduling. */
-+ emit_insn_before (gen_blockage(), insn);
-+ }
-+}
-+
- static int arc_reorg_in_progress = 0;
-
- /* ARC's machince specific reorg function. */
-@@ -7624,6 +7685,7 @@ arc_reorg (void)
-
- workaround_arc_anomaly ();
- jli_call_scan ();
-+ pad_return ();
-
- /* FIXME: should anticipate ccfsm action, generate special patterns for
- to-be-deleted branches that have no delay slot and have at least the
-@@ -9332,79 +9394,6 @@ arc_branch_size_unknown_p (void)
- return !optimize_size && arc_reorg_in_progress;
- }
-
--/* We are about to output a return insn. Add padding if necessary to avoid
-- a mispredict. A return could happen immediately after the function
-- start, but after a call we know that there will be at least a blink
-- restore. */
--
--void
--arc_pad_return (void)
--{
-- rtx_insn *insn = current_output_insn;
-- rtx_insn *prev = prev_active_insn (insn);
-- int want_long;
--
-- if (!prev)
-- {
-- fputs ("\tnop_s\n", asm_out_file);
-- cfun->machine->unalign ^= 2;
-- want_long = 1;
-- }
-- /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
-- because after a call, we'd have to restore blink first. */
-- else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
-- return;
-- else
-- {
-- want_long = (get_attr_length (prev) == 2);
-- prev = prev_active_insn (prev);
-- }
-- if (!prev
-- || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
-- ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
-- NON_SIBCALL)
-- : CALL_ATTR (prev, NON_SIBCALL)))
-- {
-- if (want_long)
-- cfun->machine->size_reason
-- = "call/return and return/return must be 6 bytes apart to avoid mispredict";
-- else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
-- {
-- cfun->machine->size_reason
-- = "Long unaligned jump avoids non-delay slot penalty";
-- want_long = 1;
-- }
-- /* Disgorge delay insn, if there is any, and it may be moved. */
-- if (final_sequence
-- /* ??? Annulled would be OK if we can and do conditionalize
-- the delay slot insn accordingly. */
-- && !INSN_ANNULLED_BRANCH_P (insn)
-- && (get_attr_cond (insn) != COND_USE
-- || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
-- XVECEXP (final_sequence, 0, 1))))
-- {
-- prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
-- gcc_assert (!prev_real_insn (insn)
-- || !arc_hazard (prev_real_insn (insn), prev));
-- cfun->machine->force_short_suffix = !want_long;
-- rtx save_pred = current_insn_predicate;
-- final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
-- cfun->machine->force_short_suffix = -1;
-- prev->set_deleted ();
-- current_output_insn = insn;
-- current_insn_predicate = save_pred;
-- }
-- else if (want_long)
-- fputs ("\tnop\n", asm_out_file);
-- else
-- {
-- fputs ("\tnop_s\n", asm_out_file);
-- cfun->machine->unalign ^= 2;
-- }
-- }
-- return;
--}
--
- /* The usual; we set up our machine_function data. */
-
- static struct machine_function *
-@@ -9413,7 +9402,6 @@ arc_init_machine_status (void)
- struct machine_function *machine;
- machine = ggc_cleared_alloc<machine_function> ();
- machine->fn_type = ARC_FUNCTION_UNKNOWN;
-- machine->force_short_suffix = -1;
-
- return machine;
- }
-diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
-index d19e99daca72..fcc6e0692dd1 100644
---- a/gcc/config/arc/arc.md
-+++ b/gcc/config/arc/arc.md
-@@ -161,6 +161,7 @@
- VUNSPEC_ARC_CAS
- VUNSPEC_ARC_SC
- VUNSPEC_ARC_LL
-+ VUNSPEC_ARC_BLOCKAGE
- ])
-
- (define_constants
-@@ -384,13 +385,18 @@
- ;; and insn lengths: insns with shimm values cannot be conditionally executed.
- (define_attr "length" ""
- (cond
-- [(eq_attr "iscompact" "true,maybe")
-+ [(eq_attr "iscompact" "true")
-+ (const_int 2)
-+
-+ (eq_attr "iscompact" "maybe")
- (cond
- [(eq_attr "type" "sfunc")
- (cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC")
- (const_int 12)]
- (const_int 10))
-- (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)]
-+ (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)
-+ (match_test "find_reg_note (insn, REG_SAVE_NOTE, GEN_INT (1))")
-+ (const_int 4)]
- (const_int 2))
-
- (eq_attr "iscompact" "true_limm")
-@@ -4438,8 +4444,16 @@
- ""
- "nop%?"
- [(set_attr "type" "misc")
-- (set_attr "iscompact" "true")
-- (set_attr "length" "2")])
-+ (set_attr "iscompact" "maybe")
-+ (set_attr "length" "*")])
-+
-+(define_insn "blockage"
-+ [(unspec_volatile [(const_int 0)] VUNSPEC_ARC_BLOCKAGE)]
-+ ""
-+ ""
-+ [(set_attr "length" "0")
-+ (set_attr "type" "block")]
-+)
-
- ;; Split up troublesome insns for better scheduling.
-
-@@ -4984,8 +4998,6 @@
- {
- return \"rtie\";
- }
-- if (TARGET_PAD_RETURN)
-- arc_pad_return ();
- output_asm_insn (\"j%!%* [%0]%&\", &reg);
- return \"\";
- }
-@@ -5029,8 +5041,6 @@
- arc_return_address_register (arc_compute_function_type
- (cfun)));
-
-- if (TARGET_PAD_RETURN)
-- arc_pad_return ();
- output_asm_insn (\"j%d0%!%# [%1]%&\", xop);
- /* record the condition in case there is a delay insn. */
- arc_ccfsm_record_condition (xop[0], false, insn, 0);
-diff --git a/gcc/testsuite/gcc.target/arc/pr9001107555.c b/gcc/testsuite/gcc.target/arc/pr9001107555.c
-new file mode 100644
-index 000000000000..134426d33d93
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/arc/pr9001107555.c
-@@ -0,0 +1,38 @@
-+/* { dg-do assemble } *
-+/* { dg-skip-if "" { ! { clmcpu } } } */
-+/* { dg-options "-O3 -w -funroll-loops -mno-sdata -mcpu=arc700" } */
-+
-+typedef a __attribute__((__mode__(__DI__)));
-+typedef struct c c;
-+struct b {
-+ int d;
-+ c *e
-+};
-+enum { f };
-+typedef struct {
-+ a g;
-+ a h;
-+ int i
-+} j;
-+struct c {
-+ int count;
-+ int current
-+};
-+k;
-+l(struct b *demux, __builtin_va_list args) {
-+ c m = *demux->e;
-+ j *n;
-+ switch (k)
-+ case f: {
-+ a o = __builtin_va_arg(args, a);
-+ m.current = 0;
-+ while (m.current < m.count) {
-+ if (n[m.current].h > o) {
-+ p(demux->d, 4 + 128LL * n[m.current].i);
-+ break;
-+ }
-+ m.current++;
-+ }
-+ return 0;
-+ }
-+}
---
-2.17.0
-