summaryrefslogtreecommitdiff
path: root/packages
diff options
context:
space:
mode:
authorAlexey Brodkin <abrodkin@synopsys.com>2018-05-16 10:49:23 (GMT)
committerAlexey Brodkin <abrodkin@synopsys.com>2018-05-16 12:47:45 (GMT)
commite714f99d8934bf9b774d50aa9dcd524e2404dc58 (patch)
tree711f0c6906b4be1f3188866a586fb54b0086e1f3 /packages
parent1873a8e14ae7f04bd44f74afcb56a3af8eecc4ba (diff)
ARC: Fix Linux kernel comilation for ARC700
Without this fix we're getting the following error on attempt to build Linux kernel: --------------------------------->8--------------------------------- xfrm4_mode_tunnel.s: Assembler messages: xfrm4_mode_tunnel.s:188: Error: operand out of range (128 is not between - 128 and 127) --------------------------------->8--------------------------------- The fix is taken from https://github.com/foss-for-synopsys-dwc-arc-processors/gcc/commit/d8d716f49c0057e239f2b64e7d902046b92d244f and it is supposed to be merged in upstream GCC sources soonish. Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Diffstat (limited to 'packages')
-rw-r--r--packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch402
1 files changed, 402 insertions, 0 deletions
diff --git a/packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch b/packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch
new file mode 100644
index 0000000..e876ed4
--- /dev/null
+++ b/packages/gcc/8.1.0/0018-ARC-Reimplement-return-padding-operation-for-ARC700.patch
@@ -0,0 +1,402 @@
+From cc11539ff23bd2c3136f826a18d8010ac34dc518 Mon Sep 17 00:00:00 2001
+From: Claudiu Zissulescu <claziss@synopsys.com>
+Date: Fri, 24 Mar 2017 11:55:54 +0100
+Subject: [PATCH] [ARC] Reimplement return padding operation for ARC700.
+
+For ARC700, adding padding if necessary to avoid a mispredict. A
+return could happen immediately after the function start. A
+call/return and return/return must be 6 bytes apart to avoid
+mispredict.
+
+The old implementation was doing this operation very late in the
+compilation process, and the additional nop instructions and/or
+forcing some other instruction to take their long form was not taken
+into account when generating brcc instructions. Thus, wrong code could
+be generated.
+
+gcc/
+2017-03-24 Claudiu Zissulescu <claziss@synopsys.com>
+
+ * config/arc/arc-protos.h (arc_pad_return): Remove.
+ * config/arc/arc.c (machine_function): Remove force_short_suffix
+ and size_reason.
+ (arc_print_operand): Adjust printing of '&'.
+ (arc_verify_short): Remove conditional printing of short suffix.
+ (arc_final_prescan_insn): Remove reference to size_reason.
+ (pad_return): New function.
+ (arc_reorg): Call pad_return.
+ (arc_pad_return): Remove.
+ (arc_init_machine_status): Remove reference to force_short_suffix.
+ * config/arc/arc.md (vunspec): Add VUNSPEC_ARC_BLOCKAGE.
+ (attr length): When attribute iscompact is true force to 2
+ regardless; in the case of maybe check if we want to force the
+ instruction to have 4 bytes length.
+ (nopv): Change it to generate 4 byte long nop as well.
+ (blockage): New pattern.
+ (simple_return): Remove call to arc_pad_return.
+ (p_return_i): Likewise.
+
+gcc/testsuite/
+2017-03-24 Claudiu Zissulescu <claziss@synopsys.com>
+
+ * gcc.target/arc/pr9001107555.c: New file.
+---
+ gcc/config/arc/arc-protos.h | 1 -
+ gcc/config/arc/arc.c | 156 +++++++++-----------
+ gcc/config/arc/arc.md | 26 +++-
+ gcc/testsuite/gcc.target/arc/pr9001107555.c | 38 +++++
+ 4 files changed, 128 insertions(+), 93 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/arc/pr9001107555.c
+
+diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
+index 0ba6871628ad..cb5909564eac 100644
+--- a/gcc/config/arc/arc-protos.h
++++ b/gcc/config/arc/arc-protos.h
+@@ -93,7 +93,6 @@ extern void arc_clear_unalign (void);
+ extern void arc_toggle_unalign (void);
+ extern void split_addsi (rtx *);
+ extern void split_subsi (rtx *);
+-extern void arc_pad_return (void);
+ extern void arc_split_move (rtx *);
+ extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
+ extern rtx arc_regno_use_in (unsigned int, rtx);
+diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
+index 2e6fbcb70c6c..79c2d72b4cb3 100644
+--- a/gcc/config/arc/arc.c
++++ b/gcc/config/arc/arc.c
+@@ -2564,8 +2564,6 @@ typedef struct GTY (()) machine_function
+ struct arc_frame_info frame_info;
+ /* To keep track of unalignment caused by short insns. */
+ int unalign;
+- int force_short_suffix; /* Used when disgorging return delay slot insns. */
+- const char *size_reason;
+ struct arc_ccfsm ccfsm_current;
+ /* Map from uid to ccfsm state during branch shortening. */
+ rtx ccfsm_current_insn;
+@@ -4220,7 +4218,7 @@ arc_print_operand (FILE *file, rtx x, int code)
+ }
+ break;
+ case '&':
+- if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
++ if (TARGET_ANNOTATE_ALIGN)
+ fprintf (file, "; unalign: %d", cfun->machine->unalign);
+ return;
+ case '+':
+@@ -4906,7 +4904,6 @@ static int
+ arc_verify_short (rtx_insn *insn, int, int check_attr)
+ {
+ enum attr_iscompact iscompact;
+- struct machine_function *machine;
+
+ if (check_attr > 0)
+ {
+@@ -4914,10 +4911,6 @@ arc_verify_short (rtx_insn *insn, int, int check_attr)
+ if (iscompact == ISCOMPACT_FALSE)
+ return 0;
+ }
+- machine = cfun->machine;
+-
+- if (machine->force_short_suffix >= 0)
+- return machine->force_short_suffix;
+
+ return (get_attr_length (insn) & 2) != 0;
+ }
+@@ -4956,8 +4949,6 @@ arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
+ cfun->machine->prescan_initialized = 1;
+ }
+ arc_ccfsm_advance (insn, &arc_ccfsm_current);
+-
+- cfun->machine->size_reason = 0;
+ }
+
+ /* Given FROM and TO register numbers, say whether this elimination is allowed.
+@@ -7599,6 +7590,76 @@ jli_call_scan (void)
+ }
+ }
+
++/* Add padding if necessary to avoid a mispredict. A return could
++ happen immediately after the function start. A call/return and
++ return/return must be 6 bytes apart to avoid mispredict. */
++
++static void
++pad_return (void)
++{
++ rtx_insn *insn;
++ long offset;
++
++ if (!TARGET_PAD_RETURN)
++ return;
++
++ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
++ {
++ rtx_insn *prev0 = prev_active_insn (insn);
++ bool wantlong = false;
++
++ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
++ continue;
++
++ if (!prev0)
++ {
++ prev0 = emit_insn_before (gen_nopv (), insn);
++ /* REG_SAVE_NOTE is used by Haifa scheduler, we are in reorg
++ so it is safe to reuse it for forcing a particular length
++ for an instruction. */
++ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
++ emit_insn_before (gen_nopv (), insn);
++ continue;
++ }
++ offset = get_attr_length (prev0);
++
++ if (get_attr_length (prev0) == 2
++ && get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
++ {
++ /* Force long version of the insn. */
++ wantlong = true;
++ offset += 2;
++ }
++
++ rtx_insn *prev = prev_active_insn (prev0);
++ if (prev)
++ offset += get_attr_length (prev);
++
++ prev = prev_active_insn (prev);
++ if (prev)
++ offset += get_attr_length (prev);
++
++ switch (offset)
++ {
++ case 2:
++ prev = emit_insn_before (gen_nopv (), insn);
++ add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
++ break;
++ case 4:
++ emit_insn_before (gen_nopv (), insn);
++ break;
++ default:
++ continue;
++ }
++
++ if (wantlong)
++ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
++
++ /* Emit a blockage to avoid delay slot scheduling. */
++ emit_insn_before (gen_blockage(), insn);
++ }
++}
++
+ static int arc_reorg_in_progress = 0;
+
+ /* ARC's machince specific reorg function. */
+@@ -7624,6 +7685,7 @@ arc_reorg (void)
+
+ workaround_arc_anomaly ();
+ jli_call_scan ();
++ pad_return ();
+
+ /* FIXME: should anticipate ccfsm action, generate special patterns for
+ to-be-deleted branches that have no delay slot and have at least the
+@@ -9332,79 +9394,6 @@ arc_branch_size_unknown_p (void)
+ return !optimize_size && arc_reorg_in_progress;
+ }
+
+-/* We are about to output a return insn. Add padding if necessary to avoid
+- a mispredict. A return could happen immediately after the function
+- start, but after a call we know that there will be at least a blink
+- restore. */
+-
+-void
+-arc_pad_return (void)
+-{
+- rtx_insn *insn = current_output_insn;
+- rtx_insn *prev = prev_active_insn (insn);
+- int want_long;
+-
+- if (!prev)
+- {
+- fputs ("\tnop_s\n", asm_out_file);
+- cfun->machine->unalign ^= 2;
+- want_long = 1;
+- }
+- /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
+- because after a call, we'd have to restore blink first. */
+- else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+- return;
+- else
+- {
+- want_long = (get_attr_length (prev) == 2);
+- prev = prev_active_insn (prev);
+- }
+- if (!prev
+- || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
+- ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
+- NON_SIBCALL)
+- : CALL_ATTR (prev, NON_SIBCALL)))
+- {
+- if (want_long)
+- cfun->machine->size_reason
+- = "call/return and return/return must be 6 bytes apart to avoid mispredict";
+- else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
+- {
+- cfun->machine->size_reason
+- = "Long unaligned jump avoids non-delay slot penalty";
+- want_long = 1;
+- }
+- /* Disgorge delay insn, if there is any, and it may be moved. */
+- if (final_sequence
+- /* ??? Annulled would be OK if we can and do conditionalize
+- the delay slot insn accordingly. */
+- && !INSN_ANNULLED_BRANCH_P (insn)
+- && (get_attr_cond (insn) != COND_USE
+- || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
+- XVECEXP (final_sequence, 0, 1))))
+- {
+- prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
+- gcc_assert (!prev_real_insn (insn)
+- || !arc_hazard (prev_real_insn (insn), prev));
+- cfun->machine->force_short_suffix = !want_long;
+- rtx save_pred = current_insn_predicate;
+- final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
+- cfun->machine->force_short_suffix = -1;
+- prev->set_deleted ();
+- current_output_insn = insn;
+- current_insn_predicate = save_pred;
+- }
+- else if (want_long)
+- fputs ("\tnop\n", asm_out_file);
+- else
+- {
+- fputs ("\tnop_s\n", asm_out_file);
+- cfun->machine->unalign ^= 2;
+- }
+- }
+- return;
+-}
+-
+ /* The usual; we set up our machine_function data. */
+
+ static struct machine_function *
+@@ -9413,7 +9402,6 @@ arc_init_machine_status (void)
+ struct machine_function *machine;
+ machine = ggc_cleared_alloc<machine_function> ();
+ machine->fn_type = ARC_FUNCTION_UNKNOWN;
+- machine->force_short_suffix = -1;
+
+ return machine;
+ }
+diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
+index d19e99daca72..fcc6e0692dd1 100644
+--- a/gcc/config/arc/arc.md
++++ b/gcc/config/arc/arc.md
+@@ -161,6 +161,7 @@
+ VUNSPEC_ARC_CAS
+ VUNSPEC_ARC_SC
+ VUNSPEC_ARC_LL
++ VUNSPEC_ARC_BLOCKAGE
+ ])
+
+ (define_constants
+@@ -384,13 +385,18 @@
+ ;; and insn lengths: insns with shimm values cannot be conditionally executed.
+ (define_attr "length" ""
+ (cond
+- [(eq_attr "iscompact" "true,maybe")
++ [(eq_attr "iscompact" "true")
++ (const_int 2)
++
++ (eq_attr "iscompact" "maybe")
+ (cond
+ [(eq_attr "type" "sfunc")
+ (cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC")
+ (const_int 12)]
+ (const_int 10))
+- (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)]
++ (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)
++ (match_test "find_reg_note (insn, REG_SAVE_NOTE, GEN_INT (1))")
++ (const_int 4)]
+ (const_int 2))
+
+ (eq_attr "iscompact" "true_limm")
+@@ -4438,8 +4444,16 @@
+ ""
+ "nop%?"
+ [(set_attr "type" "misc")
+- (set_attr "iscompact" "true")
+- (set_attr "length" "2")])
++ (set_attr "iscompact" "maybe")
++ (set_attr "length" "*")])
++
++(define_insn "blockage"
++ [(unspec_volatile [(const_int 0)] VUNSPEC_ARC_BLOCKAGE)]
++ ""
++ ""
++ [(set_attr "length" "0")
++ (set_attr "type" "block")]
++)
+
+ ;; Split up troublesome insns for better scheduling.
+
+@@ -4984,8 +4998,6 @@
+ {
+ return \"rtie\";
+ }
+- if (TARGET_PAD_RETURN)
+- arc_pad_return ();
+ output_asm_insn (\"j%!%* [%0]%&\", &reg);
+ return \"\";
+ }
+@@ -5029,8 +5041,6 @@
+ arc_return_address_register (arc_compute_function_type
+ (cfun)));
+
+- if (TARGET_PAD_RETURN)
+- arc_pad_return ();
+ output_asm_insn (\"j%d0%!%# [%1]%&\", xop);
+ /* record the condition in case there is a delay insn. */
+ arc_ccfsm_record_condition (xop[0], false, insn, 0);
+diff --git a/gcc/testsuite/gcc.target/arc/pr9001107555.c b/gcc/testsuite/gcc.target/arc/pr9001107555.c
+new file mode 100644
+index 000000000000..134426d33d93
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/arc/pr9001107555.c
+@@ -0,0 +1,38 @@
++/* { dg-do assemble } *
++/* { dg-skip-if "" { ! { clmcpu } } } */
++/* { dg-options "-O3 -w -funroll-loops -mno-sdata -mcpu=arc700" } */
++
++typedef a __attribute__((__mode__(__DI__)));
++typedef struct c c;
++struct b {
++ int d;
++ c *e
++};
++enum { f };
++typedef struct {
++ a g;
++ a h;
++ int i
++} j;
++struct c {
++ int count;
++ int current
++};
++k;
++l(struct b *demux, __builtin_va_list args) {
++ c m = *demux->e;
++ j *n;
++ switch (k)
++ case f: {
++ a o = __builtin_va_arg(args, a);
++ m.current = 0;
++ while (m.current < m.count) {
++ if (n[m.current].h > o) {
++ p(demux->d, 4 + 128LL * n[m.current].i);
++ break;
++ }
++ m.current++;
++ }
++ return 0;
++ }
++}
+--
+2.17.0
+