diff --git a/Makefile.def b/Makefile.def index 4b9f757f58652..df8ccfb24c3d0 100644 --- a/Makefile.def +++ b/Makefile.def @@ -4,7 +4,7 @@ AutoGen definitions Makefile.tpl; // Makefile.in is generated from Makefile.tpl by 'autogen Makefile.def'. // This file was originally written by Nathanael Nerode. // -// Copyright 2002-2013 Free Software Foundation +// Copyright 2002-2019 Free Software Foundation // // This file is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -81,6 +81,15 @@ host_modules= { module= itcl; }; host_modules= { module= ld; bootstrap=true; }; host_modules= { module= libbacktrace; bootstrap=true; }; host_modules= { module= libcpp; bootstrap=true; }; +// As with libiconv, don't install any of libcody +host_modules= { module= libcody; bootstrap=true; + no_install= true; + missing= pdf; + missing= html; + missing= info; + missing= install-pdf; + missing= install-html; + missing= install-info; }; host_modules= { module= libdecnumber; bootstrap=true; }; host_modules= { module= libgui; }; host_modules= { module= libiberty; bootstrap=true; @@ -112,6 +121,9 @@ host_modules= { module= texinfo; no_install= true; }; host_modules= { module= zlib; no_install=true; no_check=true; bootstrap=true; extra_configure_flags='@extra_host_zlib_configure_flags@';}; +host_modules= { module= gnulib; }; +host_modules= { module= gdbsupport; }; +host_modules= { module= gdbserver; }; host_modules= { module= gdb; }; host_modules= { module= expect; }; host_modules= { module= guile; }; @@ -122,12 +134,14 @@ host_modules= { module= libtermcap; no_check=true; missing=distclean; missing=maintainer-clean; }; host_modules= { module= utils; no_check=true; }; +host_modules= { module= c++tools; }; host_modules= { module= gnattools; }; host_modules= { module= lto-plugin; bootstrap=true; extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@'; extra_make_flags='@extra_linker_plugin_flags@'; }; host_modules= { module= libcc1; extra_configure_flags=--enable-shared; }; host_modules= { module= gotools; }; +host_modules= { module= libctf; bootstrap=true; }; target_modules = { module= libstdc++-v3; bootstrap=true; @@ -137,9 +151,6 @@ target_modules = { module= libsanitizer; bootstrap=true; lib_path=.libs; raw_cxx=true; }; -target_modules = { module= libmpx; - bootstrap=true; - lib_path=.libs; }; target_modules = { module= libvtv; bootstrap=true; lib_path=.libs; @@ -156,6 +167,8 @@ target_modules = { module= libgfortran; }; target_modules = { module= libobjc; }; target_modules = { module= libgo; }; target_modules = { module= libhsail-rt; }; +target_modules = { module= libphobos; + lib_path=src/.libs; }; target_modules = { module= libtermcap; no_check=true; missing=mostlyclean; missing=clean; @@ -268,6 +281,8 @@ flags_to_pass = { flag= STAGE1_CHECKING ; }; flags_to_pass = { flag= STAGE1_LANGUAGES ; }; flags_to_pass = { flag= GNATBIND ; }; flags_to_pass = { flag= GNATMAKE ; }; +flags_to_pass = { flag= GDC ; }; +flags_to_pass = { flag= GDCFLAGS ; }; // Target tools flags_to_pass = { flag= AR_FOR_TARGET ; }; @@ -281,6 +296,8 @@ flags_to_pass = { flag= FLAGS_FOR_TARGET ; }; flags_to_pass = { flag= GFORTRAN_FOR_TARGET ; }; flags_to_pass = { flag= GOC_FOR_TARGET ; }; flags_to_pass = { flag= GOCFLAGS_FOR_TARGET ; }; +flags_to_pass = { flag= GDC_FOR_TARGET ; }; +flags_to_pass = { flag= GDCFLAGS_FOR_TARGET ; }; flags_to_pass = { flag= LD_FOR_TARGET ; }; flags_to_pass = { flag= LIPO_FOR_TARGET ; }; 
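The Makefile.def changes above add libcody, gnulib, gdbsupport, gdbserver, c++tools and libctf as host modules, add libphobos as a target module, drop libmpx, and pass the new GDC/GDCFLAGS variables down to sub-makes; they only take effect once the generated Makefile.in is refreshed. As the header comment states, Makefile.in is produced from Makefile.tpl by AutoGen. A minimal regeneration sketch, where $GCC_SRC is a placeholder for the toplevel source directory (not something defined by this patch):

    cd "$GCC_SRC"            # placeholder: toplevel source tree holding Makefile.def and Makefile.tpl
    autogen Makefile.def     # reads Makefile.tpl and rewrites Makefile.in
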
flags_to_pass = { flag= LDFLAGS_FOR_TARGET ; }; @@ -339,6 +356,7 @@ dependencies = { module=all-gcc; on=all-build-libcpp; }; dependencies = { module=all-gcc; on=all-zlib; }; dependencies = { module=all-gcc; on=all-libbacktrace; hard=true; }; dependencies = { module=all-gcc; on=all-libcpp; hard=true; }; +dependencies = { module=all-gcc; on=all-libcody; hard=true; }; dependencies = { module=all-gcc; on=all-libdecnumber; hard=true; }; dependencies = { module=all-gcc; on=all-libiberty; }; dependencies = { module=all-gcc; on=all-fixincludes; }; @@ -373,6 +391,8 @@ dependencies = { module=all-lto-plugin; on=all-libiberty-linker-plugin; }; dependencies = { module=configure-libcc1; on=configure-gcc; }; dependencies = { module=all-libcc1; on=all-gcc; }; +// we want version.o from gcc, and implicitly depend on libcody +dependencies = { module=all-c++tools; on=all-gcc; }; dependencies = { module=all-gotools; on=all-target-libgo; }; dependencies = { module=all-utils; on=all-libiberty; }; @@ -387,17 +407,28 @@ dependencies = { module=all-intl; on=all-libiconv; }; dependencies = { module=configure-gdb; on=all-intl; }; dependencies = { module=configure-gdb; on=configure-sim; }; dependencies = { module=configure-gdb; on=all-bfd; }; +dependencies = { module=configure-gdb; on=all-gnulib; }; +dependencies = { module=configure-gdb; on=all-gdbsupport; }; // Depend on all-libiconv so that configure checks for iconv // functions will work. dependencies = { module=configure-gdb; on=all-libiconv; }; dependencies = { module=all-gdb; on=all-libiberty; }; dependencies = { module=all-gdb; on=all-libiconv; }; +dependencies = { module=all-gdb; on=all-gnulib; }; +dependencies = { module=all-gdb; on=all-gdbsupport; }; dependencies = { module=all-gdb; on=all-opcodes; }; dependencies = { module=all-gdb; on=all-readline; }; dependencies = { module=all-gdb; on=all-build-bison; }; dependencies = { module=all-gdb; on=all-sim; }; dependencies = { module=all-gdb; on=all-libdecnumber; }; dependencies = { module=all-gdb; on=all-libtermcap; }; +dependencies = { module=all-gdb; on=all-libctf; }; + +// Host modules specific to gdbserver. +dependencies = { module=configure-gdbserver; on=all-gnulib; }; +dependencies = { module=all-gdbserver; on=all-gdbsupport; }; +dependencies = { module=all-gdbserver; on=all-gnulib; }; +dependencies = { module=all-gdbserver; on=all-libiberty; }; dependencies = { module=configure-libgui; on=configure-tcl; }; dependencies = { module=configure-libgui; on=configure-tk; }; @@ -405,6 +436,11 @@ dependencies = { module=all-libgui; on=all-tcl; }; dependencies = { module=all-libgui; on=all-tk; }; dependencies = { module=all-libgui; on=all-itcl; }; +dependencies = { module=configure-gdbsupport; on=configure-gnulib; }; +dependencies = { module=configure-gdbsupport; on=configure-intl; }; +dependencies = { module=all-gdbsupport; on=all-gnulib; }; +dependencies = { module=all-gdbsupport; on=all-intl; }; + // Host modules specific to binutils. 
dependencies = { module=configure-bfd; on=configure-libiberty; hard=true; }; dependencies = { module=configure-bfd; on=configure-intl; }; @@ -425,6 +461,8 @@ dependencies = { module=all-binutils; on=all-build-flex; }; dependencies = { module=all-binutils; on=all-build-bison; }; dependencies = { module=all-binutils; on=all-intl; }; dependencies = { module=all-binutils; on=all-gas; }; +dependencies = { module=all-binutils; on=all-libctf; }; +dependencies = { module=all-ld; on=all-libctf; }; // We put install-opcodes before install-binutils because the installed // binutils might be on PATH, and they might need the shared opcodes @@ -509,12 +547,20 @@ dependencies = { module=all-sim; on=all-libiberty; }; dependencies = { module=all-sim; on=all-bfd; }; dependencies = { module=all-sim; on=all-opcodes; }; dependencies = { module=all-sim; on=all-readline; }; -dependencies = { module=all-sim; on=configure-gdb; }; // Other host modules. dependencies = { module=all-fastjar; on=all-zlib; }; dependencies = { module=all-fastjar; on=all-build-texinfo; }; dependencies = { module=all-fastjar; on=all-libiberty; }; +dependencies = { module=all-libctf; on=all-libiberty; hard=true; }; +dependencies = { module=all-libctf; on=all-bfd; }; +dependencies = { module=all-libctf; on=all-zlib; }; +// So that checking for ELF support in BFD from libctf configure is possible. +dependencies = { module=configure-libctf; on=all-bfd; }; +dependencies = { module=configure-libctf; on=all-intl; }; +dependencies = { module=configure-libctf; on=all-zlib; }; +dependencies = { module=configure-libctf; on=all-libiconv; }; +dependencies = { module=check-libctf; on=all-ld; }; // Warning, these are not well tested. dependencies = { module=all-bison; on=all-intl; }; @@ -547,6 +593,11 @@ dependencies = { module=configure-target-libgo; on=all-target-libstdc++-v3; }; dependencies = { module=all-target-libgo; on=all-target-libbacktrace; }; dependencies = { module=all-target-libgo; on=all-target-libffi; }; dependencies = { module=all-target-libgo; on=all-target-libatomic; }; +dependencies = { module=configure-target-libphobos; on=configure-target-libbacktrace; }; +dependencies = { module=configure-target-libphobos; on=configure-target-zlib; }; +dependencies = { module=all-target-libphobos; on=all-target-libbacktrace; }; +dependencies = { module=all-target-libphobos; on=all-target-zlib; }; +dependencies = { module=all-target-libphobos; on=all-target-libatomic; }; dependencies = { module=configure-target-libstdc++-v3; on=configure-target-libgomp; }; dependencies = { module=configure-target-liboffloadmic; on=configure-target-libgomp; }; dependencies = { module=configure-target-libsanitizer; on=all-target-libstdc++-v3; }; @@ -560,6 +611,7 @@ dependencies = { module=all-target-liboffloadmic; on=all-target-libgomp; }; dependencies = { module=install-target-libgo; on=install-target-libatomic; }; dependencies = { module=install-target-libgfortran; on=install-target-libquadmath; }; dependencies = { module=install-target-libgfortran; on=install-target-libgcc; }; +dependencies = { module=install-target-libphobos; on=install-target-libatomic; }; dependencies = { module=install-target-libsanitizer; on=install-target-libstdc++-v3; }; dependencies = { module=install-target-libsanitizer; on=install-target-libgcc; }; dependencies = { module=install-target-libvtv; on=install-target-libstdc++-v3; }; @@ -589,7 +641,8 @@ languages = { language=c++; gcc-check-target=check-c++; lib-check-target=check-target-libgomp-c++; }; languages = { language=fortran; 
gcc-check-target=check-fortran; lib-check-target=check-target-libquadmath; - lib-check-target=check-target-libgfortran; }; + lib-check-target=check-target-libgfortran; + lib-check-target=check-target-libgomp-fortran; }; languages = { language=ada; gcc-check-target=check-ada; lib-check-target=check-target-libada; }; languages = { language=objc; gcc-check-target=check-objc; @@ -600,6 +653,8 @@ languages = { language=go; gcc-check-target=check-go; lib-check-target=check-gotools; }; languages = { language=brig; gcc-check-target=check-brig; lib-check-target=check-target-libhsail-rt; }; +languages = { language=d; gcc-check-target=check-d; + lib-check-target=check-target-libphobos; }; // Toplevel bootstrap bootstrap_stage = { id=1 ; }; diff --git a/Makefile.in b/Makefile.in index 38774f542a641..047be0255e26a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -156,6 +156,8 @@ BUILD_EXPORTS = \ GFORTRAN="$(GFORTRAN_FOR_BUILD)"; export GFORTRAN; \ GOC="$(GOC_FOR_BUILD)"; export GOC; \ GOCFLAGS="$(GOCFLAGS_FOR_BUILD)"; export GOCFLAGS; \ + GDC="$(GDC_FOR_BUILD)"; export GDC; \ + GDCFLAGS="$(GDCFLAGS_FOR_BUILD)"; export GDCFLAGS; \ DLLTOOL="$(DLLTOOL_FOR_BUILD)"; export DLLTOOL; \ LD="$(LD_FOR_BUILD)"; export LD; \ LDFLAGS="$(LDFLAGS_FOR_BUILD)"; export LDFLAGS; \ @@ -192,9 +194,11 @@ HOST_EXPORTS = \ CXXFLAGS="$(CXXFLAGS)"; export CXXFLAGS; \ GFORTRAN="$(GFORTRAN)"; export GFORTRAN; \ GOC="$(GOC)"; export GOC; \ + GDC="$(GDC)"; export GDC; \ AR="$(AR)"; export AR; \ AS="$(AS)"; export AS; \ CC_FOR_BUILD="$(CC_FOR_BUILD)"; export CC_FOR_BUILD; \ + CXX_FOR_BUILD="$(CXX_FOR_BUILD)"; export CXX_FOR_BUILD; \ DLLTOOL="$(DLLTOOL)"; export DLLTOOL; \ LD="$(LD)"; export LD; \ LDFLAGS="$(STAGE1_LDFLAGS) $(LDFLAGS)"; export LDFLAGS; \ @@ -204,6 +208,7 @@ HOST_EXPORTS = \ WINDMC="$(WINDMC)"; export WINDMC; \ OBJCOPY="$(OBJCOPY)"; export OBJCOPY; \ OBJDUMP="$(OBJDUMP)"; export OBJDUMP; \ + OTOOL="$(OTOOL)"; export OTOOL; \ READELF="$(READELF)"; export READELF; \ AR_FOR_TARGET="$(AR_FOR_TARGET)"; export AR_FOR_TARGET; \ AS_FOR_TARGET="$(AS_FOR_TARGET)"; export AS_FOR_TARGET; \ @@ -212,6 +217,7 @@ HOST_EXPORTS = \ NM_FOR_TARGET="$(NM_FOR_TARGET)"; export NM_FOR_TARGET; \ OBJDUMP_FOR_TARGET="$(OBJDUMP_FOR_TARGET)"; export OBJDUMP_FOR_TARGET; \ OBJCOPY_FOR_TARGET="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY_FOR_TARGET; \ + OTOOL_FOR_TARGET="$(OTOOL_FOR_TARGET)"; export OTOOL_FOR_TARGET; \ RANLIB_FOR_TARGET="$(RANLIB_FOR_TARGET)"; export RANLIB_FOR_TARGET; \ READELF_FOR_TARGET="$(READELF_FOR_TARGET)"; export READELF_FOR_TARGET; \ TOPLEVEL_CONFIGURE_ARGUMENTS="$(TOPLEVEL_CONFIGURE_ARGUMENTS)"; export TOPLEVEL_CONFIGURE_ARGUMENTS; \ @@ -256,6 +262,14 @@ POSTSTAGE1_HOST_EXPORTS = \ CC_FOR_BUILD="$$CC"; export CC_FOR_BUILD; \ $(POSTSTAGE1_CXX_EXPORT) \ $(LTO_EXPORTS) \ + GDC="$$r/$(HOST_SUBDIR)/prev-gcc/gdc$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/ \ + -B$(build_tooldir)/bin/ $(GDC_FLAGS_FOR_TARGET) \ + -B$$r/prev-$(TARGET_SUBDIR)/libphobos/src \ + -I$$r/prev-$(TARGET_SUBDIR)/libphobos/libdruntime -I$$s/libphobos/libdruntime \ + -L$$r/prev-$(TARGET_SUBDIR)/libphobos/src/.libs \ + -L$$r/prev-$(TARGET_SUBDIR)/libphobos/libdruntime/.libs"; \ + export GDC; \ + GDC_FOR_BUILD="$$GDC"; export GDC_FOR_BUILD; \ GNATBIND="$$r/$(HOST_SUBDIR)/prev-gcc/gnatbind"; export GNATBIND; \ LDFLAGS="$(POSTSTAGE1_LDFLAGS) $(BOOT_LDFLAGS)"; export LDFLAGS; \ HOST_LIBS="$(POSTSTAGE1_LIBS)"; export HOST_LIBS; @@ -278,6 +292,7 @@ BASE_TARGET_EXPORTS = \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ GFORTRAN="$(GFORTRAN_FOR_TARGET) 
$(XGCC_FLAGS_FOR_TARGET) $$TFLAGS"; export GFORTRAN; \ GOC="$(GOC_FOR_TARGET) $(XGCC_FLAGS_FOR_TARGET) $$TFLAGS"; export GOC; \ + GDC="$(GDC_FOR_TARGET) $(XGCC_FLAGS_FOR_TARGET) $$TFLAGS"; export GDC; \ DLLTOOL="$(DLLTOOL_FOR_TARGET)"; export DLLTOOL; \ LD="$(COMPILER_LD_FOR_TARGET)"; export LD; \ LDFLAGS="$(LDFLAGS_FOR_TARGET)"; export LDFLAGS; \ @@ -285,6 +300,7 @@ BASE_TARGET_EXPORTS = \ NM="$(COMPILER_NM_FOR_TARGET)"; export NM; \ OBJDUMP="$(OBJDUMP_FOR_TARGET)"; export OBJDUMP; \ OBJCOPY="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY; \ + OTOOL="$(OTOOL_FOR_TARGET)"; export OTOOL; \ RANLIB="$(RANLIB_FOR_TARGET)"; export RANLIB; \ READELF="$(READELF_FOR_TARGET)"; export READELF; \ STRIP="$(STRIP_FOR_TARGET)"; export STRIP; \ @@ -342,6 +358,7 @@ CXX_FOR_BUILD = @CXX_FOR_BUILD@ DLLTOOL_FOR_BUILD = @DLLTOOL_FOR_BUILD@ GFORTRAN_FOR_BUILD = @GFORTRAN_FOR_BUILD@ GOC_FOR_BUILD = @GOC_FOR_BUILD@ +GDC_FOR_BUILD = @GDC_FOR_BUILD@ LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ LD_FOR_BUILD = @LD_FOR_BUILD@ NM_FOR_BUILD = @NM_FOR_BUILD@ @@ -371,7 +388,7 @@ MAKEINFO = @MAKEINFO@ EXPECT = @EXPECT@ RUNTEST = @RUNTEST@ -AUTO_PROFILE = gcc-auto-profile -c 1000000 +AUTO_PROFILE = gcc-auto-profile -c 10000000 # This just becomes part of the MAKEINFO definition passed down to # sub-makes. It lets flags be given on the command line while still @@ -393,12 +410,14 @@ LD = @LD@ LIPO = @LIPO@ NM = @NM@ OBJDUMP = @OBJDUMP@ +OTOOL = @OTOOL@ RANLIB = @RANLIB@ READELF = @READELF@ STRIP = @STRIP@ WINDRES = @WINDRES@ WINDMC = @WINDMC@ +GDC = @GDC@ GNATBIND = @GNATBIND@ GNATMAKE = @GNATMAKE@ @@ -408,6 +427,7 @@ LIBCFLAGS = $(CFLAGS) CXXFLAGS = @CXXFLAGS@ LIBCXXFLAGS = $(CXXFLAGS) -fno-implicit-templates GOCFLAGS = $(CFLAGS) +GDCFLAGS = $(CFLAGS) CREATE_GCOV = create_gcov @@ -529,13 +549,23 @@ STAGE1_CONFIGURE_FLAGS = --disable-intermodule $(STAGE1_CHECKING) \ --disable-coverage --enable-languages="$(STAGE1_LANGUAGES)" \ --disable-build-format-warnings +# When using the slow stage1 compiler disable IL verification and forcefully +# enable it when using the stage2 compiler instead. As we later compare +# stage2 and stage3 we are merely avoid doing redundant work, plus we apply +# checking when building all target libraries for release builds. 
+STAGE1_TFLAGS += -fno-checking +STAGE2_CFLAGS += -fno-checking +STAGE2_TFLAGS += -fno-checking +STAGE3_CFLAGS += -fchecking=1 +STAGE3_TFLAGS += -fchecking=1 + STAGEprofile_CFLAGS = $(STAGE2_CFLAGS) -fprofile-generate STAGEprofile_TFLAGS = $(STAGE2_TFLAGS) -STAGEtrain_CFLAGS = $(STAGE3_CFLAGS) -STAGEtrain_TFLAGS = $(STAGE3_TFLAGS) +STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS)) +STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS)) -STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use +STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use -fprofile-reproducible=parallel-runs STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS) STAGEautoprofile_CFLAGS = $(STAGE2_CFLAGS) -g @@ -564,6 +594,7 @@ CXX_FOR_TARGET=$(STAGE_CC_WRAPPER) @CXX_FOR_TARGET@ RAW_CXX_FOR_TARGET=$(STAGE_CC_WRAPPER) @RAW_CXX_FOR_TARGET@ GFORTRAN_FOR_TARGET=$(STAGE_CC_WRAPPER) @GFORTRAN_FOR_TARGET@ GOC_FOR_TARGET=$(STAGE_CC_WRAPPER) @GOC_FOR_TARGET@ +GDC_FOR_TARGET=$(STAGE_CC_WRAPPER) @GDC_FOR_TARGET@ DLLTOOL_FOR_TARGET=@DLLTOOL_FOR_TARGET@ LD_FOR_TARGET=@LD_FOR_TARGET@ @@ -571,6 +602,7 @@ LIPO_FOR_TARGET=@LIPO_FOR_TARGET@ NM_FOR_TARGET=@NM_FOR_TARGET@ OBJDUMP_FOR_TARGET=@OBJDUMP_FOR_TARGET@ OBJCOPY_FOR_TARGET=@OBJCOPY_FOR_TARGET@ +OTOOL_FOR_TARGET=@OTOOL_FOR_TARGET@ RANLIB_FOR_TARGET=@RANLIB_FOR_TARGET@ READELF_FOR_TARGET=@READELF_FOR_TARGET@ STRIP_FOR_TARGET=@STRIP_FOR_TARGET@ @@ -588,6 +620,7 @@ LIBCFLAGS_FOR_TARGET = $(CFLAGS_FOR_TARGET) LIBCXXFLAGS_FOR_TARGET = $(CXXFLAGS_FOR_TARGET) -fno-implicit-templates LDFLAGS_FOR_TARGET = @LDFLAGS_FOR_TARGET@ GOCFLAGS_FOR_TARGET = -O2 -g +GDCFLAGS_FOR_TARGET = -O2 -g FLAGS_FOR_TARGET = @FLAGS_FOR_TARGET@ SYSROOT_CFLAGS_FOR_TARGET = @SYSROOT_CFLAGS_FOR_TARGET@ @@ -612,7 +645,7 @@ all: # This is the list of directories that may be needed in RPATH_ENVVAR # so that programs built for the target machine work. 
-TARGET_LIB_PATH = $(TARGET_LIB_PATH_libstdc++-v3)$(TARGET_LIB_PATH_libsanitizer)$(TARGET_LIB_PATH_libmpx)$(TARGET_LIB_PATH_libvtv)$(TARGET_LIB_PATH_liboffloadmic)$(TARGET_LIB_PATH_libssp)$(TARGET_LIB_PATH_libgomp)$(TARGET_LIB_PATH_libitm)$(TARGET_LIB_PATH_libatomic)$(HOST_LIB_PATH_gcc) +TARGET_LIB_PATH = $(TARGET_LIB_PATH_libstdc++-v3)$(TARGET_LIB_PATH_libsanitizer)$(TARGET_LIB_PATH_libvtv)$(TARGET_LIB_PATH_liboffloadmic)$(TARGET_LIB_PATH_libssp)$(TARGET_LIB_PATH_libphobos)$(TARGET_LIB_PATH_libgomp)$(TARGET_LIB_PATH_libitm)$(TARGET_LIB_PATH_libatomic)$(HOST_LIB_PATH_gcc) @if target-libstdc++-v3 TARGET_LIB_PATH_libstdc++-v3 = $$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs: @@ -622,10 +655,6 @@ TARGET_LIB_PATH_libstdc++-v3 = $$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs: TARGET_LIB_PATH_libsanitizer = $$r/$(TARGET_SUBDIR)/libsanitizer/.libs: @endif target-libsanitizer -@if target-libmpx -TARGET_LIB_PATH_libmpx = $$r/$(TARGET_SUBDIR)/libmpx/.libs: -@endif target-libmpx - @if target-libvtv TARGET_LIB_PATH_libvtv = $$r/$(TARGET_SUBDIR)/libvtv/.libs: @endif target-libvtv @@ -638,6 +667,10 @@ TARGET_LIB_PATH_liboffloadmic = $$r/$(TARGET_SUBDIR)/liboffloadmic/.libs: TARGET_LIB_PATH_libssp = $$r/$(TARGET_SUBDIR)/libssp/.libs: @endif target-libssp +@if target-libphobos +TARGET_LIB_PATH_libphobos = $$r/$(TARGET_SUBDIR)/libphobos/src/.libs: +@endif target-libphobos + @if target-libgomp TARGET_LIB_PATH_libgomp = $$r/$(TARGET_SUBDIR)/libgomp/.libs: @endif target-libgomp @@ -772,6 +805,8 @@ BASE_FLAGS_TO_PASS = \ "STAGE1_LANGUAGES=$(STAGE1_LANGUAGES)" \ "GNATBIND=$(GNATBIND)" \ "GNATMAKE=$(GNATMAKE)" \ + "GDC=$(GDC)" \ + "GDCFLAGS=$(GDCFLAGS)" \ "AR_FOR_TARGET=$(AR_FOR_TARGET)" \ "AS_FOR_TARGET=$(AS_FOR_TARGET)" \ "CC_FOR_TARGET=$(CC_FOR_TARGET)" \ @@ -783,6 +818,8 @@ BASE_FLAGS_TO_PASS = \ "GFORTRAN_FOR_TARGET=$(GFORTRAN_FOR_TARGET)" \ "GOC_FOR_TARGET=$(GOC_FOR_TARGET)" \ "GOCFLAGS_FOR_TARGET=$(GOCFLAGS_FOR_TARGET)" \ + "GDC_FOR_TARGET=$(GDC_FOR_TARGET)" \ + "GDCFLAGS_FOR_TARGET=$(GDCFLAGS_FOR_TARGET)" \ "LD_FOR_TARGET=$(LD_FOR_TARGET)" \ "LIPO_FOR_TARGET=$(LIPO_FOR_TARGET)" \ "LDFLAGS_FOR_TARGET=$(LDFLAGS_FOR_TARGET)" \ @@ -801,30 +838,39 @@ BASE_FLAGS_TO_PASS = \ "LEAN=$(LEAN)" \ "STAGE1_CFLAGS=$(STAGE1_CFLAGS)" \ "STAGE1_CXXFLAGS=$(STAGE1_CXXFLAGS)" \ + "STAGE1_GENERATOR_CFLAGS=$(STAGE1_GENERATOR_CFLAGS)" \ "STAGE1_TFLAGS=$(STAGE1_TFLAGS)" \ "STAGE2_CFLAGS=$(STAGE2_CFLAGS)" \ "STAGE2_CXXFLAGS=$(STAGE2_CXXFLAGS)" \ + "STAGE2_GENERATOR_CFLAGS=$(STAGE2_GENERATOR_CFLAGS)" \ "STAGE2_TFLAGS=$(STAGE2_TFLAGS)" \ "STAGE3_CFLAGS=$(STAGE3_CFLAGS)" \ "STAGE3_CXXFLAGS=$(STAGE3_CXXFLAGS)" \ + "STAGE3_GENERATOR_CFLAGS=$(STAGE3_GENERATOR_CFLAGS)" \ "STAGE3_TFLAGS=$(STAGE3_TFLAGS)" \ "STAGE4_CFLAGS=$(STAGE4_CFLAGS)" \ "STAGE4_CXXFLAGS=$(STAGE4_CXXFLAGS)" \ + "STAGE4_GENERATOR_CFLAGS=$(STAGE4_GENERATOR_CFLAGS)" \ "STAGE4_TFLAGS=$(STAGE4_TFLAGS)" \ "STAGEprofile_CFLAGS=$(STAGEprofile_CFLAGS)" \ "STAGEprofile_CXXFLAGS=$(STAGEprofile_CXXFLAGS)" \ + "STAGEprofile_GENERATOR_CFLAGS=$(STAGEprofile_GENERATOR_CFLAGS)" \ "STAGEprofile_TFLAGS=$(STAGEprofile_TFLAGS)" \ "STAGEtrain_CFLAGS=$(STAGEtrain_CFLAGS)" \ "STAGEtrain_CXXFLAGS=$(STAGEtrain_CXXFLAGS)" \ + "STAGEtrain_GENERATOR_CFLAGS=$(STAGEtrain_GENERATOR_CFLAGS)" \ "STAGEtrain_TFLAGS=$(STAGEtrain_TFLAGS)" \ "STAGEfeedback_CFLAGS=$(STAGEfeedback_CFLAGS)" \ "STAGEfeedback_CXXFLAGS=$(STAGEfeedback_CXXFLAGS)" \ + "STAGEfeedback_GENERATOR_CFLAGS=$(STAGEfeedback_GENERATOR_CFLAGS)" \ "STAGEfeedback_TFLAGS=$(STAGEfeedback_TFLAGS)" \ "STAGEautoprofile_CFLAGS=$(STAGEautoprofile_CFLAGS)" 
\ "STAGEautoprofile_CXXFLAGS=$(STAGEautoprofile_CXXFLAGS)" \ + "STAGEautoprofile_GENERATOR_CFLAGS=$(STAGEautoprofile_GENERATOR_CFLAGS)" \ "STAGEautoprofile_TFLAGS=$(STAGEautoprofile_TFLAGS)" \ "STAGEautofeedback_CFLAGS=$(STAGEautofeedback_CFLAGS)" \ "STAGEautofeedback_CXXFLAGS=$(STAGEautofeedback_CXXFLAGS)" \ + "STAGEautofeedback_GENERATOR_CFLAGS=$(STAGEautofeedback_GENERATOR_CFLAGS)" \ "STAGEautofeedback_TFLAGS=$(STAGEautofeedback_TFLAGS)" \ $(CXX_FOR_TARGET_FLAG_TO_PASS) \ "TFLAGS=$(TFLAGS)" \ @@ -845,10 +891,12 @@ EXTRA_HOST_FLAGS = \ 'DLLTOOL=$(DLLTOOL)' \ 'GFORTRAN=$(GFORTRAN)' \ 'GOC=$(GOC)' \ + 'GDC=$(GDC)' \ 'LD=$(LD)' \ 'LIPO=$(LIPO)' \ 'NM=$(NM)' \ 'OBJDUMP=$(OBJDUMP)' \ + 'OTOOL=$(OTOOL)' \ 'RANLIB=$(RANLIB)' \ 'READELF=$(READELF)' \ 'STRIP=$(STRIP)' \ @@ -869,6 +917,7 @@ STAGE1_FLAGS_TO_PASS = \ POSTSTAGE1_FLAGS_TO_PASS = \ CC="$${CC}" CC_FOR_BUILD="$${CC_FOR_BUILD}" \ CXX="$${CXX}" CXX_FOR_BUILD="$${CXX_FOR_BUILD}" \ + GDC="$${GDC}" GDC_FOR_BUILD="$${GDC_FOR_BUILD}" \ GNATBIND="$${GNATBIND}" \ LDFLAGS="$${LDFLAGS}" \ HOST_LIBS="$${HOST_LIBS}" \ @@ -901,6 +950,8 @@ EXTRA_TARGET_FLAGS = \ 'GFORTRAN=$$(GFORTRAN_FOR_TARGET) $$(XGCC_FLAGS_FOR_TARGET) $$(TFLAGS)' \ 'GOC=$$(GOC_FOR_TARGET) $$(XGCC_FLAGS_FOR_TARGET) $$(TFLAGS)' \ 'GOCFLAGS=$$(GOCFLAGS_FOR_TARGET)' \ + 'GDC=$$(GDC_FOR_TARGET) $$(XGCC_FLAGS_FOR_TARGET) $$(TFLAGS)' \ + 'GDCFLAGS=$$(GDCFLAGS_FOR_TARGET)' \ 'LD=$(COMPILER_LD_FOR_TARGET)' \ 'LDFLAGS=$$(LDFLAGS_FOR_TARGET)' \ 'LIBCFLAGS=$$(LIBCFLAGS_FOR_TARGET)' \ @@ -966,6 +1017,7 @@ configure-host: \ maybe-configure-ld \ maybe-configure-libbacktrace \ maybe-configure-libcpp \ + maybe-configure-libcody \ maybe-configure-libdecnumber \ maybe-configure-libgui \ maybe-configure-libiberty \ @@ -977,21 +1029,25 @@ configure-host: \ maybe-configure-sim \ maybe-configure-texinfo \ maybe-configure-zlib \ + maybe-configure-gnulib \ + maybe-configure-gdbsupport \ + maybe-configure-gdbserver \ maybe-configure-gdb \ maybe-configure-expect \ maybe-configure-guile \ maybe-configure-tk \ maybe-configure-libtermcap \ maybe-configure-utils \ + maybe-configure-c++tools \ maybe-configure-gnattools \ maybe-configure-lto-plugin \ maybe-configure-libcc1 \ - maybe-configure-gotools + maybe-configure-gotools \ + maybe-configure-libctf .PHONY: configure-target configure-target: \ maybe-configure-target-libstdc++-v3 \ maybe-configure-target-libsanitizer \ - maybe-configure-target-libmpx \ maybe-configure-target-libvtv \ maybe-configure-target-liboffloadmic \ maybe-configure-target-libssp \ @@ -1003,6 +1059,7 @@ configure-target: \ maybe-configure-target-libobjc \ maybe-configure-target-libgo \ maybe-configure-target-libhsail-rt \ + maybe-configure-target-libphobos \ maybe-configure-target-libtermcap \ maybe-configure-target-winsup \ maybe-configure-target-libgloss \ @@ -1108,6 +1165,9 @@ all-host: maybe-all-libbacktrace @if libcpp-no-bootstrap all-host: maybe-all-libcpp @endif libcpp-no-bootstrap +@if libcody-no-bootstrap +all-host: maybe-all-libcody +@endif libcody-no-bootstrap @if libdecnumber-no-bootstrap all-host: maybe-all-libdecnumber @endif libdecnumber-no-bootstrap @@ -1129,18 +1189,25 @@ all-host: maybe-all-texinfo @if zlib-no-bootstrap all-host: maybe-all-zlib @endif zlib-no-bootstrap +all-host: maybe-all-gnulib +all-host: maybe-all-gdbsupport +all-host: maybe-all-gdbserver all-host: maybe-all-gdb all-host: maybe-all-expect all-host: maybe-all-guile all-host: maybe-all-tk all-host: maybe-all-libtermcap all-host: maybe-all-utils +all-host: maybe-all-c++tools all-host: maybe-all-gnattools 
@if lto-plugin-no-bootstrap all-host: maybe-all-lto-plugin @endif lto-plugin-no-bootstrap all-host: maybe-all-libcc1 all-host: maybe-all-gotools +@if libctf-no-bootstrap +all-host: maybe-all-libctf +@endif libctf-no-bootstrap .PHONY: all-target @@ -1150,9 +1217,6 @@ all-target: maybe-all-target-libstdc++-v3 @if target-libsanitizer-no-bootstrap all-target: maybe-all-target-libsanitizer @endif target-libsanitizer-no-bootstrap -@if target-libmpx-no-bootstrap -all-target: maybe-all-target-libmpx -@endif target-libmpx-no-bootstrap @if target-libvtv-no-bootstrap all-target: maybe-all-target-libvtv @endif target-libvtv-no-bootstrap @@ -1168,6 +1232,7 @@ all-target: maybe-all-target-libgfortran all-target: maybe-all-target-libobjc all-target: maybe-all-target-libgo all-target: maybe-all-target-libhsail-rt +all-target: maybe-all-target-libphobos all-target: maybe-all-target-libtermcap all-target: maybe-all-target-winsup all-target: maybe-all-target-libgloss @@ -1222,6 +1287,7 @@ info-host: maybe-info-itcl info-host: maybe-info-ld info-host: maybe-info-libbacktrace info-host: maybe-info-libcpp +info-host: maybe-info-libcody info-host: maybe-info-libdecnumber info-host: maybe-info-libgui info-host: maybe-info-libiberty @@ -1233,22 +1299,26 @@ info-host: maybe-info-sid info-host: maybe-info-sim info-host: maybe-info-texinfo info-host: maybe-info-zlib +info-host: maybe-info-gnulib +info-host: maybe-info-gdbsupport +info-host: maybe-info-gdbserver info-host: maybe-info-gdb info-host: maybe-info-expect info-host: maybe-info-guile info-host: maybe-info-tk info-host: maybe-info-libtermcap info-host: maybe-info-utils +info-host: maybe-info-c++tools info-host: maybe-info-gnattools info-host: maybe-info-lto-plugin info-host: maybe-info-libcc1 info-host: maybe-info-gotools +info-host: maybe-info-libctf .PHONY: info-target info-target: maybe-info-target-libstdc++-v3 info-target: maybe-info-target-libsanitizer -info-target: maybe-info-target-libmpx info-target: maybe-info-target-libvtv info-target: maybe-info-target-liboffloadmic info-target: maybe-info-target-libssp @@ -1260,6 +1330,7 @@ info-target: maybe-info-target-libgfortran info-target: maybe-info-target-libobjc info-target: maybe-info-target-libgo info-target: maybe-info-target-libhsail-rt +info-target: maybe-info-target-libphobos info-target: maybe-info-target-libtermcap info-target: maybe-info-target-winsup info-target: maybe-info-target-libgloss @@ -1307,6 +1378,7 @@ dvi-host: maybe-dvi-itcl dvi-host: maybe-dvi-ld dvi-host: maybe-dvi-libbacktrace dvi-host: maybe-dvi-libcpp +dvi-host: maybe-dvi-libcody dvi-host: maybe-dvi-libdecnumber dvi-host: maybe-dvi-libgui dvi-host: maybe-dvi-libiberty @@ -1318,22 +1390,26 @@ dvi-host: maybe-dvi-sid dvi-host: maybe-dvi-sim dvi-host: maybe-dvi-texinfo dvi-host: maybe-dvi-zlib +dvi-host: maybe-dvi-gnulib +dvi-host: maybe-dvi-gdbsupport +dvi-host: maybe-dvi-gdbserver dvi-host: maybe-dvi-gdb dvi-host: maybe-dvi-expect dvi-host: maybe-dvi-guile dvi-host: maybe-dvi-tk dvi-host: maybe-dvi-libtermcap dvi-host: maybe-dvi-utils +dvi-host: maybe-dvi-c++tools dvi-host: maybe-dvi-gnattools dvi-host: maybe-dvi-lto-plugin dvi-host: maybe-dvi-libcc1 dvi-host: maybe-dvi-gotools +dvi-host: maybe-dvi-libctf .PHONY: dvi-target dvi-target: maybe-dvi-target-libstdc++-v3 dvi-target: maybe-dvi-target-libsanitizer -dvi-target: maybe-dvi-target-libmpx dvi-target: maybe-dvi-target-libvtv dvi-target: maybe-dvi-target-liboffloadmic dvi-target: maybe-dvi-target-libssp @@ -1345,6 +1421,7 @@ dvi-target: maybe-dvi-target-libgfortran dvi-target: 
maybe-dvi-target-libobjc dvi-target: maybe-dvi-target-libgo dvi-target: maybe-dvi-target-libhsail-rt +dvi-target: maybe-dvi-target-libphobos dvi-target: maybe-dvi-target-libtermcap dvi-target: maybe-dvi-target-winsup dvi-target: maybe-dvi-target-libgloss @@ -1392,6 +1469,7 @@ pdf-host: maybe-pdf-itcl pdf-host: maybe-pdf-ld pdf-host: maybe-pdf-libbacktrace pdf-host: maybe-pdf-libcpp +pdf-host: maybe-pdf-libcody pdf-host: maybe-pdf-libdecnumber pdf-host: maybe-pdf-libgui pdf-host: maybe-pdf-libiberty @@ -1403,22 +1481,26 @@ pdf-host: maybe-pdf-sid pdf-host: maybe-pdf-sim pdf-host: maybe-pdf-texinfo pdf-host: maybe-pdf-zlib +pdf-host: maybe-pdf-gnulib +pdf-host: maybe-pdf-gdbsupport +pdf-host: maybe-pdf-gdbserver pdf-host: maybe-pdf-gdb pdf-host: maybe-pdf-expect pdf-host: maybe-pdf-guile pdf-host: maybe-pdf-tk pdf-host: maybe-pdf-libtermcap pdf-host: maybe-pdf-utils +pdf-host: maybe-pdf-c++tools pdf-host: maybe-pdf-gnattools pdf-host: maybe-pdf-lto-plugin pdf-host: maybe-pdf-libcc1 pdf-host: maybe-pdf-gotools +pdf-host: maybe-pdf-libctf .PHONY: pdf-target pdf-target: maybe-pdf-target-libstdc++-v3 pdf-target: maybe-pdf-target-libsanitizer -pdf-target: maybe-pdf-target-libmpx pdf-target: maybe-pdf-target-libvtv pdf-target: maybe-pdf-target-liboffloadmic pdf-target: maybe-pdf-target-libssp @@ -1430,6 +1512,7 @@ pdf-target: maybe-pdf-target-libgfortran pdf-target: maybe-pdf-target-libobjc pdf-target: maybe-pdf-target-libgo pdf-target: maybe-pdf-target-libhsail-rt +pdf-target: maybe-pdf-target-libphobos pdf-target: maybe-pdf-target-libtermcap pdf-target: maybe-pdf-target-winsup pdf-target: maybe-pdf-target-libgloss @@ -1477,6 +1560,7 @@ html-host: maybe-html-itcl html-host: maybe-html-ld html-host: maybe-html-libbacktrace html-host: maybe-html-libcpp +html-host: maybe-html-libcody html-host: maybe-html-libdecnumber html-host: maybe-html-libgui html-host: maybe-html-libiberty @@ -1488,22 +1572,26 @@ html-host: maybe-html-sid html-host: maybe-html-sim html-host: maybe-html-texinfo html-host: maybe-html-zlib +html-host: maybe-html-gnulib +html-host: maybe-html-gdbsupport +html-host: maybe-html-gdbserver html-host: maybe-html-gdb html-host: maybe-html-expect html-host: maybe-html-guile html-host: maybe-html-tk html-host: maybe-html-libtermcap html-host: maybe-html-utils +html-host: maybe-html-c++tools html-host: maybe-html-gnattools html-host: maybe-html-lto-plugin html-host: maybe-html-libcc1 html-host: maybe-html-gotools +html-host: maybe-html-libctf .PHONY: html-target html-target: maybe-html-target-libstdc++-v3 html-target: maybe-html-target-libsanitizer -html-target: maybe-html-target-libmpx html-target: maybe-html-target-libvtv html-target: maybe-html-target-liboffloadmic html-target: maybe-html-target-libssp @@ -1515,6 +1603,7 @@ html-target: maybe-html-target-libgfortran html-target: maybe-html-target-libobjc html-target: maybe-html-target-libgo html-target: maybe-html-target-libhsail-rt +html-target: maybe-html-target-libphobos html-target: maybe-html-target-libtermcap html-target: maybe-html-target-winsup html-target: maybe-html-target-libgloss @@ -1562,6 +1651,7 @@ TAGS-host: maybe-TAGS-itcl TAGS-host: maybe-TAGS-ld TAGS-host: maybe-TAGS-libbacktrace TAGS-host: maybe-TAGS-libcpp +TAGS-host: maybe-TAGS-libcody TAGS-host: maybe-TAGS-libdecnumber TAGS-host: maybe-TAGS-libgui TAGS-host: maybe-TAGS-libiberty @@ -1573,22 +1663,26 @@ TAGS-host: maybe-TAGS-sid TAGS-host: maybe-TAGS-sim TAGS-host: maybe-TAGS-texinfo TAGS-host: maybe-TAGS-zlib +TAGS-host: maybe-TAGS-gnulib +TAGS-host: 
maybe-TAGS-gdbsupport +TAGS-host: maybe-TAGS-gdbserver TAGS-host: maybe-TAGS-gdb TAGS-host: maybe-TAGS-expect TAGS-host: maybe-TAGS-guile TAGS-host: maybe-TAGS-tk TAGS-host: maybe-TAGS-libtermcap TAGS-host: maybe-TAGS-utils +TAGS-host: maybe-TAGS-c++tools TAGS-host: maybe-TAGS-gnattools TAGS-host: maybe-TAGS-lto-plugin TAGS-host: maybe-TAGS-libcc1 TAGS-host: maybe-TAGS-gotools +TAGS-host: maybe-TAGS-libctf .PHONY: TAGS-target TAGS-target: maybe-TAGS-target-libstdc++-v3 TAGS-target: maybe-TAGS-target-libsanitizer -TAGS-target: maybe-TAGS-target-libmpx TAGS-target: maybe-TAGS-target-libvtv TAGS-target: maybe-TAGS-target-liboffloadmic TAGS-target: maybe-TAGS-target-libssp @@ -1600,6 +1694,7 @@ TAGS-target: maybe-TAGS-target-libgfortran TAGS-target: maybe-TAGS-target-libobjc TAGS-target: maybe-TAGS-target-libgo TAGS-target: maybe-TAGS-target-libhsail-rt +TAGS-target: maybe-TAGS-target-libphobos TAGS-target: maybe-TAGS-target-libtermcap TAGS-target: maybe-TAGS-target-winsup TAGS-target: maybe-TAGS-target-libgloss @@ -1647,6 +1742,7 @@ install-info-host: maybe-install-info-itcl install-info-host: maybe-install-info-ld install-info-host: maybe-install-info-libbacktrace install-info-host: maybe-install-info-libcpp +install-info-host: maybe-install-info-libcody install-info-host: maybe-install-info-libdecnumber install-info-host: maybe-install-info-libgui install-info-host: maybe-install-info-libiberty @@ -1658,22 +1754,26 @@ install-info-host: maybe-install-info-sid install-info-host: maybe-install-info-sim install-info-host: maybe-install-info-texinfo install-info-host: maybe-install-info-zlib +install-info-host: maybe-install-info-gnulib +install-info-host: maybe-install-info-gdbsupport +install-info-host: maybe-install-info-gdbserver install-info-host: maybe-install-info-gdb install-info-host: maybe-install-info-expect install-info-host: maybe-install-info-guile install-info-host: maybe-install-info-tk install-info-host: maybe-install-info-libtermcap install-info-host: maybe-install-info-utils +install-info-host: maybe-install-info-c++tools install-info-host: maybe-install-info-gnattools install-info-host: maybe-install-info-lto-plugin install-info-host: maybe-install-info-libcc1 install-info-host: maybe-install-info-gotools +install-info-host: maybe-install-info-libctf .PHONY: install-info-target install-info-target: maybe-install-info-target-libstdc++-v3 install-info-target: maybe-install-info-target-libsanitizer -install-info-target: maybe-install-info-target-libmpx install-info-target: maybe-install-info-target-libvtv install-info-target: maybe-install-info-target-liboffloadmic install-info-target: maybe-install-info-target-libssp @@ -1685,6 +1785,7 @@ install-info-target: maybe-install-info-target-libgfortran install-info-target: maybe-install-info-target-libobjc install-info-target: maybe-install-info-target-libgo install-info-target: maybe-install-info-target-libhsail-rt +install-info-target: maybe-install-info-target-libphobos install-info-target: maybe-install-info-target-libtermcap install-info-target: maybe-install-info-target-winsup install-info-target: maybe-install-info-target-libgloss @@ -1732,6 +1833,7 @@ install-pdf-host: maybe-install-pdf-itcl install-pdf-host: maybe-install-pdf-ld install-pdf-host: maybe-install-pdf-libbacktrace install-pdf-host: maybe-install-pdf-libcpp +install-pdf-host: maybe-install-pdf-libcody install-pdf-host: maybe-install-pdf-libdecnumber install-pdf-host: maybe-install-pdf-libgui install-pdf-host: maybe-install-pdf-libiberty @@ -1743,22 +1845,26 @@ 
install-pdf-host: maybe-install-pdf-sid install-pdf-host: maybe-install-pdf-sim install-pdf-host: maybe-install-pdf-texinfo install-pdf-host: maybe-install-pdf-zlib +install-pdf-host: maybe-install-pdf-gnulib +install-pdf-host: maybe-install-pdf-gdbsupport +install-pdf-host: maybe-install-pdf-gdbserver install-pdf-host: maybe-install-pdf-gdb install-pdf-host: maybe-install-pdf-expect install-pdf-host: maybe-install-pdf-guile install-pdf-host: maybe-install-pdf-tk install-pdf-host: maybe-install-pdf-libtermcap install-pdf-host: maybe-install-pdf-utils +install-pdf-host: maybe-install-pdf-c++tools install-pdf-host: maybe-install-pdf-gnattools install-pdf-host: maybe-install-pdf-lto-plugin install-pdf-host: maybe-install-pdf-libcc1 install-pdf-host: maybe-install-pdf-gotools +install-pdf-host: maybe-install-pdf-libctf .PHONY: install-pdf-target install-pdf-target: maybe-install-pdf-target-libstdc++-v3 install-pdf-target: maybe-install-pdf-target-libsanitizer -install-pdf-target: maybe-install-pdf-target-libmpx install-pdf-target: maybe-install-pdf-target-libvtv install-pdf-target: maybe-install-pdf-target-liboffloadmic install-pdf-target: maybe-install-pdf-target-libssp @@ -1770,6 +1876,7 @@ install-pdf-target: maybe-install-pdf-target-libgfortran install-pdf-target: maybe-install-pdf-target-libobjc install-pdf-target: maybe-install-pdf-target-libgo install-pdf-target: maybe-install-pdf-target-libhsail-rt +install-pdf-target: maybe-install-pdf-target-libphobos install-pdf-target: maybe-install-pdf-target-libtermcap install-pdf-target: maybe-install-pdf-target-winsup install-pdf-target: maybe-install-pdf-target-libgloss @@ -1817,6 +1924,7 @@ install-html-host: maybe-install-html-itcl install-html-host: maybe-install-html-ld install-html-host: maybe-install-html-libbacktrace install-html-host: maybe-install-html-libcpp +install-html-host: maybe-install-html-libcody install-html-host: maybe-install-html-libdecnumber install-html-host: maybe-install-html-libgui install-html-host: maybe-install-html-libiberty @@ -1828,22 +1936,26 @@ install-html-host: maybe-install-html-sid install-html-host: maybe-install-html-sim install-html-host: maybe-install-html-texinfo install-html-host: maybe-install-html-zlib +install-html-host: maybe-install-html-gnulib +install-html-host: maybe-install-html-gdbsupport +install-html-host: maybe-install-html-gdbserver install-html-host: maybe-install-html-gdb install-html-host: maybe-install-html-expect install-html-host: maybe-install-html-guile install-html-host: maybe-install-html-tk install-html-host: maybe-install-html-libtermcap install-html-host: maybe-install-html-utils +install-html-host: maybe-install-html-c++tools install-html-host: maybe-install-html-gnattools install-html-host: maybe-install-html-lto-plugin install-html-host: maybe-install-html-libcc1 install-html-host: maybe-install-html-gotools +install-html-host: maybe-install-html-libctf .PHONY: install-html-target install-html-target: maybe-install-html-target-libstdc++-v3 install-html-target: maybe-install-html-target-libsanitizer -install-html-target: maybe-install-html-target-libmpx install-html-target: maybe-install-html-target-libvtv install-html-target: maybe-install-html-target-liboffloadmic install-html-target: maybe-install-html-target-libssp @@ -1855,6 +1967,7 @@ install-html-target: maybe-install-html-target-libgfortran install-html-target: maybe-install-html-target-libobjc install-html-target: maybe-install-html-target-libgo install-html-target: maybe-install-html-target-libhsail-rt 
+install-html-target: maybe-install-html-target-libphobos install-html-target: maybe-install-html-target-libtermcap install-html-target: maybe-install-html-target-winsup install-html-target: maybe-install-html-target-libgloss @@ -1902,6 +2015,7 @@ installcheck-host: maybe-installcheck-itcl installcheck-host: maybe-installcheck-ld installcheck-host: maybe-installcheck-libbacktrace installcheck-host: maybe-installcheck-libcpp +installcheck-host: maybe-installcheck-libcody installcheck-host: maybe-installcheck-libdecnumber installcheck-host: maybe-installcheck-libgui installcheck-host: maybe-installcheck-libiberty @@ -1913,22 +2027,26 @@ installcheck-host: maybe-installcheck-sid installcheck-host: maybe-installcheck-sim installcheck-host: maybe-installcheck-texinfo installcheck-host: maybe-installcheck-zlib +installcheck-host: maybe-installcheck-gnulib +installcheck-host: maybe-installcheck-gdbsupport +installcheck-host: maybe-installcheck-gdbserver installcheck-host: maybe-installcheck-gdb installcheck-host: maybe-installcheck-expect installcheck-host: maybe-installcheck-guile installcheck-host: maybe-installcheck-tk installcheck-host: maybe-installcheck-libtermcap installcheck-host: maybe-installcheck-utils +installcheck-host: maybe-installcheck-c++tools installcheck-host: maybe-installcheck-gnattools installcheck-host: maybe-installcheck-lto-plugin installcheck-host: maybe-installcheck-libcc1 installcheck-host: maybe-installcheck-gotools +installcheck-host: maybe-installcheck-libctf .PHONY: installcheck-target installcheck-target: maybe-installcheck-target-libstdc++-v3 installcheck-target: maybe-installcheck-target-libsanitizer -installcheck-target: maybe-installcheck-target-libmpx installcheck-target: maybe-installcheck-target-libvtv installcheck-target: maybe-installcheck-target-liboffloadmic installcheck-target: maybe-installcheck-target-libssp @@ -1940,6 +2058,7 @@ installcheck-target: maybe-installcheck-target-libgfortran installcheck-target: maybe-installcheck-target-libobjc installcheck-target: maybe-installcheck-target-libgo installcheck-target: maybe-installcheck-target-libhsail-rt +installcheck-target: maybe-installcheck-target-libphobos installcheck-target: maybe-installcheck-target-libtermcap installcheck-target: maybe-installcheck-target-winsup installcheck-target: maybe-installcheck-target-libgloss @@ -1987,6 +2106,7 @@ mostlyclean-host: maybe-mostlyclean-itcl mostlyclean-host: maybe-mostlyclean-ld mostlyclean-host: maybe-mostlyclean-libbacktrace mostlyclean-host: maybe-mostlyclean-libcpp +mostlyclean-host: maybe-mostlyclean-libcody mostlyclean-host: maybe-mostlyclean-libdecnumber mostlyclean-host: maybe-mostlyclean-libgui mostlyclean-host: maybe-mostlyclean-libiberty @@ -1998,22 +2118,26 @@ mostlyclean-host: maybe-mostlyclean-sid mostlyclean-host: maybe-mostlyclean-sim mostlyclean-host: maybe-mostlyclean-texinfo mostlyclean-host: maybe-mostlyclean-zlib +mostlyclean-host: maybe-mostlyclean-gnulib +mostlyclean-host: maybe-mostlyclean-gdbsupport +mostlyclean-host: maybe-mostlyclean-gdbserver mostlyclean-host: maybe-mostlyclean-gdb mostlyclean-host: maybe-mostlyclean-expect mostlyclean-host: maybe-mostlyclean-guile mostlyclean-host: maybe-mostlyclean-tk mostlyclean-host: maybe-mostlyclean-libtermcap mostlyclean-host: maybe-mostlyclean-utils +mostlyclean-host: maybe-mostlyclean-c++tools mostlyclean-host: maybe-mostlyclean-gnattools mostlyclean-host: maybe-mostlyclean-lto-plugin mostlyclean-host: maybe-mostlyclean-libcc1 mostlyclean-host: maybe-mostlyclean-gotools 
+mostlyclean-host: maybe-mostlyclean-libctf .PHONY: mostlyclean-target mostlyclean-target: maybe-mostlyclean-target-libstdc++-v3 mostlyclean-target: maybe-mostlyclean-target-libsanitizer -mostlyclean-target: maybe-mostlyclean-target-libmpx mostlyclean-target: maybe-mostlyclean-target-libvtv mostlyclean-target: maybe-mostlyclean-target-liboffloadmic mostlyclean-target: maybe-mostlyclean-target-libssp @@ -2025,6 +2149,7 @@ mostlyclean-target: maybe-mostlyclean-target-libgfortran mostlyclean-target: maybe-mostlyclean-target-libobjc mostlyclean-target: maybe-mostlyclean-target-libgo mostlyclean-target: maybe-mostlyclean-target-libhsail-rt +mostlyclean-target: maybe-mostlyclean-target-libphobos mostlyclean-target: maybe-mostlyclean-target-libtermcap mostlyclean-target: maybe-mostlyclean-target-winsup mostlyclean-target: maybe-mostlyclean-target-libgloss @@ -2072,6 +2197,7 @@ clean-host: maybe-clean-itcl clean-host: maybe-clean-ld clean-host: maybe-clean-libbacktrace clean-host: maybe-clean-libcpp +clean-host: maybe-clean-libcody clean-host: maybe-clean-libdecnumber clean-host: maybe-clean-libgui clean-host: maybe-clean-libiberty @@ -2083,22 +2209,26 @@ clean-host: maybe-clean-sid clean-host: maybe-clean-sim clean-host: maybe-clean-texinfo clean-host: maybe-clean-zlib +clean-host: maybe-clean-gnulib +clean-host: maybe-clean-gdbsupport +clean-host: maybe-clean-gdbserver clean-host: maybe-clean-gdb clean-host: maybe-clean-expect clean-host: maybe-clean-guile clean-host: maybe-clean-tk clean-host: maybe-clean-libtermcap clean-host: maybe-clean-utils +clean-host: maybe-clean-c++tools clean-host: maybe-clean-gnattools clean-host: maybe-clean-lto-plugin clean-host: maybe-clean-libcc1 clean-host: maybe-clean-gotools +clean-host: maybe-clean-libctf .PHONY: clean-target clean-target: maybe-clean-target-libstdc++-v3 clean-target: maybe-clean-target-libsanitizer -clean-target: maybe-clean-target-libmpx clean-target: maybe-clean-target-libvtv clean-target: maybe-clean-target-liboffloadmic clean-target: maybe-clean-target-libssp @@ -2110,6 +2240,7 @@ clean-target: maybe-clean-target-libgfortran clean-target: maybe-clean-target-libobjc clean-target: maybe-clean-target-libgo clean-target: maybe-clean-target-libhsail-rt +clean-target: maybe-clean-target-libphobos clean-target: maybe-clean-target-libtermcap clean-target: maybe-clean-target-winsup clean-target: maybe-clean-target-libgloss @@ -2157,6 +2288,7 @@ distclean-host: maybe-distclean-itcl distclean-host: maybe-distclean-ld distclean-host: maybe-distclean-libbacktrace distclean-host: maybe-distclean-libcpp +distclean-host: maybe-distclean-libcody distclean-host: maybe-distclean-libdecnumber distclean-host: maybe-distclean-libgui distclean-host: maybe-distclean-libiberty @@ -2168,22 +2300,26 @@ distclean-host: maybe-distclean-sid distclean-host: maybe-distclean-sim distclean-host: maybe-distclean-texinfo distclean-host: maybe-distclean-zlib +distclean-host: maybe-distclean-gnulib +distclean-host: maybe-distclean-gdbsupport +distclean-host: maybe-distclean-gdbserver distclean-host: maybe-distclean-gdb distclean-host: maybe-distclean-expect distclean-host: maybe-distclean-guile distclean-host: maybe-distclean-tk distclean-host: maybe-distclean-libtermcap distclean-host: maybe-distclean-utils +distclean-host: maybe-distclean-c++tools distclean-host: maybe-distclean-gnattools distclean-host: maybe-distclean-lto-plugin distclean-host: maybe-distclean-libcc1 distclean-host: maybe-distclean-gotools +distclean-host: maybe-distclean-libctf .PHONY: distclean-target 
distclean-target: maybe-distclean-target-libstdc++-v3 distclean-target: maybe-distclean-target-libsanitizer -distclean-target: maybe-distclean-target-libmpx distclean-target: maybe-distclean-target-libvtv distclean-target: maybe-distclean-target-liboffloadmic distclean-target: maybe-distclean-target-libssp @@ -2195,6 +2331,7 @@ distclean-target: maybe-distclean-target-libgfortran distclean-target: maybe-distclean-target-libobjc distclean-target: maybe-distclean-target-libgo distclean-target: maybe-distclean-target-libhsail-rt +distclean-target: maybe-distclean-target-libphobos distclean-target: maybe-distclean-target-libtermcap distclean-target: maybe-distclean-target-winsup distclean-target: maybe-distclean-target-libgloss @@ -2242,6 +2379,7 @@ maintainer-clean-host: maybe-maintainer-clean-itcl maintainer-clean-host: maybe-maintainer-clean-ld maintainer-clean-host: maybe-maintainer-clean-libbacktrace maintainer-clean-host: maybe-maintainer-clean-libcpp +maintainer-clean-host: maybe-maintainer-clean-libcody maintainer-clean-host: maybe-maintainer-clean-libdecnumber maintainer-clean-host: maybe-maintainer-clean-libgui maintainer-clean-host: maybe-maintainer-clean-libiberty @@ -2253,22 +2391,26 @@ maintainer-clean-host: maybe-maintainer-clean-sid maintainer-clean-host: maybe-maintainer-clean-sim maintainer-clean-host: maybe-maintainer-clean-texinfo maintainer-clean-host: maybe-maintainer-clean-zlib +maintainer-clean-host: maybe-maintainer-clean-gnulib +maintainer-clean-host: maybe-maintainer-clean-gdbsupport +maintainer-clean-host: maybe-maintainer-clean-gdbserver maintainer-clean-host: maybe-maintainer-clean-gdb maintainer-clean-host: maybe-maintainer-clean-expect maintainer-clean-host: maybe-maintainer-clean-guile maintainer-clean-host: maybe-maintainer-clean-tk maintainer-clean-host: maybe-maintainer-clean-libtermcap maintainer-clean-host: maybe-maintainer-clean-utils +maintainer-clean-host: maybe-maintainer-clean-c++tools maintainer-clean-host: maybe-maintainer-clean-gnattools maintainer-clean-host: maybe-maintainer-clean-lto-plugin maintainer-clean-host: maybe-maintainer-clean-libcc1 maintainer-clean-host: maybe-maintainer-clean-gotools +maintainer-clean-host: maybe-maintainer-clean-libctf .PHONY: maintainer-clean-target maintainer-clean-target: maybe-maintainer-clean-target-libstdc++-v3 maintainer-clean-target: maybe-maintainer-clean-target-libsanitizer -maintainer-clean-target: maybe-maintainer-clean-target-libmpx maintainer-clean-target: maybe-maintainer-clean-target-libvtv maintainer-clean-target: maybe-maintainer-clean-target-liboffloadmic maintainer-clean-target: maybe-maintainer-clean-target-libssp @@ -2280,6 +2422,7 @@ maintainer-clean-target: maybe-maintainer-clean-target-libgfortran maintainer-clean-target: maybe-maintainer-clean-target-libobjc maintainer-clean-target: maybe-maintainer-clean-target-libgo maintainer-clean-target: maybe-maintainer-clean-target-libhsail-rt +maintainer-clean-target: maybe-maintainer-clean-target-libphobos maintainer-clean-target: maybe-maintainer-clean-target-libtermcap maintainer-clean-target: maybe-maintainer-clean-target-winsup maintainer-clean-target: maybe-maintainer-clean-target-libgloss @@ -2335,8 +2478,8 @@ local-distclean: -rm -f texinfo/doc/Makefile texinfo/po/POTFILES -rmdir texinfo/doc texinfo/info texinfo/intl texinfo/lib 2>/dev/null -rmdir texinfo/makeinfo texinfo/po texinfo/util 2>/dev/null - -rmdir fastjar gcc gnattools gotools libcc1 libiberty 2>/dev/null - -rmdir texinfo zlib 2>/dev/null + -rmdir c++tools fastjar gcc gnattools 
gotools 2>/dev/null + -rmdir libcc1 libiberty texinfo zlib 2>/dev/null -find . -name config.cache -exec rm -f {} \; \; 2>/dev/null local-maintainer-clean: @@ -2383,6 +2526,7 @@ check-host: \ maybe-check-ld \ maybe-check-libbacktrace \ maybe-check-libcpp \ + maybe-check-libcody \ maybe-check-libdecnumber \ maybe-check-libgui \ maybe-check-libiberty \ @@ -2394,22 +2538,26 @@ check-host: \ maybe-check-sim \ maybe-check-texinfo \ maybe-check-zlib \ + maybe-check-gnulib \ + maybe-check-gdbsupport \ + maybe-check-gdbserver \ maybe-check-gdb \ maybe-check-expect \ maybe-check-guile \ maybe-check-tk \ maybe-check-libtermcap \ maybe-check-utils \ + maybe-check-c++tools \ maybe-check-gnattools \ maybe-check-lto-plugin \ maybe-check-libcc1 \ - maybe-check-gotools + maybe-check-gotools \ + maybe-check-libctf .PHONY: check-target check-target: \ maybe-check-target-libstdc++-v3 \ maybe-check-target-libsanitizer \ - maybe-check-target-libmpx \ maybe-check-target-libvtv \ maybe-check-target-liboffloadmic \ maybe-check-target-libssp \ @@ -2421,6 +2569,7 @@ check-target: \ maybe-check-target-libobjc \ maybe-check-target-libgo \ maybe-check-target-libhsail-rt \ + maybe-check-target-libphobos \ maybe-check-target-libtermcap \ maybe-check-target-winsup \ maybe-check-target-libgloss \ @@ -2515,6 +2664,7 @@ install-host-nogcc: \ maybe-install-ld \ maybe-install-libbacktrace \ maybe-install-libcpp \ + maybe-install-libcody \ maybe-install-libdecnumber \ maybe-install-libgui \ maybe-install-libiberty \ @@ -2526,16 +2676,21 @@ install-host-nogcc: \ maybe-install-sim \ maybe-install-texinfo \ maybe-install-zlib \ + maybe-install-gnulib \ + maybe-install-gdbsupport \ + maybe-install-gdbserver \ maybe-install-gdb \ maybe-install-expect \ maybe-install-guile \ maybe-install-tk \ maybe-install-libtermcap \ maybe-install-utils \ + maybe-install-c++tools \ maybe-install-gnattools \ maybe-install-lto-plugin \ maybe-install-libcc1 \ - maybe-install-gotools + maybe-install-gotools \ + maybe-install-libctf .PHONY: install-host install-host: \ @@ -2564,6 +2719,7 @@ install-host: \ maybe-install-ld \ maybe-install-libbacktrace \ maybe-install-libcpp \ + maybe-install-libcody \ maybe-install-libdecnumber \ maybe-install-libgui \ maybe-install-libiberty \ @@ -2575,22 +2731,26 @@ install-host: \ maybe-install-sim \ maybe-install-texinfo \ maybe-install-zlib \ + maybe-install-gnulib \ + maybe-install-gdbsupport \ + maybe-install-gdbserver \ maybe-install-gdb \ maybe-install-expect \ maybe-install-guile \ maybe-install-tk \ maybe-install-libtermcap \ maybe-install-utils \ + maybe-install-c++tools \ maybe-install-gnattools \ maybe-install-lto-plugin \ maybe-install-libcc1 \ - maybe-install-gotools + maybe-install-gotools \ + maybe-install-libctf .PHONY: install-target install-target: \ maybe-install-target-libstdc++-v3 \ maybe-install-target-libsanitizer \ - maybe-install-target-libmpx \ maybe-install-target-libvtv \ maybe-install-target-liboffloadmic \ maybe-install-target-libssp \ @@ -2602,6 +2762,7 @@ install-target: \ maybe-install-target-libobjc \ maybe-install-target-libgo \ maybe-install-target-libhsail-rt \ + maybe-install-target-libphobos \ maybe-install-target-libtermcap \ maybe-install-target-winsup \ maybe-install-target-libgloss \ @@ -2669,6 +2830,7 @@ install-strip-host: \ maybe-install-strip-ld \ maybe-install-strip-libbacktrace \ maybe-install-strip-libcpp \ + maybe-install-strip-libcody \ maybe-install-strip-libdecnumber \ maybe-install-strip-libgui \ maybe-install-strip-libiberty \ @@ -2680,22 +2842,26 @@ 
install-strip-host: \ maybe-install-strip-sim \ maybe-install-strip-texinfo \ maybe-install-strip-zlib \ + maybe-install-strip-gnulib \ + maybe-install-strip-gdbsupport \ + maybe-install-strip-gdbserver \ maybe-install-strip-gdb \ maybe-install-strip-expect \ maybe-install-strip-guile \ maybe-install-strip-tk \ maybe-install-strip-libtermcap \ maybe-install-strip-utils \ + maybe-install-strip-c++tools \ maybe-install-strip-gnattools \ maybe-install-strip-lto-plugin \ maybe-install-strip-libcc1 \ - maybe-install-strip-gotools + maybe-install-strip-gotools \ + maybe-install-strip-libctf .PHONY: install-strip-target install-strip-target: \ maybe-install-strip-target-libstdc++-v3 \ maybe-install-strip-target-libsanitizer \ - maybe-install-strip-target-libmpx \ maybe-install-strip-target-libvtv \ maybe-install-strip-target-liboffloadmic \ maybe-install-strip-target-libssp \ @@ -2707,6 +2873,7 @@ install-strip-target: \ maybe-install-strip-target-libobjc \ maybe-install-strip-target-libgo \ maybe-install-strip-target-libhsail-rt \ + maybe-install-strip-target-libphobos \ maybe-install-strip-target-libtermcap \ maybe-install-strip-target-winsup \ maybe-install-strip-target-libgloss \ @@ -3534,6 +3701,7 @@ all-stage1-bfd: configure-stage1-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3578,6 +3746,7 @@ all-stage2-bfd: configure-stage2-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3620,6 +3789,7 @@ all-stage3-bfd: configure-stage3-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3662,6 +3832,7 @@ all-stage4-bfd: configure-stage4-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3704,6 +3875,7 @@ all-stageprofile-bfd: configure-stageprofile-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3746,6 +3918,7 @@ all-stagetrain-bfd: configure-stagetrain-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3788,6 +3961,7 @@ all-stagefeedback-bfd: configure-stagefeedback-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3830,6 +4004,7 @@ all-stageautoprofile-bfd: configure-stageautoprofile-bfd $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ 
LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -3872,6 +4047,7 @@ all-stageautofeedback-bfd: configure-stageautofeedback-bfd \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4639,6 +4815,7 @@ all-stage1-opcodes: configure-stage1-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4683,6 +4860,7 @@ all-stage2-opcodes: configure-stage2-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4725,6 +4903,7 @@ all-stage3-opcodes: configure-stage3-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4767,6 +4946,7 @@ all-stage4-opcodes: configure-stage4-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4809,6 +4989,7 @@ all-stageprofile-opcodes: configure-stageprofile-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4851,6 +5032,7 @@ all-stagetrain-opcodes: configure-stagetrain-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4893,6 +5075,7 @@ all-stagefeedback-opcodes: configure-stagefeedback-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4935,6 +5118,7 @@ all-stageautoprofile-opcodes: configure-stageautoprofile-opcodes $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -4977,6 +5161,7 @@ all-stageautofeedback-opcodes: configure-stageautofeedback-opcodes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -5744,6 +5929,7 @@ all-stage1-binutils: configure-stage1-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -5788,6 +5974,7 
@@ all-stage2-binutils: configure-stage2-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -5830,6 +6017,7 @@ all-stage3-binutils: configure-stage3-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -5872,6 +6060,7 @@ all-stage4-binutils: configure-stage4-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -5914,6 +6103,7 @@ all-stageprofile-binutils: configure-stageprofile-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -5956,6 +6146,7 @@ all-stagetrain-binutils: configure-stagetrain-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -5998,6 +6189,7 @@ all-stagefeedback-binutils: configure-stagefeedback-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -6040,6 +6232,7 @@ all-stageautoprofile-binutils: configure-stageautoprofile-binutils $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -6082,6 +6275,7 @@ all-stageautofeedback-binutils: configure-stageautofeedback-binutils \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9060,6 +9254,7 @@ all-stage1-fixincludes: configure-stage1-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9104,6 +9299,7 @@ all-stage2-fixincludes: configure-stage2-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9146,6 +9342,7 @@ all-stage3-fixincludes: configure-stage3-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9188,6 +9385,7 @@ all-stage4-fixincludes: configure-stage4-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + 
GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9230,6 +9428,7 @@ all-stageprofile-fixincludes: configure-stageprofile-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9272,6 +9471,7 @@ all-stagetrain-fixincludes: configure-stagetrain-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9314,6 +9514,7 @@ all-stagefeedback-fixincludes: configure-stagefeedback-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9356,6 +9557,7 @@ all-stageautoprofile-fixincludes: configure-stageautoprofile-fixincludes $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -9398,6 +9600,7 @@ all-stageautofeedback-fixincludes: configure-stageautofeedback-fixincludes \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10594,6 +10797,7 @@ all-stage1-gas: configure-stage1-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10638,6 +10842,7 @@ all-stage2-gas: configure-stage2-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10680,6 +10885,7 @@ all-stage3-gas: configure-stage3-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10722,6 +10928,7 @@ all-stage4-gas: configure-stage4-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10764,6 +10971,7 @@ all-stageprofile-gas: configure-stageprofile-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10806,6 +11014,7 @@ all-stagetrain-gas: configure-stagetrain-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ 
LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10848,6 +11057,7 @@ all-stagefeedback-gas: configure-stagefeedback-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10890,6 +11100,7 @@ all-stageautoprofile-gas: configure-stageautoprofile-gas $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -10932,6 +11143,7 @@ all-stageautofeedback-gas: configure-stageautofeedback-gas \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11699,6 +11911,7 @@ all-stage1-gcc: configure-stage1-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11743,6 +11956,7 @@ all-stage2-gcc: configure-stage2-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11785,6 +11999,7 @@ all-stage3-gcc: configure-stage3-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11827,6 +12042,7 @@ all-stage4-gcc: configure-stage4-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11869,6 +12085,7 @@ all-stageprofile-gcc: configure-stageprofile-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11911,6 +12128,7 @@ all-stagetrain-gcc: configure-stagetrain-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11953,6 +12171,7 @@ all-stagefeedback-gcc: configure-stagefeedback-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -11995,6 +12214,7 @@ all-stageautoprofile-gcc: configure-stageautoprofile-gcc $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ 
-12037,6 +12257,7 @@ all-stageautofeedback-gcc: configure-stageautofeedback-gcc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -12813,6 +13034,7 @@ all-stage1-gmp: configure-stage1-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -12857,6 +13079,7 @@ all-stage2-gmp: configure-stage2-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -12899,6 +13122,7 @@ all-stage3-gmp: configure-stage3-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -12941,6 +13165,7 @@ all-stage4-gmp: configure-stage4-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -12983,6 +13208,7 @@ all-stageprofile-gmp: configure-stageprofile-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -13025,6 +13251,7 @@ all-stagetrain-gmp: configure-stagetrain-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -13067,6 +13294,7 @@ all-stagefeedback-gmp: configure-stagefeedback-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -13109,6 +13337,7 @@ all-stageautoprofile-gmp: configure-stageautoprofile-gmp $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -13151,6 +13380,7 @@ all-stageautofeedback-gmp: configure-stageautofeedback-gmp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -13915,6 +14145,7 @@ all-stage1-mpfr: configure-stage1-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -13959,6 +14190,7 @@ all-stage2-mpfr: configure-stage2-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + 
GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -14001,6 +14233,7 @@ all-stage3-mpfr: configure-stage3-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -14043,6 +14276,7 @@ all-stage4-mpfr: configure-stage4-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -14085,6 +14319,7 @@ all-stageprofile-mpfr: configure-stageprofile-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -14127,6 +14362,7 @@ all-stagetrain-mpfr: configure-stagetrain-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -14169,6 +14405,7 @@ all-stagefeedback-mpfr: configure-stagefeedback-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -14211,6 +14448,7 @@ all-stageautoprofile-mpfr: configure-stageautoprofile-mpfr $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -14253,6 +14491,7 @@ all-stageautofeedback-mpfr: configure-stageautofeedback-mpfr \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15017,6 +15256,7 @@ all-stage1-mpc: configure-stage1-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15061,6 +15301,7 @@ all-stage2-mpc: configure-stage2-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15103,6 +15344,7 @@ all-stage3-mpc: configure-stage3-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15145,6 +15387,7 @@ all-stage4-mpc: configure-stage4-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15187,6 +15430,7 @@ all-stageprofile-mpc: 
configure-stageprofile-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15229,6 +15473,7 @@ all-stagetrain-mpc: configure-stagetrain-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15271,6 +15516,7 @@ all-stagefeedback-mpc: configure-stagefeedback-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15313,6 +15559,7 @@ all-stageautoprofile-mpc: configure-stageautoprofile-mpc $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -15355,6 +15602,7 @@ all-stageautofeedback-mpc: configure-stageautofeedback-mpc \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16119,6 +16367,7 @@ all-stage1-isl: configure-stage1-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16163,6 +16412,7 @@ all-stage2-isl: configure-stage2-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16205,6 +16455,7 @@ all-stage3-isl: configure-stage3-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16247,6 +16498,7 @@ all-stage4-isl: configure-stage4-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16289,6 +16541,7 @@ all-stageprofile-isl: configure-stageprofile-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16331,6 +16584,7 @@ all-stagetrain-isl: configure-stagetrain-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16373,6 +16627,7 @@ all-stagefeedback-isl: configure-stagefeedback-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ 
CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16415,6 +16670,7 @@ all-stageautoprofile-isl: configure-stageautoprofile-isl $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -16457,6 +16713,7 @@ all-stageautofeedback-isl: configure-stageautofeedback-isl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17221,6 +17478,7 @@ all-stage1-libelf: configure-stage1-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17265,6 +17523,7 @@ all-stage2-libelf: configure-stage2-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17307,6 +17566,7 @@ all-stage3-libelf: configure-stage3-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17349,6 +17609,7 @@ all-stage4-libelf: configure-stage4-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17391,6 +17652,7 @@ all-stageprofile-libelf: configure-stageprofile-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17433,6 +17695,7 @@ all-stagetrain-libelf: configure-stagetrain-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17475,6 +17738,7 @@ all-stagefeedback-libelf: configure-stagefeedback-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17517,6 +17781,7 @@ all-stageautoprofile-libelf: configure-stageautoprofile-libelf $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -17559,6 +17824,7 @@ all-stageautofeedback-libelf: configure-stageautofeedback-libelf \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ 
CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18314,6 +18580,7 @@ all-stage1-gold: configure-stage1-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18358,6 +18625,7 @@ all-stage2-gold: configure-stage2-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18400,6 +18668,7 @@ all-stage3-gold: configure-stage3-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18442,6 +18711,7 @@ all-stage4-gold: configure-stage4-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18484,6 +18754,7 @@ all-stageprofile-gold: configure-stageprofile-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18526,6 +18797,7 @@ all-stagetrain-gold: configure-stagetrain-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18568,6 +18840,7 @@ all-stagefeedback-gold: configure-stagefeedback-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18610,6 +18883,7 @@ all-stageautoprofile-gold: configure-stageautoprofile-gold $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -18652,6 +18926,7 @@ all-stageautofeedback-gold: configure-stageautofeedback-gold \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -19860,6 +20135,7 @@ all-stage1-intl: configure-stage1-intl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -19904,6 +20180,7 @@ all-stage2-intl: configure-stage2-intl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -19946,6 +20223,7 @@ all-stage3-intl: configure-stage3-intl \ $(MAKE) 
$(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -19988,6 +20266,7 @@ all-stage4-intl: configure-stage4-intl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -20030,6 +20309,7 @@ all-stageprofile-intl: configure-stageprofile-intl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -20072,6 +20352,7 @@ all-stagetrain-intl: configure-stagetrain-intl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -20114,6 +20395,7 @@ all-stagefeedback-intl: configure-stagefeedback-intl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -20156,6 +20438,7 @@ all-stageautoprofile-intl: configure-stageautoprofile-intl $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -20198,6 +20481,7 @@ all-stageautofeedback-intl: configure-stageautofeedback-intl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -21832,6 +22116,7 @@ all-stage1-ld: configure-stage1-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -21876,6 +22161,7 @@ all-stage2-ld: configure-stage2-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -21918,6 +22204,7 @@ all-stage3-ld: configure-stage3-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -21960,6 +22247,7 @@ all-stage4-ld: configure-stage4-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -22002,6 +22290,7 @@ all-stageprofile-ld: configure-stageprofile-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ 
CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -22044,6 +22333,7 @@ all-stagetrain-ld: configure-stagetrain-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -22086,6 +22376,7 @@ all-stagefeedback-ld: configure-stagefeedback-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -22128,6 +22419,7 @@ all-stageautoprofile-ld: configure-stageautoprofile-ld $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -22170,6 +22462,7 @@ all-stageautofeedback-ld: configure-stageautofeedback-ld \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -22937,6 +23230,7 @@ all-stage1-libbacktrace: configure-stage1-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -22981,6 +23275,7 @@ all-stage2-libbacktrace: configure-stage2-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -23023,6 +23318,7 @@ all-stage3-libbacktrace: configure-stage3-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -23065,6 +23361,7 @@ all-stage4-libbacktrace: configure-stage4-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -23107,6 +23404,7 @@ all-stageprofile-libbacktrace: configure-stageprofile-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -23149,6 +23447,7 @@ all-stagetrain-libbacktrace: configure-stagetrain-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -23191,6 +23490,7 @@ all-stagefeedback-libbacktrace: configure-stagefeedback-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" 
\ @@ -23233,6 +23533,7 @@ all-stageautoprofile-libbacktrace: configure-stageautoprofile-libbacktrace $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -23275,6 +23576,7 @@ all-stageautofeedback-libbacktrace: configure-stageautofeedback-libbacktrace \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24042,6 +24344,7 @@ all-stage1-libcpp: configure-stage1-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24086,6 +24389,7 @@ all-stage2-libcpp: configure-stage2-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24128,6 +24432,7 @@ all-stage3-libcpp: configure-stage3-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24170,6 +24475,7 @@ all-stage4-libcpp: configure-stage4-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24212,6 +24518,7 @@ all-stageprofile-libcpp: configure-stageprofile-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24254,6 +24561,7 @@ all-stagetrain-libcpp: configure-stagetrain-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24296,6 +24604,7 @@ all-stagefeedback-libcpp: configure-stagefeedback-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24338,6 +24647,7 @@ all-stageautoprofile-libcpp: configure-stageautoprofile-libcpp $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24380,6 +24690,7 @@ all-stageautofeedback-libcpp: configure-stageautofeedback-libcpp \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ 
CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -24779,374 +25090,375 @@ maintainer-clean-libcpp: -.PHONY: configure-libdecnumber maybe-configure-libdecnumber -maybe-configure-libdecnumber: +.PHONY: configure-libcody maybe-configure-libcody +maybe-configure-libcody: @if gcc-bootstrap -configure-libdecnumber: stage_current +configure-libcody: stage_current @endif gcc-bootstrap -@if libdecnumber -maybe-configure-libdecnumber: configure-libdecnumber -configure-libdecnumber: +@if libcody +maybe-configure-libcody: configure-libcody +configure-libcody: @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + test ! -f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ $(HOST_EXPORTS) \ - echo Configuring in $(HOST_SUBDIR)/libdecnumber; \ - cd "$(HOST_SUBDIR)/libdecnumber" || exit 1; \ + echo Configuring in $(HOST_SUBDIR)/libcody; \ + cd "$(HOST_SUBDIR)/libcody" || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) \ $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: configure-stage1-libdecnumber maybe-configure-stage1-libdecnumber -maybe-configure-stage1-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stage1-libdecnumber: configure-stage1-libdecnumber -configure-stage1-libdecnumber: +.PHONY: configure-stage1-libcody maybe-configure-stage1-libcody +maybe-configure-stage1-libcody: +@if libcody-bootstrap +maybe-configure-stage1-libcody: configure-stage1-libcody +configure-stage1-libcody: @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ CFLAGS="$(STAGE1_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGE1_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 1 in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage 1 in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ \ $(STAGE1_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stage2-libdecnumber maybe-configure-stage2-libdecnumber -maybe-configure-stage2-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stage2-libdecnumber: configure-stage2-libdecnumber -configure-stage2-libdecnumber: +.PHONY: configure-stage2-libcody maybe-configure-stage2-libcody +maybe-configure-stage2-libcody: +@if libcody-bootstrap +maybe-configure-stage2-libcody: configure-stage2-libcody +configure-stage2-libcody: @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGE2_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGE2_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGE2_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 2 in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage 2 in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE2_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stage3-libdecnumber maybe-configure-stage3-libdecnumber -maybe-configure-stage3-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stage3-libdecnumber: configure-stage3-libdecnumber -configure-stage3-libdecnumber: +.PHONY: configure-stage3-libcody maybe-configure-stage3-libcody +maybe-configure-stage3-libcody: +@if libcody-bootstrap +maybe-configure-stage3-libcody: configure-stage3-libcody +configure-stage3-libcody: @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE3_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGE3_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGE3_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGE3_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 3 in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage 3 in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE3_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stage4-libdecnumber maybe-configure-stage4-libdecnumber -maybe-configure-stage4-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stage4-libdecnumber: configure-stage4-libdecnumber -configure-stage4-libdecnumber: +.PHONY: configure-stage4-libcody maybe-configure-stage4-libcody +maybe-configure-stage4-libcody: +@if libcody-bootstrap +maybe-configure-stage4-libcody: configure-stage4-libcody +configure-stage4-libcody: @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGE4_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGE4_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGE4_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 4 in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage 4 in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE4_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stageprofile-libdecnumber maybe-configure-stageprofile-libdecnumber -maybe-configure-stageprofile-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stageprofile-libdecnumber: configure-stageprofile-libdecnumber -configure-stageprofile-libdecnumber: +.PHONY: configure-stageprofile-libcody maybe-configure-stageprofile-libcody +maybe-configure-stageprofile-libcody: +@if libcody-bootstrap +maybe-configure-stageprofile-libcody: configure-stageprofile-libcody +configure-stageprofile-libcody: @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGEprofile_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGEprofile_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage profile in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage profile in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEprofile_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stagetrain-libdecnumber maybe-configure-stagetrain-libdecnumber -maybe-configure-stagetrain-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stagetrain-libdecnumber: configure-stagetrain-libdecnumber -configure-stagetrain-libdecnumber: +.PHONY: configure-stagetrain-libcody maybe-configure-stagetrain-libcody +maybe-configure-stagetrain-libcody: +@if libcody-bootstrap +maybe-configure-stagetrain-libcody: configure-stagetrain-libcody +configure-stagetrain-libcody: @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGEtrain_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGEtrain_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage train in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage train in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEtrain_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stagefeedback-libdecnumber maybe-configure-stagefeedback-libdecnumber -maybe-configure-stagefeedback-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stagefeedback-libdecnumber: configure-stagefeedback-libdecnumber -configure-stagefeedback-libdecnumber: +.PHONY: configure-stagefeedback-libcody maybe-configure-stagefeedback-libcody +maybe-configure-stagefeedback-libcody: +@if libcody-bootstrap +maybe-configure-stagefeedback-libcody: configure-stagefeedback-libcody +configure-stagefeedback-libcody: @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGEfeedback_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage feedback in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage feedback in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEfeedback_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stageautoprofile-libdecnumber maybe-configure-stageautoprofile-libdecnumber -maybe-configure-stageautoprofile-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stageautoprofile-libdecnumber: configure-stageautoprofile-libdecnumber -configure-stageautoprofile-libdecnumber: +.PHONY: configure-stageautoprofile-libcody maybe-configure-stageautoprofile-libcody +maybe-configure-stageautoprofile-libcody: +@if libcody-bootstrap +maybe-configure-stageautoprofile-libcody: configure-stageautoprofile-libcody +configure-stageautoprofile-libcody: @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage autoprofile in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage autoprofile in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautoprofile_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: configure-stageautofeedback-libdecnumber maybe-configure-stageautofeedback-libdecnumber -maybe-configure-stageautofeedback-libdecnumber: -@if libdecnumber-bootstrap -maybe-configure-stageautofeedback-libdecnumber: configure-stageautofeedback-libdecnumber -configure-stageautofeedback-libdecnumber: +.PHONY: configure-stageautofeedback-libcody maybe-configure-stageautofeedback-libcody +maybe-configure-stageautofeedback-libcody: +@if libcody-bootstrap +maybe-configure-stageautofeedback-libcody: configure-stageautofeedback-libcody +configure-stageautofeedback-libcody: @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + test ! 
-f $(HOST_SUBDIR)/libcody/Makefile || exit 0; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)"; export CFLAGS; \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)"; export CXXFLAGS; \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage autofeedback in $(HOST_SUBDIR)/libdecnumber; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ - cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + echo Configuring stage autofeedback in $(HOST_SUBDIR)/libcody; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libcody; \ + cd $(HOST_SUBDIR)/libcody || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libcody/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libdecnumber; \ + module_srcdir=libcody; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautofeedback_CONFIGURE_FLAGS) -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: all-libdecnumber maybe-all-libdecnumber -maybe-all-libdecnumber: +.PHONY: all-libcody maybe-all-libcody +maybe-all-libcody: @if gcc-bootstrap -all-libdecnumber: stage_current +all-libcody: stage_current @endif gcc-bootstrap -@if libdecnumber -TARGET-libdecnumber=all -maybe-all-libdecnumber: all-libdecnumber -all-libdecnumber: configure-libdecnumber +@if libcody +TARGET-libcody=all +maybe-all-libcody: all-libcody +all-libcody: configure-libcody @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ - $(TARGET-libdecnumber)) -@endif libdecnumber + $(TARGET-libcody)) +@endif libcody -.PHONY: all-stage1-libdecnumber maybe-all-stage1-libdecnumber -.PHONY: clean-stage1-libdecnumber maybe-clean-stage1-libdecnumber -maybe-all-stage1-libdecnumber: -maybe-clean-stage1-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stage1-libdecnumber: all-stage1-libdecnumber -all-stage1: all-stage1-libdecnumber -TARGET-stage1-libdecnumber = $(TARGET-libdecnumber) -all-stage1-libdecnumber: configure-stage1-libdecnumber +.PHONY: all-stage1-libcody maybe-all-stage1-libcody +.PHONY: clean-stage1-libcody maybe-clean-stage1-libcody +maybe-all-stage1-libcody: +maybe-clean-stage1-libcody: +@if libcody-bootstrap +maybe-all-stage1-libcody: all-stage1-libcody +all-stage1: all-stage1-libcody +TARGET-stage1-libcody = $(TARGET-libcody) +all-stage1-libcody: configure-stage1-libcody @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ $(HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25155,42 +25467,43 @@ all-stage1-libdecnumber: configure-stage1-libdecnumber $(EXTRA_HOST_FLAGS) \ $(STAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE1_TFLAGS)" \ - $(TARGET-stage1-libdecnumber) + $(TARGET-stage1-libcody) -maybe-clean-stage1-libdecnumber: clean-stage1-libdecnumber -clean-stage1: 
clean-stage1-libdecnumber -clean-stage1-libdecnumber: +maybe-clean-stage1-libcody: clean-stage1-libcody +clean-stage1: clean-stage1-libcody +clean-stage1-libcody: @if [ $(current_stage) = stage1 ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stage1-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage1-libcody/Makefile ] || exit 0; \ $(MAKE) stage1-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) \ $(STAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stage2-libdecnumber maybe-all-stage2-libdecnumber -.PHONY: clean-stage2-libdecnumber maybe-clean-stage2-libdecnumber -maybe-all-stage2-libdecnumber: -maybe-clean-stage2-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stage2-libdecnumber: all-stage2-libdecnumber -all-stage2: all-stage2-libdecnumber -TARGET-stage2-libdecnumber = $(TARGET-libdecnumber) -all-stage2-libdecnumber: configure-stage2-libdecnumber +@endif libcody-bootstrap + + +.PHONY: all-stage2-libcody maybe-all-stage2-libcody +.PHONY: clean-stage2-libcody maybe-clean-stage2-libcody +maybe-all-stage2-libcody: +maybe-clean-stage2-libcody: +@if libcody-bootstrap +maybe-all-stage2-libcody: all-stage2-libcody +all-stage2: all-stage2-libcody +TARGET-stage2-libcody = $(TARGET-libcody) +all-stage2-libcody: configure-stage2-libcody @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25198,41 +25511,42 @@ all-stage2-libdecnumber: configure-stage2-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE2_TFLAGS)" \ - $(TARGET-stage2-libdecnumber) + $(TARGET-stage2-libcody) -maybe-clean-stage2-libdecnumber: clean-stage2-libdecnumber -clean-stage2: clean-stage2-libdecnumber -clean-stage2-libdecnumber: +maybe-clean-stage2-libcody: clean-stage2-libcody +clean-stage2: clean-stage2-libcody +clean-stage2-libcody: @if [ $(current_stage) = stage2 ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stage2-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage2-libcody/Makefile ] || exit 0; \ $(MAKE) stage2-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stage3-libdecnumber maybe-all-stage3-libdecnumber -.PHONY: clean-stage3-libdecnumber maybe-clean-stage3-libdecnumber -maybe-all-stage3-libdecnumber: -maybe-clean-stage3-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stage3-libdecnumber: all-stage3-libdecnumber -all-stage3: all-stage3-libdecnumber -TARGET-stage3-libdecnumber = $(TARGET-libdecnumber) -all-stage3-libdecnumber: configure-stage3-libdecnumber +@endif libcody-bootstrap + + +.PHONY: all-stage3-libcody maybe-all-stage3-libcody +.PHONY: clean-stage3-libcody maybe-clean-stage3-libcody +maybe-all-stage3-libcody: 
+maybe-clean-stage3-libcody: +@if libcody-bootstrap +maybe-all-stage3-libcody: all-stage3-libcody +all-stage3: all-stage3-libcody +TARGET-stage3-libcody = $(TARGET-libcody) +all-stage3-libcody: configure-stage3-libcody @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE3_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25240,41 +25554,42 @@ all-stage3-libdecnumber: configure-stage3-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE3_TFLAGS)" \ - $(TARGET-stage3-libdecnumber) + $(TARGET-stage3-libcody) -maybe-clean-stage3-libdecnumber: clean-stage3-libdecnumber -clean-stage3: clean-stage3-libdecnumber -clean-stage3-libdecnumber: +maybe-clean-stage3-libcody: clean-stage3-libcody +clean-stage3: clean-stage3-libcody +clean-stage3-libcody: @if [ $(current_stage) = stage3 ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stage3-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage3-libcody/Makefile ] || exit 0; \ $(MAKE) stage3-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stage4-libdecnumber maybe-all-stage4-libdecnumber -.PHONY: clean-stage4-libdecnumber maybe-clean-stage4-libdecnumber -maybe-all-stage4-libdecnumber: -maybe-clean-stage4-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stage4-libdecnumber: all-stage4-libdecnumber -all-stage4: all-stage4-libdecnumber -TARGET-stage4-libdecnumber = $(TARGET-libdecnumber) -all-stage4-libdecnumber: configure-stage4-libdecnumber +@endif libcody-bootstrap + + +.PHONY: all-stage4-libcody maybe-all-stage4-libcody +.PHONY: clean-stage4-libcody maybe-clean-stage4-libcody +maybe-all-stage4-libcody: +maybe-clean-stage4-libcody: +@if libcody-bootstrap +maybe-all-stage4-libcody: all-stage4-libcody +all-stage4: all-stage4-libcody +TARGET-stage4-libcody = $(TARGET-libcody) +all-stage4-libcody: configure-stage4-libcody @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25282,41 +25597,42 @@ all-stage4-libdecnumber: configure-stage4-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE4_TFLAGS)" \ - $(TARGET-stage4-libdecnumber) + $(TARGET-stage4-libcody) -maybe-clean-stage4-libdecnumber: clean-stage4-libdecnumber -clean-stage4: clean-stage4-libdecnumber -clean-stage4-libdecnumber: +maybe-clean-stage4-libcody: clean-stage4-libcody +clean-stage4: clean-stage4-libcody +clean-stage4-libcody: @if [ $(current_stage) = 
stage4 ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stage4-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage4-libcody/Makefile ] || exit 0; \ $(MAKE) stage4-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stageprofile-libdecnumber maybe-all-stageprofile-libdecnumber -.PHONY: clean-stageprofile-libdecnumber maybe-clean-stageprofile-libdecnumber -maybe-all-stageprofile-libdecnumber: -maybe-clean-stageprofile-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stageprofile-libdecnumber: all-stageprofile-libdecnumber -all-stageprofile: all-stageprofile-libdecnumber -TARGET-stageprofile-libdecnumber = $(TARGET-libdecnumber) -all-stageprofile-libdecnumber: configure-stageprofile-libdecnumber +@endif libcody-bootstrap + + +.PHONY: all-stageprofile-libcody maybe-all-stageprofile-libcody +.PHONY: clean-stageprofile-libcody maybe-clean-stageprofile-libcody +maybe-all-stageprofile-libcody: +maybe-clean-stageprofile-libcody: +@if libcody-bootstrap +maybe-all-stageprofile-libcody: all-stageprofile-libcody +all-stageprofile: all-stageprofile-libcody +TARGET-stageprofile-libcody = $(TARGET-libcody) +all-stageprofile-libcody: configure-stageprofile-libcody @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25324,41 +25640,42 @@ all-stageprofile-libdecnumber: configure-stageprofile-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEprofile_TFLAGS)" \ - $(TARGET-stageprofile-libdecnumber) + $(TARGET-stageprofile-libcody) -maybe-clean-stageprofile-libdecnumber: clean-stageprofile-libdecnumber -clean-stageprofile: clean-stageprofile-libdecnumber -clean-stageprofile-libdecnumber: +maybe-clean-stageprofile-libcody: clean-stageprofile-libcody +clean-stageprofile: clean-stageprofile-libcody +clean-stageprofile-libcody: @if [ $(current_stage) = stageprofile ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stageprofile-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stageprofile-libcody/Makefile ] || exit 0; \ $(MAKE) stageprofile-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stagetrain-libdecnumber maybe-all-stagetrain-libdecnumber -.PHONY: clean-stagetrain-libdecnumber maybe-clean-stagetrain-libdecnumber -maybe-all-stagetrain-libdecnumber: -maybe-clean-stagetrain-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stagetrain-libdecnumber: all-stagetrain-libdecnumber -all-stagetrain: all-stagetrain-libdecnumber -TARGET-stagetrain-libdecnumber = $(TARGET-libdecnumber) -all-stagetrain-libdecnumber: configure-stagetrain-libdecnumber 
+@endif libcody-bootstrap + + +.PHONY: all-stagetrain-libcody maybe-all-stagetrain-libcody +.PHONY: clean-stagetrain-libcody maybe-clean-stagetrain-libcody +maybe-all-stagetrain-libcody: +maybe-clean-stagetrain-libcody: +@if libcody-bootstrap +maybe-all-stagetrain-libcody: all-stagetrain-libcody +all-stagetrain: all-stagetrain-libcody +TARGET-stagetrain-libcody = $(TARGET-libcody) +all-stagetrain-libcody: configure-stagetrain-libcody @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25366,41 +25683,42 @@ all-stagetrain-libdecnumber: configure-stagetrain-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEtrain_TFLAGS)" \ - $(TARGET-stagetrain-libdecnumber) + $(TARGET-stagetrain-libcody) -maybe-clean-stagetrain-libdecnumber: clean-stagetrain-libdecnumber -clean-stagetrain: clean-stagetrain-libdecnumber -clean-stagetrain-libdecnumber: +maybe-clean-stagetrain-libcody: clean-stagetrain-libcody +clean-stagetrain: clean-stagetrain-libcody +clean-stagetrain-libcody: @if [ $(current_stage) = stagetrain ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stagetrain-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stagetrain-libcody/Makefile ] || exit 0; \ $(MAKE) stagetrain-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stagefeedback-libdecnumber maybe-all-stagefeedback-libdecnumber -.PHONY: clean-stagefeedback-libdecnumber maybe-clean-stagefeedback-libdecnumber -maybe-all-stagefeedback-libdecnumber: -maybe-clean-stagefeedback-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stagefeedback-libdecnumber: all-stagefeedback-libdecnumber -all-stagefeedback: all-stagefeedback-libdecnumber -TARGET-stagefeedback-libdecnumber = $(TARGET-libdecnumber) -all-stagefeedback-libdecnumber: configure-stagefeedback-libdecnumber +@endif libcody-bootstrap + + +.PHONY: all-stagefeedback-libcody maybe-all-stagefeedback-libcody +.PHONY: clean-stagefeedback-libcody maybe-clean-stagefeedback-libcody +maybe-all-stagefeedback-libcody: +maybe-clean-stagefeedback-libcody: +@if libcody-bootstrap +maybe-all-stagefeedback-libcody: all-stagefeedback-libcody +all-stagefeedback: all-stagefeedback-libcody +TARGET-stagefeedback-libcody = $(TARGET-libcody) +all-stagefeedback-libcody: configure-stagefeedback-libcody @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ 
CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25408,41 +25726,42 @@ all-stagefeedback-libdecnumber: configure-stagefeedback-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEfeedback_TFLAGS)" \ - $(TARGET-stagefeedback-libdecnumber) + $(TARGET-stagefeedback-libcody) -maybe-clean-stagefeedback-libdecnumber: clean-stagefeedback-libdecnumber -clean-stagefeedback: clean-stagefeedback-libdecnumber -clean-stagefeedback-libdecnumber: +maybe-clean-stagefeedback-libcody: clean-stagefeedback-libcody +clean-stagefeedback: clean-stagefeedback-libcody +clean-stagefeedback-libcody: @if [ $(current_stage) = stagefeedback ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stagefeedback-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stagefeedback-libcody/Makefile ] || exit 0; \ $(MAKE) stagefeedback-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stageautoprofile-libdecnumber maybe-all-stageautoprofile-libdecnumber -.PHONY: clean-stageautoprofile-libdecnumber maybe-clean-stageautoprofile-libdecnumber -maybe-all-stageautoprofile-libdecnumber: -maybe-clean-stageautoprofile-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stageautoprofile-libdecnumber: all-stageautoprofile-libdecnumber -all-stageautoprofile: all-stageautoprofile-libdecnumber -TARGET-stageautoprofile-libdecnumber = $(TARGET-libdecnumber) -all-stageautoprofile-libdecnumber: configure-stageautoprofile-libdecnumber +@endif libcody-bootstrap + + +.PHONY: all-stageautoprofile-libcody maybe-all-stageautoprofile-libcody +.PHONY: clean-stageautoprofile-libcody maybe-clean-stageautoprofile-libcody +maybe-all-stageautoprofile-libcody: +maybe-clean-stageautoprofile-libcody: +@if libcody-bootstrap +maybe-all-stageautoprofile-libcody: all-stageautoprofile-libcody +all-stageautoprofile: all-stageautoprofile-libcody +TARGET-stageautoprofile-libcody = $(TARGET-libcody) +all-stageautoprofile-libcody: configure-stageautoprofile-libcody @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25450,41 +25769,42 @@ all-stageautoprofile-libdecnumber: configure-stageautoprofile-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEautoprofile_TFLAGS)" \ - $(TARGET-stageautoprofile-libdecnumber) + $(TARGET-stageautoprofile-libcody) -maybe-clean-stageautoprofile-libdecnumber: clean-stageautoprofile-libdecnumber -clean-stageautoprofile: clean-stageautoprofile-libdecnumber -clean-stageautoprofile-libdecnumber: +maybe-clean-stageautoprofile-libcody: clean-stageautoprofile-libcody +clean-stageautoprofile: clean-stageautoprofile-libcody +clean-stageautoprofile-libcody: @if [ $(current_stage) = stageautoprofile ]; then \ - 
[ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stageautoprofile-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stageautoprofile-libcody/Makefile ] || exit 0; \ $(MAKE) stageautoprofile-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap - - -.PHONY: all-stageautofeedback-libdecnumber maybe-all-stageautofeedback-libdecnumber -.PHONY: clean-stageautofeedback-libdecnumber maybe-clean-stageautofeedback-libdecnumber -maybe-all-stageautofeedback-libdecnumber: -maybe-clean-stageautofeedback-libdecnumber: -@if libdecnumber-bootstrap -maybe-all-stageautofeedback-libdecnumber: all-stageautofeedback-libdecnumber -all-stageautofeedback: all-stageautofeedback-libdecnumber -TARGET-stageautofeedback-libdecnumber = $(TARGET-libdecnumber) -all-stageautofeedback-libdecnumber: configure-stageautofeedback-libdecnumber +@endif libcody-bootstrap + + +.PHONY: all-stageautofeedback-libcody maybe-all-stageautofeedback-libcody +.PHONY: clean-stageautofeedback-libcody maybe-clean-stageautofeedback-libcody +maybe-all-stageautofeedback-libcody: +maybe-clean-stageautofeedback-libcody: +@if libcody-bootstrap +maybe-all-stageautofeedback-libcody: all-stageautofeedback-libcody +all-stageautofeedback: all-stageautofeedback-libcody +TARGET-stageautofeedback-libcody = $(TARGET-libcody) +all-stageautofeedback-libcody: configure-stageautofeedback-libcody @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ $(HOST_EXPORTS) \ $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -25492,113 +25812,86 @@ all-stageautofeedback-libdecnumber: configure-stageautofeedback-libdecnumber LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEautofeedback_TFLAGS)" PERF_DATA=perf.data \ - $(TARGET-stageautofeedback-libdecnumber) + $(TARGET-stageautofeedback-libcody) -maybe-clean-stageautofeedback-libdecnumber: clean-stageautofeedback-libdecnumber -clean-stageautofeedback: clean-stageautofeedback-libdecnumber -clean-stageautofeedback-libdecnumber: +maybe-clean-stageautofeedback-libcody: clean-stageautofeedback-libcody +clean-stageautofeedback: clean-stageautofeedback-libcody +clean-stageautofeedback-libcody: @if [ $(current_stage) = stageautofeedback ]; then \ - [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libcody/Makefile ] || exit 0; \ else \ - [ -f $(HOST_SUBDIR)/stageautofeedback-libdecnumber/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stageautofeedback-libcody/Makefile ] || exit 0; \ $(MAKE) stageautofeedback-start; \ fi; \ - cd $(HOST_SUBDIR)/libdecnumber && \ + cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif libdecnumber-bootstrap +@endif libcody-bootstrap -.PHONY: check-libdecnumber maybe-check-libdecnumber -maybe-check-libdecnumber: -@if libdecnumber -maybe-check-libdecnumber: check-libdecnumber +.PHONY: 
check-libcody maybe-check-libcody +maybe-check-libcody: +@if libcody +maybe-check-libcody: check-libcody -check-libdecnumber: +check-libcody: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) $(EXTRA_HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(FLAGS_TO_PASS) $(EXTRA_BOOTSTRAP_FLAGS) check) -@endif libdecnumber +@endif libcody -.PHONY: install-libdecnumber maybe-install-libdecnumber -maybe-install-libdecnumber: -@if libdecnumber -maybe-install-libdecnumber: install-libdecnumber +.PHONY: install-libcody maybe-install-libcody +maybe-install-libcody: +@if libcody +maybe-install-libcody: install-libcody -install-libdecnumber: installdirs - @: $(MAKE); $(unstage) - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(FLAGS_TO_PASS) install) +install-libcody: -@endif libdecnumber +@endif libcody -.PHONY: install-strip-libdecnumber maybe-install-strip-libdecnumber -maybe-install-strip-libdecnumber: -@if libdecnumber -maybe-install-strip-libdecnumber: install-strip-libdecnumber +.PHONY: install-strip-libcody maybe-install-strip-libcody +maybe-install-strip-libcody: +@if libcody +maybe-install-strip-libcody: install-strip-libcody -install-strip-libdecnumber: installdirs - @: $(MAKE); $(unstage) - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(FLAGS_TO_PASS) install-strip) +install-strip-libcody: -@endif libdecnumber +@endif libcody # Other targets (info, dvi, pdf, etc.) -.PHONY: maybe-info-libdecnumber info-libdecnumber -maybe-info-libdecnumber: -@if libdecnumber -maybe-info-libdecnumber: info-libdecnumber +.PHONY: maybe-info-libcody info-libcody +maybe-info-libcody: +@if libcody +maybe-info-libcody: info-libcody -info-libdecnumber: \ - configure-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing info in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - info) \ - || exit 1 +# libcody doesn't support info. 
+info-libcody: -@endif libdecnumber +@endif libcody -.PHONY: maybe-dvi-libdecnumber dvi-libdecnumber -maybe-dvi-libdecnumber: -@if libdecnumber -maybe-dvi-libdecnumber: dvi-libdecnumber +.PHONY: maybe-dvi-libcody dvi-libcody +maybe-dvi-libcody: +@if libcody +maybe-dvi-libcody: dvi-libcody -dvi-libdecnumber: \ - configure-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ +dvi-libcody: \ + configure-libcody + @[ -f ./libcody/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing dvi in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + echo "Doing dvi in libcody"; \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -25606,74 +25899,44 @@ dvi-libdecnumber: \ dvi) \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: maybe-pdf-libdecnumber pdf-libdecnumber -maybe-pdf-libdecnumber: -@if libdecnumber -maybe-pdf-libdecnumber: pdf-libdecnumber +.PHONY: maybe-pdf-libcody pdf-libcody +maybe-pdf-libcody: +@if libcody +maybe-pdf-libcody: pdf-libcody -pdf-libdecnumber: \ - configure-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing pdf in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - pdf) \ - || exit 1 +# libcody doesn't support pdf. +pdf-libcody: -@endif libdecnumber +@endif libcody -.PHONY: maybe-html-libdecnumber html-libdecnumber -maybe-html-libdecnumber: -@if libdecnumber -maybe-html-libdecnumber: html-libdecnumber +.PHONY: maybe-html-libcody html-libcody +maybe-html-libcody: +@if libcody +maybe-html-libcody: html-libcody -html-libdecnumber: \ - configure-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing html in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - html) \ - || exit 1 +# libcody doesn't support html. 
+html-libcody: -@endif libdecnumber +@endif libcody -.PHONY: maybe-TAGS-libdecnumber TAGS-libdecnumber -maybe-TAGS-libdecnumber: -@if libdecnumber -maybe-TAGS-libdecnumber: TAGS-libdecnumber +.PHONY: maybe-TAGS-libcody TAGS-libcody +maybe-TAGS-libcody: +@if libcody +maybe-TAGS-libcody: TAGS-libcody -TAGS-libdecnumber: \ - configure-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ +TAGS-libcody: \ + configure-libcody + @[ -f ./libcody/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing TAGS in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + echo "Doing TAGS in libcody"; \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -25681,328 +25944,965 @@ TAGS-libdecnumber: \ TAGS) \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: maybe-install-info-libdecnumber install-info-libdecnumber -maybe-install-info-libdecnumber: -@if libdecnumber -maybe-install-info-libdecnumber: install-info-libdecnumber +.PHONY: maybe-install-info-libcody install-info-libcody +maybe-install-info-libcody: +@if libcody +maybe-install-info-libcody: install-info-libcody -install-info-libdecnumber: \ - configure-libdecnumber \ - info-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ +# libcody doesn't support install-info. +install-info-libcody: + +@endif libcody + +.PHONY: maybe-install-pdf-libcody install-pdf-libcody +maybe-install-pdf-libcody: +@if libcody +maybe-install-pdf-libcody: install-pdf-libcody + +# libcody doesn't support install-pdf. +install-pdf-libcody: + +@endif libcody + +.PHONY: maybe-install-html-libcody install-html-libcody +maybe-install-html-libcody: +@if libcody +maybe-install-html-libcody: install-html-libcody + +# libcody doesn't support install-html. 
+install-html-libcody: + +@endif libcody + +.PHONY: maybe-installcheck-libcody installcheck-libcody +maybe-installcheck-libcody: +@if libcody +maybe-installcheck-libcody: installcheck-libcody + +installcheck-libcody: \ + configure-libcody + @[ -f ./libcody/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-info in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + echo "Doing installcheck in libcody"; \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-info) \ + installcheck) \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: maybe-install-pdf-libdecnumber install-pdf-libdecnumber -maybe-install-pdf-libdecnumber: -@if libdecnumber -maybe-install-pdf-libdecnumber: install-pdf-libdecnumber +.PHONY: maybe-mostlyclean-libcody mostlyclean-libcody +maybe-mostlyclean-libcody: +@if libcody +maybe-mostlyclean-libcody: mostlyclean-libcody -install-pdf-libdecnumber: \ - configure-libdecnumber \ - pdf-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ +mostlyclean-libcody: + @[ -f ./libcody/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-pdf in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + echo "Doing mostlyclean in libcody"; \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-pdf) \ + mostlyclean) \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: maybe-install-html-libdecnumber install-html-libdecnumber -maybe-install-html-libdecnumber: -@if libdecnumber -maybe-install-html-libdecnumber: install-html-libdecnumber +.PHONY: maybe-clean-libcody clean-libcody +maybe-clean-libcody: +@if libcody +maybe-clean-libcody: clean-libcody -install-html-libdecnumber: \ - configure-libdecnumber \ - html-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ +clean-libcody: + @[ -f ./libcody/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-html in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + echo "Doing clean in libcody"; \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-html) \ + clean) \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: maybe-installcheck-libdecnumber installcheck-libdecnumber -maybe-installcheck-libdecnumber: -@if libdecnumber -maybe-installcheck-libdecnumber: installcheck-libdecnumber +.PHONY: maybe-distclean-libcody distclean-libcody +maybe-distclean-libcody: +@if libcody +maybe-distclean-libcody: distclean-libcody 
-installcheck-libdecnumber: \ - configure-libdecnumber - @[ -f ./libdecnumber/Makefile ] || exit 0; \ +distclean-libcody: + @[ -f ./libcody/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing installcheck in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + echo "Doing distclean in libcody"; \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - installcheck) \ + distclean) \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: maybe-mostlyclean-libdecnumber mostlyclean-libdecnumber -maybe-mostlyclean-libdecnumber: -@if libdecnumber -maybe-mostlyclean-libdecnumber: mostlyclean-libdecnumber +.PHONY: maybe-maintainer-clean-libcody maintainer-clean-libcody +maybe-maintainer-clean-libcody: +@if libcody +maybe-maintainer-clean-libcody: maintainer-clean-libcody -mostlyclean-libdecnumber: - @[ -f ./libdecnumber/Makefile ] || exit 0; \ +maintainer-clean-libcody: + @[ -f ./libcody/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing mostlyclean in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ + echo "Doing maintainer-clean in libcody"; \ + (cd $(HOST_SUBDIR)/libcody && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - mostlyclean) \ + maintainer-clean) \ || exit 1 -@endif libdecnumber +@endif libcody -.PHONY: maybe-clean-libdecnumber clean-libdecnumber -maybe-clean-libdecnumber: -@if libdecnumber -maybe-clean-libdecnumber: clean-libdecnumber -clean-libdecnumber: - @[ -f ./libdecnumber/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ + +.PHONY: configure-libdecnumber maybe-configure-libdecnumber +maybe-configure-libdecnumber: +@if gcc-bootstrap +configure-libdecnumber: stage_current +@endif gcc-bootstrap +@if libdecnumber +maybe-configure-libdecnumber: configure-libdecnumber +configure-libdecnumber: + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing clean in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - clean) \ + test ! 
-f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/libdecnumber; \ + cd "$(HOST_SUBDIR)/libdecnumber" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ || exit 1 - @endif libdecnumber -.PHONY: maybe-distclean-libdecnumber distclean-libdecnumber -maybe-distclean-libdecnumber: -@if libdecnumber -maybe-distclean-libdecnumber: distclean-libdecnumber -distclean-libdecnumber: - @[ -f ./libdecnumber/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ + +.PHONY: configure-stage1-libdecnumber maybe-configure-stage1-libdecnumber +maybe-configure-stage1-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stage1-libdecnumber: configure-stage1-libdecnumber +configure-stage1-libdecnumber: + @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE1_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing distclean in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - distclean) \ - || exit 1 + CFLAGS="$(STAGE1_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE1_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 1 in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + \ + $(STAGE1_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap -@endif libdecnumber +.PHONY: configure-stage2-libdecnumber maybe-configure-stage2-libdecnumber +maybe-configure-stage2-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stage2-libdecnumber: configure-stage2-libdecnumber +configure-stage2-libdecnumber: + @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE2_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE2_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE2_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE2_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 2 in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE2_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap -.PHONY: maybe-maintainer-clean-libdecnumber maintainer-clean-libdecnumber -maybe-maintainer-clean-libdecnumber: -@if libdecnumber -maybe-maintainer-clean-libdecnumber: maintainer-clean-libdecnumber +.PHONY: configure-stage3-libdecnumber maybe-configure-stage3-libdecnumber +maybe-configure-stage3-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stage3-libdecnumber: configure-stage3-libdecnumber +configure-stage3-libdecnumber: + @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE3_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE3_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE3_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE3_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 3 in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE3_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap -maintainer-clean-libdecnumber: - @[ -f ./libdecnumber/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ +.PHONY: configure-stage4-libdecnumber maybe-configure-stage4-libdecnumber +maybe-configure-stage4-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stage4-libdecnumber: configure-stage4-libdecnumber +configure-stage4-libdecnumber: + @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE4_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing maintainer-clean in libdecnumber"; \ - (cd $(HOST_SUBDIR)/libdecnumber && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - maintainer-clean) \ - || exit 1 + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE4_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE4_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE4_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 4 in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE4_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap -@endif libdecnumber +.PHONY: configure-stageprofile-libdecnumber maybe-configure-stageprofile-libdecnumber +maybe-configure-stageprofile-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stageprofile-libdecnumber: configure-stageprofile-libdecnumber +configure-stageprofile-libdecnumber: + @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEprofile_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEprofile_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEprofile_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEprofile_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage profile in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEprofile_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap + +.PHONY: configure-stagetrain-libdecnumber maybe-configure-stagetrain-libdecnumber +maybe-configure-stagetrain-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stagetrain-libdecnumber: configure-stagetrain-libdecnumber +configure-stagetrain-libdecnumber: + @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEtrain_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEtrain_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEtrain_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEtrain_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage train in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEtrain_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap +.PHONY: configure-stagefeedback-libdecnumber maybe-configure-stagefeedback-libdecnumber +maybe-configure-stagefeedback-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stagefeedback-libdecnumber: configure-stagefeedback-libdecnumber +configure-stagefeedback-libdecnumber: + @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEfeedback_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEfeedback_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEfeedback_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEfeedback_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage feedback in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEfeedback_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap +.PHONY: configure-stageautoprofile-libdecnumber maybe-configure-stageautoprofile-libdecnumber +maybe-configure-stageautoprofile-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stageautoprofile-libdecnumber: configure-stageautoprofile-libdecnumber +configure-stageautoprofile-libdecnumber: + @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEautoprofile_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEautoprofile_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage autoprofile in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEautoprofile_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap -.PHONY: configure-libgui maybe-configure-libgui -maybe-configure-libgui: -@if gcc-bootstrap -configure-libgui: stage_current -@endif gcc-bootstrap -@if libgui -maybe-configure-libgui: configure-libgui -configure-libgui: - @: $(MAKE); $(unstage) +.PHONY: configure-stageautofeedback-libdecnumber maybe-configure-stageautofeedback-libdecnumber +maybe-configure-stageautofeedback-libdecnumber: +@if libdecnumber-bootstrap +maybe-configure-stageautofeedback-libdecnumber: configure-stageautofeedback-libdecnumber +configure-stageautofeedback-libdecnumber: + @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - test ! -f $(HOST_SUBDIR)/libgui/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libgui; \ - $(HOST_EXPORTS) \ - echo Configuring in $(HOST_SUBDIR)/libgui; \ - cd "$(HOST_SUBDIR)/libgui" || exit 1; \ + TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/libdecnumber/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEautofeedback_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEautofeedback_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage autofeedback in $(HOST_SUBDIR)/libdecnumber; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libdecnumber; \ + cd $(HOST_SUBDIR)/libdecnumber || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/libgui/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libdecnumber/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libgui; \ - $(SHELL) \ - $$s/$$module_srcdir/configure \ + module_srcdir=libdecnumber; \ + $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - || exit 1 -@endif libgui + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEautofeedback_CONFIGURE_FLAGS) +@endif libdecnumber-bootstrap -.PHONY: all-libgui maybe-all-libgui -maybe-all-libgui: +.PHONY: all-libdecnumber maybe-all-libdecnumber +maybe-all-libdecnumber: @if gcc-bootstrap -all-libgui: stage_current +all-libdecnumber: stage_current @endif gcc-bootstrap -@if libgui -TARGET-libgui=all -maybe-all-libgui: all-libgui -all-libgui: configure-libgui - @: $(MAKE); $(unstage) +@if libdecnumber +TARGET-libdecnumber=all +maybe-all-libdecnumber: all-libdecnumber +all-libdecnumber: configure-libdecnumber @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libgui && \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ - $(TARGET-libgui)) -@endif libgui - - + $(TARGET-libdecnumber)) +@endif libdecnumber -.PHONY: check-libgui maybe-check-libgui -maybe-check-libgui: -@if libgui -maybe-check-libgui: check-libgui -check-libgui: - @: $(MAKE); $(unstage) +.PHONY: all-stage1-libdecnumber maybe-all-stage1-libdecnumber +.PHONY: clean-stage1-libdecnumber maybe-clean-stage1-libdecnumber +maybe-all-stage1-libdecnumber: +maybe-clean-stage1-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stage1-libdecnumber: all-stage1-libdecnumber +all-stage1: all-stage1-libdecnumber +TARGET-stage1-libdecnumber = $(TARGET-libdecnumber) +all-stage1-libdecnumber: configure-stage1-libdecnumber + @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE1_TFLAGS)"; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libgui && \ - $(MAKE) $(FLAGS_TO_PASS) check) + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE1_CXXFLAGS)" \ + LIBCFLAGS="$(LIBCFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) \ + $(STAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE1_TFLAGS)" \ + $(TARGET-stage1-libdecnumber) -@endif libgui +maybe-clean-stage1-libdecnumber: clean-stage1-libdecnumber +clean-stage1: clean-stage1-libdecnumber +clean-stage1-libdecnumber: + @if [ $(current_stage) = stage1 ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage1-libdecnumber/Makefile 
] || exit 0; \ + $(MAKE) stage1-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) \ + $(STAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap -.PHONY: install-libgui maybe-install-libgui -maybe-install-libgui: -@if libgui -maybe-install-libgui: install-libgui -install-libgui: installdirs - @: $(MAKE); $(unstage) +.PHONY: all-stage2-libdecnumber maybe-all-stage2-libdecnumber +.PHONY: clean-stage2-libdecnumber maybe-clean-stage2-libdecnumber +maybe-all-stage2-libdecnumber: +maybe-clean-stage2-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stage2-libdecnumber: all-stage2-libdecnumber +all-stage2: all-stage2-libdecnumber +TARGET-stage2-libdecnumber = $(TARGET-libdecnumber) +all-stage2-libdecnumber: configure-stage2-libdecnumber + @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE2_TFLAGS)"; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libgui && \ - $(MAKE) $(FLAGS_TO_PASS) install) + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE2_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE2_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE2_TFLAGS)" \ + $(TARGET-stage2-libdecnumber) -@endif libgui +maybe-clean-stage2-libdecnumber: clean-stage2-libdecnumber +clean-stage2: clean-stage2-libdecnumber +clean-stage2-libdecnumber: + @if [ $(current_stage) = stage2 ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage2-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stage2-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap -.PHONY: install-strip-libgui maybe-install-strip-libgui -maybe-install-strip-libgui: -@if libgui -maybe-install-strip-libgui: install-strip-libgui -install-strip-libgui: installdirs - @: $(MAKE); $(unstage) +.PHONY: all-stage3-libdecnumber maybe-all-stage3-libdecnumber +.PHONY: clean-stage3-libdecnumber maybe-clean-stage3-libdecnumber +maybe-all-stage3-libdecnumber: +maybe-clean-stage3-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stage3-libdecnumber: all-stage3-libdecnumber +all-stage3: all-stage3-libdecnumber +TARGET-stage3-libdecnumber = $(TARGET-libdecnumber) +all-stage3-libdecnumber: configure-stage3-libdecnumber + @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE3_TFLAGS)"; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/libgui && \ - $(MAKE) $(FLAGS_TO_PASS) install-strip) - -@endif libgui + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE3_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE3_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE3_TFLAGS)" \ + $(TARGET-stage3-libdecnumber) -# Other targets (info, dvi, pdf, etc.) 
+maybe-clean-stage3-libdecnumber: clean-stage3-libdecnumber +clean-stage3: clean-stage3-libdecnumber +clean-stage3-libdecnumber: + @if [ $(current_stage) = stage3 ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage3-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stage3-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap -.PHONY: maybe-info-libgui info-libgui -maybe-info-libgui: -@if libgui -maybe-info-libgui: info-libgui -info-libgui: \ - configure-libgui +.PHONY: all-stage4-libdecnumber maybe-all-stage4-libdecnumber +.PHONY: clean-stage4-libdecnumber maybe-clean-stage4-libdecnumber +maybe-all-stage4-libdecnumber: +maybe-clean-stage4-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stage4-libdecnumber: all-stage4-libdecnumber +all-stage4: all-stage4-libdecnumber +TARGET-stage4-libdecnumber = $(TARGET-libdecnumber) +all-stage4-libdecnumber: configure-stage4-libdecnumber + @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE4_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE4_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE4_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE4_TFLAGS)" \ + $(TARGET-stage4-libdecnumber) + +maybe-clean-stage4-libdecnumber: clean-stage4-libdecnumber +clean-stage4: clean-stage4-libdecnumber +clean-stage4-libdecnumber: + @if [ $(current_stage) = stage4 ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage4-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stage4-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap + + +.PHONY: all-stageprofile-libdecnumber maybe-all-stageprofile-libdecnumber +.PHONY: clean-stageprofile-libdecnumber maybe-clean-stageprofile-libdecnumber +maybe-all-stageprofile-libdecnumber: +maybe-clean-stageprofile-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stageprofile-libdecnumber: all-stageprofile-libdecnumber +all-stageprofile: all-stageprofile-libdecnumber +TARGET-stageprofile-libdecnumber = $(TARGET-libdecnumber) +all-stageprofile-libdecnumber: configure-stageprofile-libdecnumber + @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEprofile_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEprofile_TFLAGS)" \ + $(TARGET-stageprofile-libdecnumber) + +maybe-clean-stageprofile-libdecnumber: 
clean-stageprofile-libdecnumber +clean-stageprofile: clean-stageprofile-libdecnumber +clean-stageprofile-libdecnumber: + @if [ $(current_stage) = stageprofile ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stageprofile-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stageprofile-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap + + +.PHONY: all-stagetrain-libdecnumber maybe-all-stagetrain-libdecnumber +.PHONY: clean-stagetrain-libdecnumber maybe-clean-stagetrain-libdecnumber +maybe-all-stagetrain-libdecnumber: +maybe-clean-stagetrain-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stagetrain-libdecnumber: all-stagetrain-libdecnumber +all-stagetrain: all-stagetrain-libdecnumber +TARGET-stagetrain-libdecnumber = $(TARGET-libdecnumber) +all-stagetrain-libdecnumber: configure-stagetrain-libdecnumber + @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEtrain_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEtrain_TFLAGS)" \ + $(TARGET-stagetrain-libdecnumber) + +maybe-clean-stagetrain-libdecnumber: clean-stagetrain-libdecnumber +clean-stagetrain: clean-stagetrain-libdecnumber +clean-stagetrain-libdecnumber: + @if [ $(current_stage) = stagetrain ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stagetrain-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stagetrain-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap + + +.PHONY: all-stagefeedback-libdecnumber maybe-all-stagefeedback-libdecnumber +.PHONY: clean-stagefeedback-libdecnumber maybe-clean-stagefeedback-libdecnumber +maybe-all-stagefeedback-libdecnumber: +maybe-clean-stagefeedback-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stagefeedback-libdecnumber: all-stagefeedback-libdecnumber +all-stagefeedback: all-stagefeedback-libdecnumber +TARGET-stagefeedback-libdecnumber = $(TARGET-libdecnumber) +all-stagefeedback-libdecnumber: configure-stagefeedback-libdecnumber + @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEfeedback_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEfeedback_TFLAGS)" \ + $(TARGET-stagefeedback-libdecnumber) + +maybe-clean-stagefeedback-libdecnumber: 
clean-stagefeedback-libdecnumber +clean-stagefeedback: clean-stagefeedback-libdecnumber +clean-stagefeedback-libdecnumber: + @if [ $(current_stage) = stagefeedback ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stagefeedback-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stagefeedback-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap + + +.PHONY: all-stageautoprofile-libdecnumber maybe-all-stageautoprofile-libdecnumber +.PHONY: clean-stageautoprofile-libdecnumber maybe-clean-stageautoprofile-libdecnumber +maybe-all-stageautoprofile-libdecnumber: +maybe-clean-stageautoprofile-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stageautoprofile-libdecnumber: all-stageautoprofile-libdecnumber +all-stageautoprofile: all-stageautoprofile-libdecnumber +TARGET-stageautoprofile-libdecnumber = $(TARGET-libdecnumber) +all-stageautoprofile-libdecnumber: configure-stageautoprofile-libdecnumber + @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $$s/gcc/config/i386/$(AUTO_PROFILE) \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEautoprofile_TFLAGS)" \ + $(TARGET-stageautoprofile-libdecnumber) + +maybe-clean-stageautoprofile-libdecnumber: clean-stageautoprofile-libdecnumber +clean-stageautoprofile: clean-stageautoprofile-libdecnumber +clean-stageautoprofile-libdecnumber: + @if [ $(current_stage) = stageautoprofile ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stageautoprofile-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stageautoprofile-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap + + +.PHONY: all-stageautofeedback-libdecnumber maybe-all-stageautofeedback-libdecnumber +.PHONY: clean-stageautofeedback-libdecnumber maybe-clean-stageautofeedback-libdecnumber +maybe-all-stageautofeedback-libdecnumber: +maybe-clean-stageautofeedback-libdecnumber: +@if libdecnumber-bootstrap +maybe-all-stageautofeedback-libdecnumber: all-stageautofeedback-libdecnumber +all-stageautofeedback: all-stageautofeedback-libdecnumber +TARGET-stageautofeedback-libdecnumber = $(TARGET-libdecnumber) +all-stageautofeedback-libdecnumber: configure-stageautofeedback-libdecnumber + @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libdecnumber && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ + 
CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEautofeedback_TFLAGS)" PERF_DATA=perf.data \ + $(TARGET-stageautofeedback-libdecnumber) + +maybe-clean-stageautofeedback-libdecnumber: clean-stageautofeedback-libdecnumber +clean-stageautofeedback: clean-stageautofeedback-libdecnumber +clean-stageautofeedback-libdecnumber: + @if [ $(current_stage) = stageautofeedback ]; then \ + [ -f $(HOST_SUBDIR)/libdecnumber/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stageautofeedback-libdecnumber/Makefile ] || exit 0; \ + $(MAKE) stageautofeedback-start; \ + fi; \ + cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libdecnumber-bootstrap + + + + + +.PHONY: check-libdecnumber maybe-check-libdecnumber +maybe-check-libdecnumber: +@if libdecnumber +maybe-check-libdecnumber: check-libdecnumber + +check-libdecnumber: @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) $(EXTRA_HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(FLAGS_TO_PASS) $(EXTRA_BOOTSTRAP_FLAGS) check) + +@endif libdecnumber + +.PHONY: install-libdecnumber maybe-install-libdecnumber +maybe-install-libdecnumber: +@if libdecnumber +maybe-install-libdecnumber: install-libdecnumber + +install-libdecnumber: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(FLAGS_TO_PASS) install) + +@endif libdecnumber + +.PHONY: install-strip-libdecnumber maybe-install-strip-libdecnumber +maybe-install-strip-libdecnumber: +@if libdecnumber +maybe-install-strip-libdecnumber: install-strip-libdecnumber + +install-strip-libdecnumber: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) + +@endif libdecnumber + +# Other targets (info, dvi, pdf, etc.) 
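(Editorial aside, not part of the generated diff.) The stage-specific rules above all instantiate one pattern: switch the tree to the requested stage if it is not current (stage2-start, stage3-start, ...), cd into $(HOST_SUBDIR)/libdecnumber, and re-invoke make with that stage's flag set (STAGE2_CFLAGS, STAGE2_TFLAGS, ...) plus $(POSTSTAGE1_FLAGS_TO_PASS) for the post-stage1 stages. A minimal usage sketch, assuming only an already configured top-level build tree; every target named here is defined in the hunks above:

  make all-stage2-libdecnumber     # build libdecnumber within stage 2 only
  make clean-stage2-libdecnumber   # remove just its stage 2 objects
  make check-libdecnumber          # run its testsuite (unstaged)

The stage prefix in the target name is what selects which STAGE<n>_* variable block gets passed down to the sub-make.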
+ +.PHONY: maybe-info-libdecnumber info-libdecnumber +maybe-info-libdecnumber: +@if libdecnumber +maybe-info-libdecnumber: info-libdecnumber + +info-libdecnumber: \ + configure-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing info in libgui"; \ - (cd $(HOST_SUBDIR)/libgui && \ + echo "Doing info in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26010,25 +26910,24 @@ info-libgui: \ info) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-dvi-libgui dvi-libgui -maybe-dvi-libgui: -@if libgui -maybe-dvi-libgui: dvi-libgui +.PHONY: maybe-dvi-libdecnumber dvi-libdecnumber +maybe-dvi-libdecnumber: +@if libdecnumber +maybe-dvi-libdecnumber: dvi-libdecnumber -dvi-libgui: \ - configure-libgui - @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ +dvi-libdecnumber: \ + configure-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing dvi in libgui"; \ - (cd $(HOST_SUBDIR)/libgui && \ + echo "Doing dvi in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26036,25 +26935,24 @@ dvi-libgui: \ dvi) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-pdf-libgui pdf-libgui -maybe-pdf-libgui: -@if libgui -maybe-pdf-libgui: pdf-libgui +.PHONY: maybe-pdf-libdecnumber pdf-libdecnumber +maybe-pdf-libdecnumber: +@if libdecnumber +maybe-pdf-libdecnumber: pdf-libdecnumber -pdf-libgui: \ - configure-libgui - @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ +pdf-libdecnumber: \ + configure-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing pdf in libgui"; \ - (cd $(HOST_SUBDIR)/libgui && \ + echo "Doing pdf in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26062,25 +26960,24 @@ pdf-libgui: \ pdf) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-html-libgui html-libgui -maybe-html-libgui: -@if libgui -maybe-html-libgui: html-libgui +.PHONY: maybe-html-libdecnumber html-libdecnumber +maybe-html-libdecnumber: +@if libdecnumber +maybe-html-libdecnumber: html-libdecnumber -html-libgui: \ - configure-libgui - @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ +html-libdecnumber: \ + configure-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing html in libgui"; \ - (cd 
$(HOST_SUBDIR)/libgui && \ + echo "Doing html in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26088,25 +26985,24 @@ html-libgui: \ html) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-TAGS-libgui TAGS-libgui -maybe-TAGS-libgui: -@if libgui -maybe-TAGS-libgui: TAGS-libgui +.PHONY: maybe-TAGS-libdecnumber TAGS-libdecnumber +maybe-TAGS-libdecnumber: +@if libdecnumber +maybe-TAGS-libdecnumber: TAGS-libdecnumber -TAGS-libgui: \ - configure-libgui - @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ +TAGS-libdecnumber: \ + configure-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing TAGS in libgui"; \ - (cd $(HOST_SUBDIR)/libgui && \ + echo "Doing TAGS in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26114,26 +27010,25 @@ TAGS-libgui: \ TAGS) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-install-info-libgui install-info-libgui -maybe-install-info-libgui: -@if libgui -maybe-install-info-libgui: install-info-libgui +.PHONY: maybe-install-info-libdecnumber install-info-libdecnumber +maybe-install-info-libdecnumber: +@if libdecnumber +maybe-install-info-libdecnumber: install-info-libdecnumber -install-info-libgui: \ - configure-libgui \ - info-libgui - @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ +install-info-libdecnumber: \ + configure-libdecnumber \ + info-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-info in libgui"; \ - (cd $(HOST_SUBDIR)/libgui && \ + echo "Doing install-info in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26141,26 +27036,25 @@ install-info-libgui: \ install-info) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-install-pdf-libgui install-pdf-libgui -maybe-install-pdf-libgui: -@if libgui -maybe-install-pdf-libgui: install-pdf-libgui +.PHONY: maybe-install-pdf-libdecnumber install-pdf-libdecnumber +maybe-install-pdf-libdecnumber: +@if libdecnumber +maybe-install-pdf-libdecnumber: install-pdf-libdecnumber -install-pdf-libgui: \ - configure-libgui \ - pdf-libgui - @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ +install-pdf-libdecnumber: \ + configure-libdecnumber \ + pdf-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-pdf in libgui"; \ - (cd $(HOST_SUBDIR)/libgui && \ + echo "Doing install-pdf in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" 
"CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26168,26 +27062,25 @@ install-pdf-libgui: \ install-pdf) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-install-html-libgui install-html-libgui -maybe-install-html-libgui: -@if libgui -maybe-install-html-libgui: install-html-libgui +.PHONY: maybe-install-html-libdecnumber install-html-libdecnumber +maybe-install-html-libdecnumber: +@if libdecnumber +maybe-install-html-libdecnumber: install-html-libdecnumber -install-html-libgui: \ - configure-libgui \ - html-libgui - @: $(MAKE); $(unstage) - @[ -f ./libgui/Makefile ] || exit 0; \ +install-html-libdecnumber: \ + configure-libdecnumber \ + html-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-html in libgui"; \ - (cd $(HOST_SUBDIR)/libgui && \ + echo "Doing install-html in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -26195,15 +27088,451 @@ install-html-libgui: \ install-html) \ || exit 1 -@endif libgui +@endif libdecnumber -.PHONY: maybe-installcheck-libgui installcheck-libgui -maybe-installcheck-libgui: -@if libgui -maybe-installcheck-libgui: installcheck-libgui +.PHONY: maybe-installcheck-libdecnumber installcheck-libdecnumber +maybe-installcheck-libdecnumber: +@if libdecnumber +maybe-installcheck-libdecnumber: installcheck-libdecnumber -installcheck-libgui: \ - configure-libgui +installcheck-libdecnumber: \ + configure-libdecnumber + @[ -f ./libdecnumber/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing installcheck in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 + +@endif libdecnumber + +.PHONY: maybe-mostlyclean-libdecnumber mostlyclean-libdecnumber +maybe-mostlyclean-libdecnumber: +@if libdecnumber +maybe-mostlyclean-libdecnumber: mostlyclean-libdecnumber + +mostlyclean-libdecnumber: + @[ -f ./libdecnumber/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing mostlyclean in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 + +@endif libdecnumber + +.PHONY: maybe-clean-libdecnumber clean-libdecnumber +maybe-clean-libdecnumber: +@if libdecnumber +maybe-clean-libdecnumber: clean-libdecnumber + +clean-libdecnumber: + @[ -f ./libdecnumber/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag 
in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing clean in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif libdecnumber + +.PHONY: maybe-distclean-libdecnumber distclean-libdecnumber +maybe-distclean-libdecnumber: +@if libdecnumber +maybe-distclean-libdecnumber: distclean-libdecnumber + +distclean-libdecnumber: + @[ -f ./libdecnumber/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing distclean in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif libdecnumber + +.PHONY: maybe-maintainer-clean-libdecnumber maintainer-clean-libdecnumber +maybe-maintainer-clean-libdecnumber: +@if libdecnumber +maybe-maintainer-clean-libdecnumber: maintainer-clean-libdecnumber + +maintainer-clean-libdecnumber: + @[ -f ./libdecnumber/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing maintainer-clean in libdecnumber"; \ + (cd $(HOST_SUBDIR)/libdecnumber && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif libdecnumber + + + +.PHONY: configure-libgui maybe-configure-libgui +maybe-configure-libgui: +@if gcc-bootstrap +configure-libgui: stage_current +@endif gcc-bootstrap +@if libgui +maybe-configure-libgui: configure-libgui +configure-libgui: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + test ! 
-f $(HOST_SUBDIR)/libgui/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libgui; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/libgui; \ + cd "$(HOST_SUBDIR)/libgui" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libgui/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libgui; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + || exit 1 +@endif libgui + + + + + +.PHONY: all-libgui maybe-all-libgui +maybe-all-libgui: +@if gcc-bootstrap +all-libgui: stage_current +@endif gcc-bootstrap +@if libgui +TARGET-libgui=all +maybe-all-libgui: all-libgui +all-libgui: configure-libgui + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-libgui)) +@endif libgui + + + + +.PHONY: check-libgui maybe-check-libgui +maybe-check-libgui: +@if libgui +maybe-check-libgui: check-libgui + +check-libgui: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(FLAGS_TO_PASS) check) + +@endif libgui + +.PHONY: install-libgui maybe-install-libgui +maybe-install-libgui: +@if libgui +maybe-install-libgui: install-libgui + +install-libgui: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(FLAGS_TO_PASS) install) + +@endif libgui + +.PHONY: install-strip-libgui maybe-install-strip-libgui +maybe-install-strip-libgui: +@if libgui +maybe-install-strip-libgui: install-strip-libgui + +install-strip-libgui: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) + +@endif libgui + +# Other targets (info, dvi, pdf, etc.) 
+ +.PHONY: maybe-info-libgui info-libgui +maybe-info-libgui: +@if libgui +maybe-info-libgui: info-libgui + +info-libgui: \ + configure-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing info in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + info) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-dvi-libgui dvi-libgui +maybe-dvi-libgui: +@if libgui +maybe-dvi-libgui: dvi-libgui + +dvi-libgui: \ + configure-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing dvi in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + dvi) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-pdf-libgui pdf-libgui +maybe-pdf-libgui: +@if libgui +maybe-pdf-libgui: pdf-libgui + +pdf-libgui: \ + configure-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing pdf in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + pdf) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-html-libgui html-libgui +maybe-html-libgui: +@if libgui +maybe-html-libgui: html-libgui + +html-libgui: \ + configure-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing html in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + html) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-TAGS-libgui TAGS-libgui +maybe-TAGS-libgui: +@if libgui +maybe-TAGS-libgui: TAGS-libgui + +TAGS-libgui: \ + configure-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing TAGS in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" 
"CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + TAGS) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-install-info-libgui install-info-libgui +maybe-install-info-libgui: +@if libgui +maybe-install-info-libgui: install-info-libgui + +install-info-libgui: \ + configure-libgui \ + info-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-info in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-info) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-install-pdf-libgui install-pdf-libgui +maybe-install-pdf-libgui: +@if libgui +maybe-install-pdf-libgui: install-pdf-libgui + +install-pdf-libgui: \ + configure-libgui \ + pdf-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-pdf in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-pdf) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-install-html-libgui install-html-libgui +maybe-install-html-libgui: +@if libgui +maybe-install-html-libgui: install-html-libgui + +install-html-libgui: \ + configure-libgui \ + html-libgui + @: $(MAKE); $(unstage) + @[ -f ./libgui/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-html in libgui"; \ + (cd $(HOST_SUBDIR)/libgui && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 + +@endif libgui + +.PHONY: maybe-installcheck-libgui installcheck-libgui +maybe-installcheck-libgui: +@if libgui +maybe-installcheck-libgui: installcheck-libgui + +installcheck-libgui: \ + configure-libgui @: $(MAKE); $(unstage) @[ -f ./libgui/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ @@ -26702,6 +28031,7 @@ all-stage1-libiberty: configure-stage1-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -26746,6 +28076,7 @@ all-stage2-libiberty: configure-stage2-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -26788,6 +28119,7 @@ all-stage3-libiberty: 
configure-stage3-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -26830,6 +28162,7 @@ all-stage4-libiberty: configure-stage4-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -26872,6 +28205,7 @@ all-stageprofile-libiberty: configure-stageprofile-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -26914,6 +28248,7 @@ all-stagetrain-libiberty: configure-stagetrain-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -26956,6 +28291,7 @@ all-stagefeedback-libiberty: configure-stagefeedback-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -26998,6 +28334,7 @@ all-stageautoprofile-libiberty: configure-stageautoprofile-libiberty $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -27040,6 +28377,7 @@ all-stageautofeedback-libiberty: configure-stageautofeedback-libiberty \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -27816,6 +29154,7 @@ all-stage1-libiberty-linker-plugin: configure-stage1-libiberty-linker-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -27860,6 +29199,7 @@ all-stage2-libiberty-linker-plugin: configure-stage2-libiberty-linker-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -27902,6 +29242,7 @@ all-stage3-libiberty-linker-plugin: configure-stage3-libiberty-linker-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -27944,6 +29285,7 @@ all-stage4-libiberty-linker-plugin: configure-stage4-libiberty-linker-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -27986,6 +29328,7 @@ 
all-stageprofile-libiberty-linker-plugin: configure-stageprofile-libiberty-linke \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -28028,6 +29371,7 @@ all-stagetrain-libiberty-linker-plugin: configure-stagetrain-libiberty-linker-pl \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -28070,6 +29414,7 @@ all-stagefeedback-libiberty-linker-plugin: configure-stagefeedback-libiberty-lin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -28112,6 +29457,7 @@ all-stageautoprofile-libiberty-linker-plugin: configure-stageautoprofile-libiber $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -28154,6 +29500,7 @@ all-stageautofeedback-libiberty-linker-plugin: configure-stageautofeedback-libib \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -28930,6 +30277,7 @@ all-stage1-libiconv: configure-stage1-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -28974,6 +30322,7 @@ all-stage2-libiconv: configure-stage2-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -29016,6 +30365,7 @@ all-stage3-libiconv: configure-stage3-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -29058,6 +30408,7 @@ all-stage4-libiconv: configure-stage4-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -29100,6 +30451,7 @@ all-stageprofile-libiconv: configure-stageprofile-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -29142,6 +30494,7 @@ all-stagetrain-libiconv: configure-stagetrain-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ 
CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -29184,6 +30537,7 @@ all-stagefeedback-libiconv: configure-stagefeedback-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -29226,6 +30580,7 @@ all-stageautoprofile-libiconv: configure-stageautoprofile-libiconv $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -29268,6 +30623,7 @@ all-stageautofeedback-libiconv: configure-stageautofeedback-libiconv \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -30367,202 +31723,2189 @@ mostlyclean-readline: @endif readline -.PHONY: maybe-clean-readline clean-readline -maybe-clean-readline: -@if readline -maybe-clean-readline: clean-readline +.PHONY: maybe-clean-readline clean-readline +maybe-clean-readline: +@if readline +maybe-clean-readline: clean-readline + +clean-readline: + @: $(MAKE); $(unstage) + @[ -f ./readline/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing clean in readline"; \ + (cd $(HOST_SUBDIR)/readline && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif readline + +.PHONY: maybe-distclean-readline distclean-readline +maybe-distclean-readline: +@if readline +maybe-distclean-readline: distclean-readline + +distclean-readline: + @: $(MAKE); $(unstage) + @[ -f ./readline/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing distclean in readline"; \ + (cd $(HOST_SUBDIR)/readline && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif readline + +.PHONY: maybe-maintainer-clean-readline maintainer-clean-readline +maybe-maintainer-clean-readline: +@if readline +maybe-maintainer-clean-readline: maintainer-clean-readline + +maintainer-clean-readline: + @: $(MAKE); $(unstage) + @[ -f ./readline/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing maintainer-clean in readline"; \ + (cd $(HOST_SUBDIR)/readline && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + 
"RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif readline + + + +.PHONY: configure-sid maybe-configure-sid +maybe-configure-sid: +@if gcc-bootstrap +configure-sid: stage_current +@endif gcc-bootstrap +@if sid +maybe-configure-sid: configure-sid +configure-sid: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + test ! -f $(HOST_SUBDIR)/sid/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/sid; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/sid; \ + cd "$(HOST_SUBDIR)/sid" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/sid/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=sid; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + || exit 1 +@endif sid + + + + + +.PHONY: all-sid maybe-all-sid +maybe-all-sid: +@if gcc-bootstrap +all-sid: stage_current +@endif gcc-bootstrap +@if sid +TARGET-sid=all +maybe-all-sid: all-sid +all-sid: configure-sid + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-sid)) +@endif sid + + + + +.PHONY: check-sid maybe-check-sid +maybe-check-sid: +@if sid +maybe-check-sid: check-sid + +check-sid: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(FLAGS_TO_PASS) check) + +@endif sid + +.PHONY: install-sid maybe-install-sid +maybe-install-sid: +@if sid +maybe-install-sid: install-sid + +install-sid: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(FLAGS_TO_PASS) install) + +@endif sid + +.PHONY: install-strip-sid maybe-install-strip-sid +maybe-install-strip-sid: +@if sid +maybe-install-strip-sid: install-strip-sid + +install-strip-sid: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) + +@endif sid + +# Other targets (info, dvi, pdf, etc.) 
+ +.PHONY: maybe-info-sid info-sid +maybe-info-sid: +@if sid +maybe-info-sid: info-sid + +info-sid: \ + configure-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing info in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + info) \ + || exit 1 + +@endif sid + +.PHONY: maybe-dvi-sid dvi-sid +maybe-dvi-sid: +@if sid +maybe-dvi-sid: dvi-sid + +dvi-sid: \ + configure-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing dvi in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + dvi) \ + || exit 1 + +@endif sid + +.PHONY: maybe-pdf-sid pdf-sid +maybe-pdf-sid: +@if sid +maybe-pdf-sid: pdf-sid + +pdf-sid: \ + configure-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing pdf in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + pdf) \ + || exit 1 + +@endif sid + +.PHONY: maybe-html-sid html-sid +maybe-html-sid: +@if sid +maybe-html-sid: html-sid + +html-sid: \ + configure-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing html in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + html) \ + || exit 1 + +@endif sid + +.PHONY: maybe-TAGS-sid TAGS-sid +maybe-TAGS-sid: +@if sid +maybe-TAGS-sid: TAGS-sid + +TAGS-sid: \ + configure-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing TAGS in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + TAGS) \ + || exit 1 + +@endif sid + +.PHONY: 
maybe-install-info-sid install-info-sid +maybe-install-info-sid: +@if sid +maybe-install-info-sid: install-info-sid + +install-info-sid: \ + configure-sid \ + info-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-info in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-info) \ + || exit 1 + +@endif sid + +.PHONY: maybe-install-pdf-sid install-pdf-sid +maybe-install-pdf-sid: +@if sid +maybe-install-pdf-sid: install-pdf-sid + +install-pdf-sid: \ + configure-sid \ + pdf-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-pdf in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-pdf) \ + || exit 1 + +@endif sid + +.PHONY: maybe-install-html-sid install-html-sid +maybe-install-html-sid: +@if sid +maybe-install-html-sid: install-html-sid + +install-html-sid: \ + configure-sid \ + html-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-html in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 + +@endif sid + +.PHONY: maybe-installcheck-sid installcheck-sid +maybe-installcheck-sid: +@if sid +maybe-installcheck-sid: installcheck-sid + +installcheck-sid: \ + configure-sid + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing installcheck in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 + +@endif sid + +.PHONY: maybe-mostlyclean-sid mostlyclean-sid +maybe-mostlyclean-sid: +@if sid +maybe-mostlyclean-sid: mostlyclean-sid + +mostlyclean-sid: + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ 
+ echo "Doing mostlyclean in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 + +@endif sid + +.PHONY: maybe-clean-sid clean-sid +maybe-clean-sid: +@if sid +maybe-clean-sid: clean-sid + +clean-sid: + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing clean in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif sid + +.PHONY: maybe-distclean-sid distclean-sid +maybe-distclean-sid: +@if sid +maybe-distclean-sid: distclean-sid + +distclean-sid: + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing distclean in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif sid + +.PHONY: maybe-maintainer-clean-sid maintainer-clean-sid +maybe-maintainer-clean-sid: +@if sid +maybe-maintainer-clean-sid: maintainer-clean-sid + +maintainer-clean-sid: + @: $(MAKE); $(unstage) + @[ -f ./sid/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing maintainer-clean in sid"; \ + (cd $(HOST_SUBDIR)/sid && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif sid + + + +.PHONY: configure-sim maybe-configure-sim +maybe-configure-sim: +@if gcc-bootstrap +configure-sim: stage_current +@endif gcc-bootstrap +@if sim +maybe-configure-sim: configure-sim +configure-sim: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + test ! 
-f $(HOST_SUBDIR)/sim/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/sim; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/sim; \ + cd "$(HOST_SUBDIR)/sim" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/sim/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=sim; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + || exit 1 +@endif sim + + + + + +.PHONY: all-sim maybe-all-sim +maybe-all-sim: +@if gcc-bootstrap +all-sim: stage_current +@endif gcc-bootstrap +@if sim +TARGET-sim=all +maybe-all-sim: all-sim +all-sim: configure-sim + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-sim)) +@endif sim + + + + +.PHONY: check-sim maybe-check-sim +maybe-check-sim: +@if sim +maybe-check-sim: check-sim + +check-sim: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(FLAGS_TO_PASS) check) + +@endif sim + +.PHONY: install-sim maybe-install-sim +maybe-install-sim: +@if sim +maybe-install-sim: install-sim + +install-sim: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(FLAGS_TO_PASS) install) + +@endif sim + +.PHONY: install-strip-sim maybe-install-strip-sim +maybe-install-strip-sim: +@if sim +maybe-install-strip-sim: install-strip-sim + +install-strip-sim: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) + +@endif sim + +# Other targets (info, dvi, pdf, etc.) 
+ +.PHONY: maybe-info-sim info-sim +maybe-info-sim: +@if sim +maybe-info-sim: info-sim + +info-sim: \ + configure-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing info in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + info) \ + || exit 1 + +@endif sim + +.PHONY: maybe-dvi-sim dvi-sim +maybe-dvi-sim: +@if sim +maybe-dvi-sim: dvi-sim + +dvi-sim: \ + configure-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing dvi in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + dvi) \ + || exit 1 + +@endif sim + +.PHONY: maybe-pdf-sim pdf-sim +maybe-pdf-sim: +@if sim +maybe-pdf-sim: pdf-sim + +pdf-sim: \ + configure-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing pdf in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + pdf) \ + || exit 1 + +@endif sim + +.PHONY: maybe-html-sim html-sim +maybe-html-sim: +@if sim +maybe-html-sim: html-sim + +html-sim: \ + configure-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing html in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + html) \ + || exit 1 + +@endif sim + +.PHONY: maybe-TAGS-sim TAGS-sim +maybe-TAGS-sim: +@if sim +maybe-TAGS-sim: TAGS-sim + +TAGS-sim: \ + configure-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing TAGS in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + TAGS) \ + || exit 1 + +@endif sim + +.PHONY: 
maybe-install-info-sim install-info-sim +maybe-install-info-sim: +@if sim +maybe-install-info-sim: install-info-sim + +install-info-sim: \ + configure-sim \ + info-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-info in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-info) \ + || exit 1 + +@endif sim + +.PHONY: maybe-install-pdf-sim install-pdf-sim +maybe-install-pdf-sim: +@if sim +maybe-install-pdf-sim: install-pdf-sim + +install-pdf-sim: \ + configure-sim \ + pdf-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-pdf in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-pdf) \ + || exit 1 + +@endif sim + +.PHONY: maybe-install-html-sim install-html-sim +maybe-install-html-sim: +@if sim +maybe-install-html-sim: install-html-sim + +install-html-sim: \ + configure-sim \ + html-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-html in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 + +@endif sim + +.PHONY: maybe-installcheck-sim installcheck-sim +maybe-installcheck-sim: +@if sim +maybe-installcheck-sim: installcheck-sim + +installcheck-sim: \ + configure-sim + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing installcheck in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 + +@endif sim + +.PHONY: maybe-mostlyclean-sim mostlyclean-sim +maybe-mostlyclean-sim: +@if sim +maybe-mostlyclean-sim: mostlyclean-sim + +mostlyclean-sim: + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ 
+ echo "Doing mostlyclean in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 + +@endif sim + +.PHONY: maybe-clean-sim clean-sim +maybe-clean-sim: +@if sim +maybe-clean-sim: clean-sim + +clean-sim: + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing clean in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif sim + +.PHONY: maybe-distclean-sim distclean-sim +maybe-distclean-sim: +@if sim +maybe-distclean-sim: distclean-sim + +distclean-sim: + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing distclean in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif sim + +.PHONY: maybe-maintainer-clean-sim maintainer-clean-sim +maybe-maintainer-clean-sim: +@if sim +maybe-maintainer-clean-sim: maintainer-clean-sim + +maintainer-clean-sim: + @: $(MAKE); $(unstage) + @[ -f ./sim/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing maintainer-clean in sim"; \ + (cd $(HOST_SUBDIR)/sim && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif sim + + + +.PHONY: configure-texinfo maybe-configure-texinfo +maybe-configure-texinfo: +@if gcc-bootstrap +configure-texinfo: stage_current +@endif gcc-bootstrap +@if texinfo +maybe-configure-texinfo: configure-texinfo +configure-texinfo: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + test ! 
-f $(HOST_SUBDIR)/texinfo/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/texinfo; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/texinfo; \ + cd "$(HOST_SUBDIR)/texinfo" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/texinfo/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=texinfo; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + || exit 1 +@endif texinfo + + + + + +.PHONY: all-texinfo maybe-all-texinfo +maybe-all-texinfo: +@if gcc-bootstrap +all-texinfo: stage_current +@endif gcc-bootstrap +@if texinfo +TARGET-texinfo=all +maybe-all-texinfo: all-texinfo +all-texinfo: configure-texinfo + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-texinfo)) +@endif texinfo + + + + +.PHONY: check-texinfo maybe-check-texinfo +maybe-check-texinfo: +@if texinfo +maybe-check-texinfo: check-texinfo + +check-texinfo: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(FLAGS_TO_PASS) check) + +@endif texinfo + +.PHONY: install-texinfo maybe-install-texinfo +maybe-install-texinfo: +@if texinfo +maybe-install-texinfo: install-texinfo + +install-texinfo: + +@endif texinfo + +.PHONY: install-strip-texinfo maybe-install-strip-texinfo +maybe-install-strip-texinfo: +@if texinfo +maybe-install-strip-texinfo: install-strip-texinfo + +install-strip-texinfo: + +@endif texinfo + +# Other targets (info, dvi, pdf, etc.) 
+ +.PHONY: maybe-info-texinfo info-texinfo +maybe-info-texinfo: +@if texinfo +maybe-info-texinfo: info-texinfo + +info-texinfo: \ + configure-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing info in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + info) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-dvi-texinfo dvi-texinfo +maybe-dvi-texinfo: +@if texinfo +maybe-dvi-texinfo: dvi-texinfo + +dvi-texinfo: \ + configure-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing dvi in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + dvi) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-pdf-texinfo pdf-texinfo +maybe-pdf-texinfo: +@if texinfo +maybe-pdf-texinfo: pdf-texinfo + +pdf-texinfo: \ + configure-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing pdf in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + pdf) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-html-texinfo html-texinfo +maybe-html-texinfo: +@if texinfo +maybe-html-texinfo: html-texinfo + +html-texinfo: \ + configure-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing html in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + html) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-TAGS-texinfo TAGS-texinfo +maybe-TAGS-texinfo: +@if texinfo +maybe-TAGS-texinfo: TAGS-texinfo + +TAGS-texinfo: \ + configure-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing TAGS in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) 
$(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + TAGS) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-install-info-texinfo install-info-texinfo +maybe-install-info-texinfo: +@if texinfo +maybe-install-info-texinfo: install-info-texinfo + +install-info-texinfo: \ + configure-texinfo \ + info-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-info in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-info) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-install-pdf-texinfo install-pdf-texinfo +maybe-install-pdf-texinfo: +@if texinfo +maybe-install-pdf-texinfo: install-pdf-texinfo + +install-pdf-texinfo: \ + configure-texinfo \ + pdf-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-pdf in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-pdf) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-install-html-texinfo install-html-texinfo +maybe-install-html-texinfo: +@if texinfo +maybe-install-html-texinfo: install-html-texinfo + +install-html-texinfo: \ + configure-texinfo \ + html-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-html in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-installcheck-texinfo installcheck-texinfo +maybe-installcheck-texinfo: +@if texinfo +maybe-installcheck-texinfo: installcheck-texinfo + +installcheck-texinfo: \ + configure-texinfo + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing installcheck in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 + 
+@endif texinfo + +.PHONY: maybe-mostlyclean-texinfo mostlyclean-texinfo +maybe-mostlyclean-texinfo: +@if texinfo +maybe-mostlyclean-texinfo: mostlyclean-texinfo + +mostlyclean-texinfo: + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing mostlyclean in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-clean-texinfo clean-texinfo +maybe-clean-texinfo: +@if texinfo +maybe-clean-texinfo: clean-texinfo + +clean-texinfo: + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing clean in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-distclean-texinfo distclean-texinfo +maybe-distclean-texinfo: +@if texinfo +maybe-distclean-texinfo: distclean-texinfo + +distclean-texinfo: + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing distclean in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif texinfo + +.PHONY: maybe-maintainer-clean-texinfo maintainer-clean-texinfo +maybe-maintainer-clean-texinfo: +@if texinfo +maybe-maintainer-clean-texinfo: maintainer-clean-texinfo + +maintainer-clean-texinfo: + @: $(MAKE); $(unstage) + @[ -f ./texinfo/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing maintainer-clean in texinfo"; \ + (cd $(HOST_SUBDIR)/texinfo && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif texinfo + + + +.PHONY: configure-zlib maybe-configure-zlib +maybe-configure-zlib: +@if gcc-bootstrap +configure-zlib: stage_current +@endif gcc-bootstrap +@if zlib +maybe-configure-zlib: configure-zlib +configure-zlib: + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/zlib; \ + cd "$(HOST_SUBDIR)/zlib" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} @extra_host_zlib_configure_flags@ \ + || exit 1 +@endif zlib + + + +.PHONY: configure-stage1-zlib maybe-configure-stage1-zlib +maybe-configure-stage1-zlib: +@if zlib-bootstrap +maybe-configure-stage1-zlib: configure-stage1-zlib +configure-stage1-zlib: + @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE1_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + CFLAGS="$(STAGE1_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE1_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 1 in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + \ + $(STAGE1_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stage2-zlib maybe-configure-stage2-zlib +maybe-configure-stage2-zlib: +@if zlib-bootstrap +maybe-configure-stage2-zlib: configure-stage2-zlib +configure-stage2-zlib: + @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE2_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE2_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE2_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE2_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 2 in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE2_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stage3-zlib maybe-configure-stage3-zlib +maybe-configure-stage3-zlib: +@if zlib-bootstrap +maybe-configure-stage3-zlib: configure-stage3-zlib +configure-stage3-zlib: + @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE3_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE3_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE3_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE3_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 3 in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE3_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stage4-zlib maybe-configure-stage4-zlib +maybe-configure-stage4-zlib: +@if zlib-bootstrap +maybe-configure-stage4-zlib: configure-stage4-zlib +configure-stage4-zlib: + @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE4_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE4_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE4_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE4_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 4 in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE4_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stageprofile-zlib maybe-configure-stageprofile-zlib +maybe-configure-stageprofile-zlib: +@if zlib-bootstrap +maybe-configure-stageprofile-zlib: configure-stageprofile-zlib +configure-stageprofile-zlib: + @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEprofile_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEprofile_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEprofile_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEprofile_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage profile in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEprofile_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stagetrain-zlib maybe-configure-stagetrain-zlib +maybe-configure-stagetrain-zlib: +@if zlib-bootstrap +maybe-configure-stagetrain-zlib: configure-stagetrain-zlib +configure-stagetrain-zlib: + @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEtrain_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEtrain_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEtrain_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEtrain_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage train in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEtrain_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stagefeedback-zlib maybe-configure-stagefeedback-zlib +maybe-configure-stagefeedback-zlib: +@if zlib-bootstrap +maybe-configure-stagefeedback-zlib: configure-stagefeedback-zlib +configure-stagefeedback-zlib: + @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEfeedback_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEfeedback_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEfeedback_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEfeedback_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage feedback in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEfeedback_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stageautoprofile-zlib maybe-configure-stageautoprofile-zlib +maybe-configure-stageautoprofile-zlib: +@if zlib-bootstrap +maybe-configure-stageautoprofile-zlib: configure-stageautoprofile-zlib +configure-stageautoprofile-zlib: + @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEautoprofile_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEautoprofile_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage autoprofile in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEautoprofile_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + +.PHONY: configure-stageautofeedback-zlib maybe-configure-stageautofeedback-zlib +maybe-configure-stageautofeedback-zlib: +@if zlib-bootstrap +maybe-configure-stageautofeedback-zlib: configure-stageautofeedback-zlib +configure-stageautofeedback-zlib: + @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ + test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEautofeedback_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEautofeedback_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage autofeedback in $(HOST_SUBDIR)/zlib; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ + cd $(HOST_SUBDIR)/zlib || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=zlib; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGEautofeedback_CONFIGURE_FLAGS) \ + @extra_host_zlib_configure_flags@ +@endif zlib-bootstrap + + + + + +.PHONY: all-zlib maybe-all-zlib +maybe-all-zlib: +@if gcc-bootstrap +all-zlib: stage_current +@endif gcc-bootstrap +@if zlib +TARGET-zlib=all +maybe-all-zlib: all-zlib +all-zlib: configure-zlib + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-zlib)) +@endif zlib + + + +.PHONY: all-stage1-zlib maybe-all-stage1-zlib +.PHONY: clean-stage1-zlib maybe-clean-stage1-zlib +maybe-all-stage1-zlib: +maybe-clean-stage1-zlib: +@if zlib-bootstrap +maybe-all-stage1-zlib: all-stage1-zlib +all-stage1: all-stage1-zlib +TARGET-stage1-zlib = $(TARGET-zlib) +all-stage1-zlib: configure-stage1-zlib + @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE1_TFLAGS)"; \ + $(HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ + 
CXXFLAGS="$(STAGE1_CXXFLAGS)" \ + LIBCFLAGS="$(LIBCFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) \ + $(STAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE1_TFLAGS)" \ + $(TARGET-stage1-zlib) + +maybe-clean-stage1-zlib: clean-stage1-zlib +clean-stage1: clean-stage1-zlib +clean-stage1-zlib: + @if [ $(current_stage) = stage1 ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage1-zlib/Makefile ] || exit 0; \ + $(MAKE) stage1-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) \ + $(STAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap + + +.PHONY: all-stage2-zlib maybe-all-stage2-zlib +.PHONY: clean-stage2-zlib maybe-clean-stage2-zlib +maybe-all-stage2-zlib: +maybe-clean-stage2-zlib: +@if zlib-bootstrap +maybe-all-stage2-zlib: all-stage2-zlib +all-stage2: all-stage2-zlib +TARGET-stage2-zlib = $(TARGET-zlib) +all-stage2-zlib: configure-stage2-zlib + @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE2_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE2_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE2_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE2_TFLAGS)" \ + $(TARGET-stage2-zlib) + +maybe-clean-stage2-zlib: clean-stage2-zlib +clean-stage2: clean-stage2-zlib +clean-stage2-zlib: + @if [ $(current_stage) = stage2 ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage2-zlib/Makefile ] || exit 0; \ + $(MAKE) stage2-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap + + +.PHONY: all-stage3-zlib maybe-all-stage3-zlib +.PHONY: clean-stage3-zlib maybe-clean-stage3-zlib +maybe-all-stage3-zlib: +maybe-clean-stage3-zlib: +@if zlib-bootstrap +maybe-all-stage3-zlib: all-stage3-zlib +all-stage3: all-stage3-zlib +TARGET-stage3-zlib = $(TARGET-zlib) +all-stage3-zlib: configure-stage3-zlib + @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE3_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE3_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE3_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE3_TFLAGS)" \ + $(TARGET-stage3-zlib) + +maybe-clean-stage3-zlib: clean-stage3-zlib +clean-stage3: clean-stage3-zlib +clean-stage3-zlib: + @if [ $(current_stage) = stage3 ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage3-zlib/Makefile ] || exit 0; \ + $(MAKE) stage3-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif 
zlib-bootstrap + + +.PHONY: all-stage4-zlib maybe-all-stage4-zlib +.PHONY: clean-stage4-zlib maybe-clean-stage4-zlib +maybe-all-stage4-zlib: +maybe-clean-stage4-zlib: +@if zlib-bootstrap +maybe-all-stage4-zlib: all-stage4-zlib +all-stage4: all-stage4-zlib +TARGET-stage4-zlib = $(TARGET-zlib) +all-stage4-zlib: configure-stage4-zlib + @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE4_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE4_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE4_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGE4_TFLAGS)" \ + $(TARGET-stage4-zlib) + +maybe-clean-stage4-zlib: clean-stage4-zlib +clean-stage4: clean-stage4-zlib +clean-stage4-zlib: + @if [ $(current_stage) = stage4 ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stage4-zlib/Makefile ] || exit 0; \ + $(MAKE) stage4-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap + -clean-readline: - @: $(MAKE); $(unstage) - @[ -f ./readline/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ +.PHONY: all-stageprofile-zlib maybe-all-stageprofile-zlib +.PHONY: clean-stageprofile-zlib maybe-clean-stageprofile-zlib +maybe-all-stageprofile-zlib: +maybe-clean-stageprofile-zlib: +@if zlib-bootstrap +maybe-all-stageprofile-zlib: all-stageprofile-zlib +all-stageprofile: all-stageprofile-zlib +TARGET-stageprofile-zlib = $(TARGET-zlib) +all-stageprofile-zlib: configure-stageprofile-zlib + @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEprofile_TFLAGS)"; \ $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing clean in readline"; \ - (cd $(HOST_SUBDIR)/readline && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - clean) \ - || exit 1 + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEprofile_TFLAGS)" \ + $(TARGET-stageprofile-zlib) -@endif readline +maybe-clean-stageprofile-zlib: clean-stageprofile-zlib +clean-stageprofile: clean-stageprofile-zlib +clean-stageprofile-zlib: + @if [ $(current_stage) = stageprofile ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stageprofile-zlib/Makefile ] || exit 0; \ + $(MAKE) stageprofile-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) 
$(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap -.PHONY: maybe-distclean-readline distclean-readline -maybe-distclean-readline: -@if readline -maybe-distclean-readline: distclean-readline -distclean-readline: - @: $(MAKE); $(unstage) - @[ -f ./readline/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ +.PHONY: all-stagetrain-zlib maybe-all-stagetrain-zlib +.PHONY: clean-stagetrain-zlib maybe-clean-stagetrain-zlib +maybe-all-stagetrain-zlib: +maybe-clean-stagetrain-zlib: +@if zlib-bootstrap +maybe-all-stagetrain-zlib: all-stagetrain-zlib +all-stagetrain: all-stagetrain-zlib +TARGET-stagetrain-zlib = $(TARGET-zlib) +all-stagetrain-zlib: configure-stagetrain-zlib + @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEtrain_TFLAGS)"; \ $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing distclean in readline"; \ - (cd $(HOST_SUBDIR)/readline && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - distclean) \ - || exit 1 + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEtrain_TFLAGS)" \ + $(TARGET-stagetrain-zlib) -@endif readline +maybe-clean-stagetrain-zlib: clean-stagetrain-zlib +clean-stagetrain: clean-stagetrain-zlib +clean-stagetrain-zlib: + @if [ $(current_stage) = stagetrain ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stagetrain-zlib/Makefile ] || exit 0; \ + $(MAKE) stagetrain-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap -.PHONY: maybe-maintainer-clean-readline maintainer-clean-readline -maybe-maintainer-clean-readline: -@if readline -maybe-maintainer-clean-readline: maintainer-clean-readline -maintainer-clean-readline: - @: $(MAKE); $(unstage) - @[ -f ./readline/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ +.PHONY: all-stagefeedback-zlib maybe-all-stagefeedback-zlib +.PHONY: clean-stagefeedback-zlib maybe-clean-stagefeedback-zlib +maybe-all-stagefeedback-zlib: +maybe-clean-stagefeedback-zlib: +@if zlib-bootstrap +maybe-all-stagefeedback-zlib: all-stagefeedback-zlib +all-stagefeedback: all-stagefeedback-zlib +TARGET-stagefeedback-zlib = $(TARGET-zlib) +all-stagefeedback-zlib: configure-stagefeedback-zlib + @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGEfeedback_TFLAGS)"; \ $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing maintainer-clean in readline"; \ - (cd $(HOST_SUBDIR)/readline && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - 
"RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - maintainer-clean) \ - || exit 1 - -@endif readline + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEfeedback_TFLAGS)" \ + $(TARGET-stagefeedback-zlib) +maybe-clean-stagefeedback-zlib: clean-stagefeedback-zlib +clean-stagefeedback: clean-stagefeedback-zlib +clean-stagefeedback-zlib: + @if [ $(current_stage) = stagefeedback ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stagefeedback-zlib/Makefile ] || exit 0; \ + $(MAKE) stagefeedback-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap -.PHONY: configure-sid maybe-configure-sid -maybe-configure-sid: -@if gcc-bootstrap -configure-sid: stage_current -@endif gcc-bootstrap -@if sid -maybe-configure-sid: configure-sid -configure-sid: - @: $(MAKE); $(unstage) +.PHONY: all-stageautoprofile-zlib maybe-all-stageautoprofile-zlib +.PHONY: clean-stageautoprofile-zlib maybe-clean-stageautoprofile-zlib +maybe-all-stageautoprofile-zlib: +maybe-clean-stageautoprofile-zlib: +@if zlib-bootstrap +maybe-all-stageautoprofile-zlib: all-stageautoprofile-zlib +all-stageautoprofile: all-stageautoprofile-zlib +TARGET-stageautoprofile-zlib = $(TARGET-zlib) +all-stageautoprofile-zlib: configure-stageautoprofile-zlib + @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - test ! 
-f $(HOST_SUBDIR)/sid/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/sid; \ - $(HOST_EXPORTS) \ - echo Configuring in $(HOST_SUBDIR)/sid; \ - cd "$(HOST_SUBDIR)/sid" || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/sid/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=sid; \ - $(SHELL) \ - $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - || exit 1 -@endif sid - - + TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + $$s/gcc/config/i386/$(AUTO_PROFILE) \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEautoprofile_TFLAGS)" \ + $(TARGET-stageautoprofile-zlib) +maybe-clean-stageautoprofile-zlib: clean-stageautoprofile-zlib +clean-stageautoprofile: clean-stageautoprofile-zlib +clean-stageautoprofile-zlib: + @if [ $(current_stage) = stageautoprofile ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stageautoprofile-zlib/Makefile ] || exit 0; \ + $(MAKE) stageautoprofile-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap -.PHONY: all-sid maybe-all-sid -maybe-all-sid: -@if gcc-bootstrap -all-sid: stage_current -@endif gcc-bootstrap -@if sid -TARGET-sid=all -maybe-all-sid: all-sid -all-sid: configure-sid - @: $(MAKE); $(unstage) +.PHONY: all-stageautofeedback-zlib maybe-all-stageautofeedback-zlib +.PHONY: clean-stageautofeedback-zlib maybe-clean-stageautofeedback-zlib +maybe-all-stageautofeedback-zlib: +maybe-clean-stageautofeedback-zlib: +@if zlib-bootstrap +maybe-all-stageautofeedback-zlib: all-stageautofeedback-zlib +all-stageautofeedback: all-stageautofeedback-zlib +TARGET-stageautofeedback-zlib = $(TARGET-zlib) +all-stageautofeedback-zlib: configure-stageautofeedback-zlib + @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sid && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ - $(TARGET-sid)) -@endif sid + TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/zlib && \ + \ + $(MAKE) $(BASE_FLAGS_TO_PASS) \ + CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ + CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ + CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ + LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ + TFLAGS="$(STAGEautofeedback_TFLAGS)" PERF_DATA=perf.data \ + $(TARGET-stageautofeedback-zlib) +maybe-clean-stageautofeedback-zlib: clean-stageautofeedback-zlib +clean-stageautofeedback: clean-stageautofeedback-zlib +clean-stageautofeedback-zlib: + @if [ 
$(current_stage) = stageautofeedback ]; then \ + [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stageautofeedback-zlib/Makefile ] || exit 0; \ + $(MAKE) stageautofeedback-start; \ + fi; \ + cd $(HOST_SUBDIR)/zlib && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif zlib-bootstrap -.PHONY: check-sid maybe-check-sid -maybe-check-sid: -@if sid -maybe-check-sid: check-sid -check-sid: - @: $(MAKE); $(unstage) - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sid && \ - $(MAKE) $(FLAGS_TO_PASS) check) -@endif sid +.PHONY: check-zlib maybe-check-zlib +maybe-check-zlib: +@if zlib +maybe-check-zlib: check-zlib -.PHONY: install-sid maybe-install-sid -maybe-install-sid: -@if sid -maybe-install-sid: install-sid +check-zlib: -install-sid: installdirs - @: $(MAKE); $(unstage) - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sid && \ - $(MAKE) $(FLAGS_TO_PASS) install) +@endif zlib -@endif sid +.PHONY: install-zlib maybe-install-zlib +maybe-install-zlib: +@if zlib +maybe-install-zlib: install-zlib -.PHONY: install-strip-sid maybe-install-strip-sid -maybe-install-strip-sid: -@if sid -maybe-install-strip-sid: install-strip-sid +install-zlib: -install-strip-sid: installdirs - @: $(MAKE); $(unstage) - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sid && \ - $(MAKE) $(FLAGS_TO_PASS) install-strip) +@endif zlib -@endif sid +.PHONY: install-strip-zlib maybe-install-strip-zlib +maybe-install-strip-zlib: +@if zlib +maybe-install-strip-zlib: install-strip-zlib + +install-strip-zlib: + +@endif zlib # Other targets (info, dvi, pdf, etc.) 
-.PHONY: maybe-info-sid info-sid -maybe-info-sid: -@if sid -maybe-info-sid: info-sid +.PHONY: maybe-info-zlib info-zlib +maybe-info-zlib: +@if zlib +maybe-info-zlib: info-zlib -info-sid: \ - configure-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +info-zlib: \ + configure-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing info in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing info in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30570,25 +33913,24 @@ info-sid: \ info) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-dvi-sid dvi-sid -maybe-dvi-sid: -@if sid -maybe-dvi-sid: dvi-sid +.PHONY: maybe-dvi-zlib dvi-zlib +maybe-dvi-zlib: +@if zlib +maybe-dvi-zlib: dvi-zlib -dvi-sid: \ - configure-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +dvi-zlib: \ + configure-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing dvi in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing dvi in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30596,25 +33938,24 @@ dvi-sid: \ dvi) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-pdf-sid pdf-sid -maybe-pdf-sid: -@if sid -maybe-pdf-sid: pdf-sid +.PHONY: maybe-pdf-zlib pdf-zlib +maybe-pdf-zlib: +@if zlib +maybe-pdf-zlib: pdf-zlib -pdf-sid: \ - configure-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +pdf-zlib: \ + configure-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing pdf in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing pdf in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30622,25 +33963,24 @@ pdf-sid: \ pdf) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-html-sid html-sid -maybe-html-sid: -@if sid -maybe-html-sid: html-sid +.PHONY: maybe-html-zlib html-zlib +maybe-html-zlib: +@if zlib +maybe-html-zlib: html-zlib -html-sid: \ - configure-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +html-zlib: \ + configure-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing html in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing html in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30648,25 +33988,24 @@ html-sid: \ html) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-TAGS-sid 
TAGS-sid -maybe-TAGS-sid: -@if sid -maybe-TAGS-sid: TAGS-sid +.PHONY: maybe-TAGS-zlib TAGS-zlib +maybe-TAGS-zlib: +@if zlib +maybe-TAGS-zlib: TAGS-zlib -TAGS-sid: \ - configure-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +TAGS-zlib: \ + configure-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing TAGS in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing TAGS in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30674,26 +34013,25 @@ TAGS-sid: \ TAGS) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-install-info-sid install-info-sid -maybe-install-info-sid: -@if sid -maybe-install-info-sid: install-info-sid +.PHONY: maybe-install-info-zlib install-info-zlib +maybe-install-info-zlib: +@if zlib +maybe-install-info-zlib: install-info-zlib -install-info-sid: \ - configure-sid \ - info-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +install-info-zlib: \ + configure-zlib \ + info-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-info in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing install-info in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30701,26 +34039,25 @@ install-info-sid: \ install-info) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-install-pdf-sid install-pdf-sid -maybe-install-pdf-sid: -@if sid -maybe-install-pdf-sid: install-pdf-sid +.PHONY: maybe-install-pdf-zlib install-pdf-zlib +maybe-install-pdf-zlib: +@if zlib +maybe-install-pdf-zlib: install-pdf-zlib -install-pdf-sid: \ - configure-sid \ - pdf-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +install-pdf-zlib: \ + configure-zlib \ + pdf-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-pdf in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing install-pdf in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30728,26 +34065,25 @@ install-pdf-sid: \ install-pdf) \ || exit 1 -@endif sid - -.PHONY: maybe-install-html-sid install-html-sid -maybe-install-html-sid: -@if sid -maybe-install-html-sid: install-html-sid +@endif zlib -install-html-sid: \ - configure-sid \ - html-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +.PHONY: maybe-install-html-zlib install-html-zlib +maybe-install-html-zlib: +@if zlib +maybe-install-html-zlib: install-html-zlib + +install-html-zlib: \ + configure-zlib \ + html-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" 
| sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-html in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing install-html in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30755,25 +34091,24 @@ install-html-sid: \ install-html) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-installcheck-sid installcheck-sid -maybe-installcheck-sid: -@if sid -maybe-installcheck-sid: installcheck-sid +.PHONY: maybe-installcheck-zlib installcheck-zlib +maybe-installcheck-zlib: +@if zlib +maybe-installcheck-zlib: installcheck-zlib -installcheck-sid: \ - configure-sid - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +installcheck-zlib: \ + configure-zlib + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing installcheck in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing installcheck in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30781,24 +34116,23 @@ installcheck-sid: \ installcheck) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-mostlyclean-sid mostlyclean-sid -maybe-mostlyclean-sid: -@if sid -maybe-mostlyclean-sid: mostlyclean-sid +.PHONY: maybe-mostlyclean-zlib mostlyclean-zlib +maybe-mostlyclean-zlib: +@if zlib +maybe-mostlyclean-zlib: mostlyclean-zlib -mostlyclean-sid: - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +mostlyclean-zlib: + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing mostlyclean in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing mostlyclean in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30806,24 +34140,23 @@ mostlyclean-sid: mostlyclean) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-clean-sid clean-sid -maybe-clean-sid: -@if sid -maybe-clean-sid: clean-sid +.PHONY: maybe-clean-zlib clean-zlib +maybe-clean-zlib: +@if zlib +maybe-clean-zlib: clean-zlib -clean-sid: - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +clean-zlib: + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing clean in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing clean in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30831,24 +34164,23 @@ clean-sid: clean) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-distclean-sid distclean-sid -maybe-distclean-sid: -@if sid -maybe-distclean-sid: distclean-sid +.PHONY: maybe-distclean-zlib distclean-zlib +maybe-distclean-zlib: +@if zlib +maybe-distclean-zlib: distclean-zlib -distclean-sid: - @: $(MAKE); $(unstage) - @[ -f 
./sid/Makefile ] || exit 0; \ +distclean-zlib: + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing distclean in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing distclean in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30856,24 +34188,23 @@ distclean-sid: distclean) \ || exit 1 -@endif sid +@endif zlib -.PHONY: maybe-maintainer-clean-sid maintainer-clean-sid -maybe-maintainer-clean-sid: -@if sid -maybe-maintainer-clean-sid: maintainer-clean-sid +.PHONY: maybe-maintainer-clean-zlib maintainer-clean-zlib +maybe-maintainer-clean-zlib: +@if zlib +maybe-maintainer-clean-zlib: maintainer-clean-zlib -maintainer-clean-sid: - @: $(MAKE); $(unstage) - @[ -f ./sid/Makefile ] || exit 0; \ +maintainer-clean-zlib: + @[ -f ./zlib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing maintainer-clean in sid"; \ - (cd $(HOST_SUBDIR)/sid && \ + echo "Doing maintainer-clean in zlib"; \ + (cd $(HOST_SUBDIR)/zlib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -30881,129 +34212,129 @@ maintainer-clean-sid: maintainer-clean) \ || exit 1 -@endif sid +@endif zlib -.PHONY: configure-sim maybe-configure-sim -maybe-configure-sim: +.PHONY: configure-gnulib maybe-configure-gnulib +maybe-configure-gnulib: @if gcc-bootstrap -configure-sim: stage_current +configure-gnulib: stage_current @endif gcc-bootstrap -@if sim -maybe-configure-sim: configure-sim -configure-sim: +@if gnulib +maybe-configure-gnulib: configure-gnulib +configure-gnulib: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - test ! -f $(HOST_SUBDIR)/sim/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/sim; \ + test ! 
-f $(HOST_SUBDIR)/gnulib/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/gnulib; \ $(HOST_EXPORTS) \ - echo Configuring in $(HOST_SUBDIR)/sim; \ - cd "$(HOST_SUBDIR)/sim" || exit 1; \ + echo Configuring in $(HOST_SUBDIR)/gnulib; \ + cd "$(HOST_SUBDIR)/gnulib" || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/sim/ | \ + *) topdir=`echo $(HOST_SUBDIR)/gnulib/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=sim; \ + module_srcdir=gnulib; \ $(SHELL) \ $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ || exit 1 -@endif sim +@endif gnulib -.PHONY: all-sim maybe-all-sim -maybe-all-sim: +.PHONY: all-gnulib maybe-all-gnulib +maybe-all-gnulib: @if gcc-bootstrap -all-sim: stage_current +all-gnulib: stage_current @endif gcc-bootstrap -@if sim -TARGET-sim=all -maybe-all-sim: all-sim -all-sim: configure-sim +@if gnulib +TARGET-gnulib=all +maybe-all-gnulib: all-gnulib +all-gnulib: configure-gnulib @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sim && \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ - $(TARGET-sim)) -@endif sim + $(TARGET-gnulib)) +@endif gnulib -.PHONY: check-sim maybe-check-sim -maybe-check-sim: -@if sim -maybe-check-sim: check-sim +.PHONY: check-gnulib maybe-check-gnulib +maybe-check-gnulib: +@if gnulib +maybe-check-gnulib: check-gnulib -check-sim: +check-gnulib: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sim && \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(FLAGS_TO_PASS) check) -@endif sim +@endif gnulib -.PHONY: install-sim maybe-install-sim -maybe-install-sim: -@if sim -maybe-install-sim: install-sim +.PHONY: install-gnulib maybe-install-gnulib +maybe-install-gnulib: +@if gnulib +maybe-install-gnulib: install-gnulib -install-sim: installdirs +install-gnulib: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sim && \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(FLAGS_TO_PASS) install) -@endif sim +@endif gnulib -.PHONY: install-strip-sim maybe-install-strip-sim -maybe-install-strip-sim: -@if sim -maybe-install-strip-sim: install-strip-sim +.PHONY: install-strip-gnulib maybe-install-strip-gnulib +maybe-install-strip-gnulib: +@if gnulib +maybe-install-strip-gnulib: install-strip-gnulib -install-strip-sim: installdirs +install-strip-gnulib: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/sim && \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(FLAGS_TO_PASS) install-strip) -@endif sim +@endif gnulib # Other targets (info, dvi, pdf, etc.) 
-.PHONY: maybe-info-sim info-sim -maybe-info-sim: -@if sim -maybe-info-sim: info-sim +.PHONY: maybe-info-gnulib info-gnulib +maybe-info-gnulib: +@if gnulib +maybe-info-gnulib: info-gnulib -info-sim: \ - configure-sim +info-gnulib: \ + configure-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing info in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing info in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31011,25 +34342,25 @@ info-sim: \ info) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-dvi-sim dvi-sim -maybe-dvi-sim: -@if sim -maybe-dvi-sim: dvi-sim +.PHONY: maybe-dvi-gnulib dvi-gnulib +maybe-dvi-gnulib: +@if gnulib +maybe-dvi-gnulib: dvi-gnulib -dvi-sim: \ - configure-sim +dvi-gnulib: \ + configure-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing dvi in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing dvi in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31037,25 +34368,25 @@ dvi-sim: \ dvi) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-pdf-sim pdf-sim -maybe-pdf-sim: -@if sim -maybe-pdf-sim: pdf-sim +.PHONY: maybe-pdf-gnulib pdf-gnulib +maybe-pdf-gnulib: +@if gnulib +maybe-pdf-gnulib: pdf-gnulib -pdf-sim: \ - configure-sim +pdf-gnulib: \ + configure-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing pdf in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing pdf in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31063,25 +34394,25 @@ pdf-sim: \ pdf) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-html-sim html-sim -maybe-html-sim: -@if sim -maybe-html-sim: html-sim +.PHONY: maybe-html-gnulib html-gnulib +maybe-html-gnulib: +@if gnulib +maybe-html-gnulib: html-gnulib -html-sim: \ - configure-sim +html-gnulib: \ + configure-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing html in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing html in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31089,25 +34420,25 @@ 
html-sim: \ html) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-TAGS-sim TAGS-sim -maybe-TAGS-sim: -@if sim -maybe-TAGS-sim: TAGS-sim +.PHONY: maybe-TAGS-gnulib TAGS-gnulib +maybe-TAGS-gnulib: +@if gnulib +maybe-TAGS-gnulib: TAGS-gnulib -TAGS-sim: \ - configure-sim +TAGS-gnulib: \ + configure-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing TAGS in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing TAGS in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31115,26 +34446,26 @@ TAGS-sim: \ TAGS) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-install-info-sim install-info-sim -maybe-install-info-sim: -@if sim -maybe-install-info-sim: install-info-sim +.PHONY: maybe-install-info-gnulib install-info-gnulib +maybe-install-info-gnulib: +@if gnulib +maybe-install-info-gnulib: install-info-gnulib -install-info-sim: \ - configure-sim \ - info-sim +install-info-gnulib: \ + configure-gnulib \ + info-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-info in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing install-info in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31142,26 +34473,26 @@ install-info-sim: \ install-info) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-install-pdf-sim install-pdf-sim -maybe-install-pdf-sim: -@if sim -maybe-install-pdf-sim: install-pdf-sim +.PHONY: maybe-install-pdf-gnulib install-pdf-gnulib +maybe-install-pdf-gnulib: +@if gnulib +maybe-install-pdf-gnulib: install-pdf-gnulib -install-pdf-sim: \ - configure-sim \ - pdf-sim +install-pdf-gnulib: \ + configure-gnulib \ + pdf-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-pdf in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing install-pdf in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31169,26 +34500,26 @@ install-pdf-sim: \ install-pdf) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-install-html-sim install-html-sim -maybe-install-html-sim: -@if sim -maybe-install-html-sim: install-html-sim +.PHONY: maybe-install-html-gnulib install-html-gnulib +maybe-install-html-gnulib: +@if gnulib +maybe-install-html-gnulib: install-html-gnulib -install-html-sim: \ - configure-sim \ - html-sim +install-html-gnulib: \ + configure-gnulib \ + html-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || 
exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-html in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing install-html in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31196,25 +34527,25 @@ install-html-sim: \ install-html) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-installcheck-sim installcheck-sim -maybe-installcheck-sim: -@if sim -maybe-installcheck-sim: installcheck-sim +.PHONY: maybe-installcheck-gnulib installcheck-gnulib +maybe-installcheck-gnulib: +@if gnulib +maybe-installcheck-gnulib: installcheck-gnulib -installcheck-sim: \ - configure-sim +installcheck-gnulib: \ + configure-gnulib @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing installcheck in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing installcheck in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31222,24 +34553,24 @@ installcheck-sim: \ installcheck) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-mostlyclean-sim mostlyclean-sim -maybe-mostlyclean-sim: -@if sim -maybe-mostlyclean-sim: mostlyclean-sim +.PHONY: maybe-mostlyclean-gnulib mostlyclean-gnulib +maybe-mostlyclean-gnulib: +@if gnulib +maybe-mostlyclean-gnulib: mostlyclean-gnulib -mostlyclean-sim: +mostlyclean-gnulib: @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing mostlyclean in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing mostlyclean in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31247,24 +34578,24 @@ mostlyclean-sim: mostlyclean) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-clean-sim clean-sim -maybe-clean-sim: -@if sim -maybe-clean-sim: clean-sim +.PHONY: maybe-clean-gnulib clean-gnulib +maybe-clean-gnulib: +@if gnulib +maybe-clean-gnulib: clean-gnulib -clean-sim: +clean-gnulib: @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing clean in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing clean in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31272,24 +34603,24 @@ clean-sim: clean) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-distclean-sim distclean-sim 
-maybe-distclean-sim: -@if sim -maybe-distclean-sim: distclean-sim +.PHONY: maybe-distclean-gnulib distclean-gnulib +maybe-distclean-gnulib: +@if gnulib +maybe-distclean-gnulib: distclean-gnulib -distclean-sim: +distclean-gnulib: @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing distclean in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing distclean in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31297,24 +34628,24 @@ distclean-sim: distclean) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: maybe-maintainer-clean-sim maintainer-clean-sim -maybe-maintainer-clean-sim: -@if sim -maybe-maintainer-clean-sim: maintainer-clean-sim +.PHONY: maybe-maintainer-clean-gnulib maintainer-clean-gnulib +maybe-maintainer-clean-gnulib: +@if gnulib +maybe-maintainer-clean-gnulib: maintainer-clean-gnulib -maintainer-clean-sim: +maintainer-clean-gnulib: @: $(MAKE); $(unstage) - @[ -f ./sim/Makefile ] || exit 0; \ + @[ -f ./gnulib/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing maintainer-clean in sim"; \ - (cd $(HOST_SUBDIR)/sim && \ + echo "Doing maintainer-clean in gnulib"; \ + (cd $(HOST_SUBDIR)/gnulib && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -31322,1225 +34653,570 @@ maintainer-clean-sim: maintainer-clean) \ || exit 1 -@endif sim +@endif gnulib -.PHONY: configure-texinfo maybe-configure-texinfo -maybe-configure-texinfo: +.PHONY: configure-gdbsupport maybe-configure-gdbsupport +maybe-configure-gdbsupport: @if gcc-bootstrap -configure-texinfo: stage_current +configure-gdbsupport: stage_current @endif gcc-bootstrap -@if texinfo -maybe-configure-texinfo: configure-texinfo -configure-texinfo: +@if gdbsupport +maybe-configure-gdbsupport: configure-gdbsupport +configure-gdbsupport: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - test ! -f $(HOST_SUBDIR)/texinfo/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/texinfo; \ + test ! 
-f $(HOST_SUBDIR)/gdbsupport/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/gdbsupport; \ $(HOST_EXPORTS) \ - echo Configuring in $(HOST_SUBDIR)/texinfo; \ - cd "$(HOST_SUBDIR)/texinfo" || exit 1; \ + echo Configuring in $(HOST_SUBDIR)/gdbsupport; \ + cd "$(HOST_SUBDIR)/gdbsupport" || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/texinfo/ | \ + *) topdir=`echo $(HOST_SUBDIR)/gdbsupport/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=texinfo; \ + module_srcdir=gdbsupport; \ $(SHELL) \ $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ || exit 1 -@endif texinfo +@endif gdbsupport -.PHONY: all-texinfo maybe-all-texinfo -maybe-all-texinfo: +.PHONY: all-gdbsupport maybe-all-gdbsupport +maybe-all-gdbsupport: @if gcc-bootstrap -all-texinfo: stage_current +all-gdbsupport: stage_current @endif gcc-bootstrap -@if texinfo -TARGET-texinfo=all -maybe-all-texinfo: all-texinfo -all-texinfo: configure-texinfo +@if gdbsupport +TARGET-gdbsupport=all +maybe-all-gdbsupport: all-gdbsupport +all-gdbsupport: configure-gdbsupport @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/texinfo && \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ - $(TARGET-texinfo)) -@endif texinfo + $(TARGET-gdbsupport)) +@endif gdbsupport -.PHONY: check-texinfo maybe-check-texinfo -maybe-check-texinfo: -@if texinfo -maybe-check-texinfo: check-texinfo +.PHONY: check-gdbsupport maybe-check-gdbsupport +maybe-check-gdbsupport: +@if gdbsupport +maybe-check-gdbsupport: check-gdbsupport -check-texinfo: +check-gdbsupport: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/texinfo && \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(FLAGS_TO_PASS) check) -@endif texinfo - -.PHONY: install-texinfo maybe-install-texinfo -maybe-install-texinfo: -@if texinfo -maybe-install-texinfo: install-texinfo - -install-texinfo: - -@endif texinfo - -.PHONY: install-strip-texinfo maybe-install-strip-texinfo -maybe-install-strip-texinfo: -@if texinfo -maybe-install-strip-texinfo: install-strip-texinfo - -install-strip-texinfo: - -@endif texinfo - -# Other targets (info, dvi, pdf, etc.) 
- -.PHONY: maybe-info-texinfo info-texinfo -maybe-info-texinfo: -@if texinfo -maybe-info-texinfo: info-texinfo - -info-texinfo: \ - configure-texinfo - @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing info in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - info) \ - || exit 1 - -@endif texinfo - -.PHONY: maybe-dvi-texinfo dvi-texinfo -maybe-dvi-texinfo: -@if texinfo -maybe-dvi-texinfo: dvi-texinfo - -dvi-texinfo: \ - configure-texinfo - @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing dvi in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - dvi) \ - || exit 1 - -@endif texinfo - -.PHONY: maybe-pdf-texinfo pdf-texinfo -maybe-pdf-texinfo: -@if texinfo -maybe-pdf-texinfo: pdf-texinfo - -pdf-texinfo: \ - configure-texinfo - @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing pdf in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - pdf) \ - || exit 1 - -@endif texinfo +@endif gdbsupport -.PHONY: maybe-html-texinfo html-texinfo -maybe-html-texinfo: -@if texinfo -maybe-html-texinfo: html-texinfo +.PHONY: install-gdbsupport maybe-install-gdbsupport +maybe-install-gdbsupport: +@if gdbsupport +maybe-install-gdbsupport: install-gdbsupport -html-texinfo: \ - configure-texinfo +install-gdbsupport: installdirs @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing html in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - html) \ - || exit 1 + (cd $(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(FLAGS_TO_PASS) install) -@endif texinfo +@endif gdbsupport -.PHONY: maybe-TAGS-texinfo TAGS-texinfo -maybe-TAGS-texinfo: -@if texinfo -maybe-TAGS-texinfo: TAGS-texinfo +.PHONY: install-strip-gdbsupport maybe-install-strip-gdbsupport +maybe-install-strip-gdbsupport: +@if gdbsupport 
+maybe-install-strip-gdbsupport: install-strip-gdbsupport -TAGS-texinfo: \ - configure-texinfo +install-strip-gdbsupport: installdirs @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ + @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing TAGS in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - TAGS) \ - || exit 1 - -@endif texinfo - -.PHONY: maybe-install-info-texinfo install-info-texinfo -maybe-install-info-texinfo: -@if texinfo -maybe-install-info-texinfo: install-info-texinfo + (cd $(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) -install-info-texinfo: \ - configure-texinfo \ - info-texinfo - @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing install-info in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-info) \ - || exit 1 +@endif gdbsupport -@endif texinfo +# Other targets (info, dvi, pdf, etc.) -.PHONY: maybe-install-pdf-texinfo install-pdf-texinfo -maybe-install-pdf-texinfo: -@if texinfo -maybe-install-pdf-texinfo: install-pdf-texinfo +.PHONY: maybe-info-gdbsupport info-gdbsupport +maybe-info-gdbsupport: +@if gdbsupport +maybe-info-gdbsupport: info-gdbsupport -install-pdf-texinfo: \ - configure-texinfo \ - pdf-texinfo +info-gdbsupport: \ + configure-gdbsupport @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ + @[ -f ./gdbsupport/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-pdf in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ + echo "Doing info in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-pdf) \ + info) \ || exit 1 -@endif texinfo +@endif gdbsupport -.PHONY: maybe-install-html-texinfo install-html-texinfo -maybe-install-html-texinfo: -@if texinfo -maybe-install-html-texinfo: install-html-texinfo +.PHONY: maybe-dvi-gdbsupport dvi-gdbsupport +maybe-dvi-gdbsupport: +@if gdbsupport +maybe-dvi-gdbsupport: dvi-gdbsupport -install-html-texinfo: \ - configure-texinfo \ - html-texinfo +dvi-gdbsupport: \ + configure-gdbsupport @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ + @[ -f ./gdbsupport/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export 
\1|"`; \ done; \ - echo "Doing install-html in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ + echo "Doing dvi in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-html) \ + dvi) \ || exit 1 -@endif texinfo - -.PHONY: maybe-installcheck-texinfo installcheck-texinfo -maybe-installcheck-texinfo: -@if texinfo -maybe-installcheck-texinfo: installcheck-texinfo - -installcheck-texinfo: \ - configure-texinfo +@endif gdbsupport + +.PHONY: maybe-pdf-gdbsupport pdf-gdbsupport +maybe-pdf-gdbsupport: +@if gdbsupport +maybe-pdf-gdbsupport: pdf-gdbsupport + +pdf-gdbsupport: \ + configure-gdbsupport @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ + @[ -f ./gdbsupport/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing installcheck in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ + echo "Doing pdf in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - installcheck) \ + pdf) \ || exit 1 -@endif texinfo +@endif gdbsupport -.PHONY: maybe-mostlyclean-texinfo mostlyclean-texinfo -maybe-mostlyclean-texinfo: -@if texinfo -maybe-mostlyclean-texinfo: mostlyclean-texinfo +.PHONY: maybe-html-gdbsupport html-gdbsupport +maybe-html-gdbsupport: +@if gdbsupport +maybe-html-gdbsupport: html-gdbsupport -mostlyclean-texinfo: +html-gdbsupport: \ + configure-gdbsupport @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ + @[ -f ./gdbsupport/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing mostlyclean in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ + echo "Doing html in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - mostlyclean) \ + html) \ || exit 1 -@endif texinfo +@endif gdbsupport -.PHONY: maybe-clean-texinfo clean-texinfo -maybe-clean-texinfo: -@if texinfo -maybe-clean-texinfo: clean-texinfo +.PHONY: maybe-TAGS-gdbsupport TAGS-gdbsupport +maybe-TAGS-gdbsupport: +@if gdbsupport +maybe-TAGS-gdbsupport: TAGS-gdbsupport -clean-texinfo: +TAGS-gdbsupport: \ + configure-gdbsupport @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ + @[ -f ./gdbsupport/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing clean in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ + echo "Doing TAGS in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" 
"WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - clean) \ + TAGS) \ || exit 1 -@endif texinfo +@endif gdbsupport -.PHONY: maybe-distclean-texinfo distclean-texinfo -maybe-distclean-texinfo: -@if texinfo -maybe-distclean-texinfo: distclean-texinfo +.PHONY: maybe-install-info-gdbsupport install-info-gdbsupport +maybe-install-info-gdbsupport: +@if gdbsupport +maybe-install-info-gdbsupport: install-info-gdbsupport -distclean-texinfo: +install-info-gdbsupport: \ + configure-gdbsupport \ + info-gdbsupport @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ + @[ -f ./gdbsupport/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing distclean in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ + echo "Doing install-info in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - distclean) \ + install-info) \ || exit 1 -@endif texinfo +@endif gdbsupport -.PHONY: maybe-maintainer-clean-texinfo maintainer-clean-texinfo -maybe-maintainer-clean-texinfo: -@if texinfo -maybe-maintainer-clean-texinfo: maintainer-clean-texinfo +.PHONY: maybe-install-pdf-gdbsupport install-pdf-gdbsupport +maybe-install-pdf-gdbsupport: +@if gdbsupport +maybe-install-pdf-gdbsupport: install-pdf-gdbsupport -maintainer-clean-texinfo: +install-pdf-gdbsupport: \ + configure-gdbsupport \ + pdf-gdbsupport @: $(MAKE); $(unstage) - @[ -f ./texinfo/Makefile ] || exit 0; \ + @[ -f ./gdbsupport/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing maintainer-clean in texinfo"; \ - (cd $(HOST_SUBDIR)/texinfo && \ + echo "Doing install-pdf in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - maintainer-clean) \ - || exit 1 - -@endif texinfo - - - -.PHONY: configure-zlib maybe-configure-zlib -maybe-configure-zlib: -@if gcc-bootstrap -configure-zlib: stage_current -@endif gcc-bootstrap -@if zlib -maybe-configure-zlib: configure-zlib -configure-zlib: - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - $(HOST_EXPORTS) \ - echo Configuring in $(HOST_SUBDIR)/zlib; \ - cd "$(HOST_SUBDIR)/zlib" || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) \ - $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} @extra_host_zlib_configure_flags@ \ + install-pdf) \ || exit 1 -@endif zlib - - - -.PHONY: configure-stage1-zlib maybe-configure-stage1-zlib -maybe-configure-stage1-zlib: -@if zlib-bootstrap -maybe-configure-stage1-zlib: configure-stage1-zlib -configure-stage1-zlib: - @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE1_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - CFLAGS="$(STAGE1_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGE1_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 1 in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - \ - $(STAGE1_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stage2-zlib maybe-configure-stage2-zlib -maybe-configure-stage2-zlib: -@if zlib-bootstrap -maybe-configure-stage2-zlib: configure-stage2-zlib -configure-stage2-zlib: - @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE2_TFLAGS)"; \ - test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGE2_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGE2_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGE2_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 2 in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGE2_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stage3-zlib maybe-configure-stage3-zlib -maybe-configure-stage3-zlib: -@if zlib-bootstrap -maybe-configure-stage3-zlib: configure-stage3-zlib -configure-stage3-zlib: - @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE3_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGE3_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGE3_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGE3_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 3 in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGE3_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stage4-zlib maybe-configure-stage4-zlib -maybe-configure-stage4-zlib: -@if zlib-bootstrap -maybe-configure-stage4-zlib: configure-stage4-zlib -configure-stage4-zlib: - @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE4_TFLAGS)"; \ - test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGE4_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGE4_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGE4_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage 4 in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGE4_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stageprofile-zlib maybe-configure-stageprofile-zlib -maybe-configure-stageprofile-zlib: -@if zlib-bootstrap -maybe-configure-stageprofile-zlib: configure-stageprofile-zlib -configure-stageprofile-zlib: - @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEprofile_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGEprofile_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGEprofile_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGEprofile_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage profile in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGEprofile_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stagetrain-zlib maybe-configure-stagetrain-zlib -maybe-configure-stagetrain-zlib: -@if zlib-bootstrap -maybe-configure-stagetrain-zlib: configure-stagetrain-zlib -configure-stagetrain-zlib: - @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEtrain_TFLAGS)"; \ - test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGEtrain_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGEtrain_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGEtrain_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage train in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGEtrain_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stagefeedback-zlib maybe-configure-stagefeedback-zlib -maybe-configure-stagefeedback-zlib: -@if zlib-bootstrap -maybe-configure-stagefeedback-zlib: configure-stagefeedback-zlib -configure-stagefeedback-zlib: - @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEfeedback_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGEfeedback_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGEfeedback_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGEfeedback_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage feedback in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGEfeedback_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stageautoprofile-zlib maybe-configure-stageautoprofile-zlib -maybe-configure-stageautoprofile-zlib: -@if zlib-bootstrap -maybe-configure-stageautoprofile-zlib: configure-stageautoprofile-zlib -configure-stageautoprofile-zlib: - @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ - test ! 
-f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGEautoprofile_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGEautoprofile_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage autoprofile in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGEautoprofile_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - -.PHONY: configure-stageautofeedback-zlib maybe-configure-stageautofeedback-zlib -maybe-configure-stageautofeedback-zlib: -@if zlib-bootstrap -maybe-configure-stageautofeedback-zlib: configure-stageautofeedback-zlib -configure-stageautofeedback-zlib: - @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - test ! -f $(HOST_SUBDIR)/zlib/Makefile || exit 0; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - CFLAGS="$(STAGEautofeedback_CFLAGS)"; export CFLAGS; \ - CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)"; export CXXFLAGS; \ - LIBCFLAGS="$(STAGEautofeedback_CFLAGS)"; export LIBCFLAGS; \ - echo Configuring stage autofeedback in $(HOST_SUBDIR)/zlib; \ - $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/zlib; \ - cd $(HOST_SUBDIR)/zlib || exit 1; \ - case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(HOST_SUBDIR)/zlib/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=zlib; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGEautofeedback_CONFIGURE_FLAGS) \ - @extra_host_zlib_configure_flags@ -@endif zlib-bootstrap - - - - - -.PHONY: all-zlib maybe-all-zlib -maybe-all-zlib: -@if gcc-bootstrap -all-zlib: stage_current -@endif gcc-bootstrap -@if zlib -TARGET-zlib=all -maybe-all-zlib: all-zlib -all-zlib: configure-zlib - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - (cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ - $(TARGET-zlib)) -@endif zlib - - - -.PHONY: all-stage1-zlib maybe-all-stage1-zlib -.PHONY: clean-stage1-zlib maybe-clean-stage1-zlib -maybe-all-stage1-zlib: -maybe-clean-stage1-zlib: -@if zlib-bootstrap -maybe-all-stage1-zlib: all-stage1-zlib -all-stage1: all-stage1-zlib -TARGET-stage1-zlib = $(TARGET-zlib) -all-stage1-zlib: configure-stage1-zlib - @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE1_TFLAGS)"; \ - $(HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGE1_CFLAGS)" \ - CXXFLAGS="$(STAGE1_CXXFLAGS)" \ - 
LIBCFLAGS="$(LIBCFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) \ - $(STAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGE1_TFLAGS)" \ - $(TARGET-stage1-zlib) - -maybe-clean-stage1-zlib: clean-stage1-zlib -clean-stage1: clean-stage1-zlib -clean-stage1-zlib: - @if [ $(current_stage) = stage1 ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stage1-zlib/Makefile ] || exit 0; \ - $(MAKE) stage1-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) \ - $(STAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap - - -.PHONY: all-stage2-zlib maybe-all-stage2-zlib -.PHONY: clean-stage2-zlib maybe-clean-stage2-zlib -maybe-all-stage2-zlib: -maybe-clean-stage2-zlib: -@if zlib-bootstrap -maybe-all-stage2-zlib: all-stage2-zlib -all-stage2: all-stage2-zlib -TARGET-stage2-zlib = $(TARGET-zlib) -all-stage2-zlib: configure-stage2-zlib - @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE2_TFLAGS)"; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGE2_CFLAGS)" \ - CXXFLAGS="$(STAGE2_CXXFLAGS)" \ - LIBCFLAGS="$(STAGE2_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGE2_TFLAGS)" \ - $(TARGET-stage2-zlib) -maybe-clean-stage2-zlib: clean-stage2-zlib -clean-stage2: clean-stage2-zlib -clean-stage2-zlib: - @if [ $(current_stage) = stage2 ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stage2-zlib/Makefile ] || exit 0; \ - $(MAKE) stage2-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +@endif gdbsupport +.PHONY: maybe-install-html-gdbsupport install-html-gdbsupport +maybe-install-html-gdbsupport: +@if gdbsupport +maybe-install-html-gdbsupport: install-html-gdbsupport -.PHONY: all-stage3-zlib maybe-all-stage3-zlib -.PHONY: clean-stage3-zlib maybe-clean-stage3-zlib -maybe-all-stage3-zlib: -maybe-clean-stage3-zlib: -@if zlib-bootstrap -maybe-all-stage3-zlib: all-stage3-zlib -all-stage3: all-stage3-zlib -TARGET-stage3-zlib = $(TARGET-zlib) -all-stage3-zlib: configure-stage3-zlib - @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start - @r=`${PWD_COMMAND}`; export r; \ +install-html-gdbsupport: \ + configure-gdbsupport \ + html-gdbsupport + @: $(MAKE); $(unstage) + @[ -f ./gdbsupport/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE3_TFLAGS)"; \ $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGE3_CFLAGS)" \ - CXXFLAGS="$(STAGE3_CXXFLAGS)" \ - LIBCFLAGS="$(STAGE3_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGE3_TFLAGS)" \ - $(TARGET-stage3-zlib) + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-html in gdbsupport"; \ + (cd 
$(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 -maybe-clean-stage3-zlib: clean-stage3-zlib -clean-stage3: clean-stage3-zlib -clean-stage3-zlib: - @if [ $(current_stage) = stage3 ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stage3-zlib/Makefile ] || exit 0; \ - $(MAKE) stage3-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +@endif gdbsupport +.PHONY: maybe-installcheck-gdbsupport installcheck-gdbsupport +maybe-installcheck-gdbsupport: +@if gdbsupport +maybe-installcheck-gdbsupport: installcheck-gdbsupport -.PHONY: all-stage4-zlib maybe-all-stage4-zlib -.PHONY: clean-stage4-zlib maybe-clean-stage4-zlib -maybe-all-stage4-zlib: -maybe-clean-stage4-zlib: -@if zlib-bootstrap -maybe-all-stage4-zlib: all-stage4-zlib -all-stage4: all-stage4-zlib -TARGET-stage4-zlib = $(TARGET-zlib) -all-stage4-zlib: configure-stage4-zlib - @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start - @r=`${PWD_COMMAND}`; export r; \ +installcheck-gdbsupport: \ + configure-gdbsupport + @: $(MAKE); $(unstage) + @[ -f ./gdbsupport/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE4_TFLAGS)"; \ $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGE4_CFLAGS)" \ - CXXFLAGS="$(STAGE4_CXXFLAGS)" \ - LIBCFLAGS="$(STAGE4_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGE4_TFLAGS)" \ - $(TARGET-stage4-zlib) + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing installcheck in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 -maybe-clean-stage4-zlib: clean-stage4-zlib -clean-stage4: clean-stage4-zlib -clean-stage4-zlib: - @if [ $(current_stage) = stage4 ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stage4-zlib/Makefile ] || exit 0; \ - $(MAKE) stage4-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +@endif gdbsupport +.PHONY: maybe-mostlyclean-gdbsupport mostlyclean-gdbsupport +maybe-mostlyclean-gdbsupport: +@if gdbsupport +maybe-mostlyclean-gdbsupport: mostlyclean-gdbsupport -.PHONY: all-stageprofile-zlib maybe-all-stageprofile-zlib -.PHONY: clean-stageprofile-zlib maybe-clean-stageprofile-zlib -maybe-all-stageprofile-zlib: -maybe-clean-stageprofile-zlib: -@if zlib-bootstrap -maybe-all-stageprofile-zlib: all-stageprofile-zlib -all-stageprofile: all-stageprofile-zlib -TARGET-stageprofile-zlib = $(TARGET-zlib) -all-stageprofile-zlib: configure-stageprofile-zlib - @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start - @r=`${PWD_COMMAND}`; export r; \ +mostlyclean-gdbsupport: + @: $(MAKE); $(unstage) + @[ -f 
./gdbsupport/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEprofile_TFLAGS)"; \ $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGEprofile_CFLAGS)" \ - CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ - LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGEprofile_TFLAGS)" \ - $(TARGET-stageprofile-zlib) + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing mostlyclean in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 -maybe-clean-stageprofile-zlib: clean-stageprofile-zlib -clean-stageprofile: clean-stageprofile-zlib -clean-stageprofile-zlib: - @if [ $(current_stage) = stageprofile ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stageprofile-zlib/Makefile ] || exit 0; \ - $(MAKE) stageprofile-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +@endif gdbsupport +.PHONY: maybe-clean-gdbsupport clean-gdbsupport +maybe-clean-gdbsupport: +@if gdbsupport +maybe-clean-gdbsupport: clean-gdbsupport -.PHONY: all-stagetrain-zlib maybe-all-stagetrain-zlib -.PHONY: clean-stagetrain-zlib maybe-clean-stagetrain-zlib -maybe-all-stagetrain-zlib: -maybe-clean-stagetrain-zlib: -@if zlib-bootstrap -maybe-all-stagetrain-zlib: all-stagetrain-zlib -all-stagetrain: all-stagetrain-zlib -TARGET-stagetrain-zlib = $(TARGET-zlib) -all-stagetrain-zlib: configure-stagetrain-zlib - @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start - @r=`${PWD_COMMAND}`; export r; \ +clean-gdbsupport: + @: $(MAKE); $(unstage) + @[ -f ./gdbsupport/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEtrain_TFLAGS)"; \ $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGEtrain_CFLAGS)" \ - CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ - LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGEtrain_TFLAGS)" \ - $(TARGET-stagetrain-zlib) + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing clean in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 -maybe-clean-stagetrain-zlib: clean-stagetrain-zlib -clean-stagetrain: clean-stagetrain-zlib -clean-stagetrain-zlib: - @if [ $(current_stage) = stagetrain ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f 
$(HOST_SUBDIR)/stagetrain-zlib/Makefile ] || exit 0; \ - $(MAKE) stagetrain-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +@endif gdbsupport +.PHONY: maybe-distclean-gdbsupport distclean-gdbsupport +maybe-distclean-gdbsupport: +@if gdbsupport +maybe-distclean-gdbsupport: distclean-gdbsupport -.PHONY: all-stagefeedback-zlib maybe-all-stagefeedback-zlib -.PHONY: clean-stagefeedback-zlib maybe-clean-stagefeedback-zlib -maybe-all-stagefeedback-zlib: -maybe-clean-stagefeedback-zlib: -@if zlib-bootstrap -maybe-all-stagefeedback-zlib: all-stagefeedback-zlib -all-stagefeedback: all-stagefeedback-zlib -TARGET-stagefeedback-zlib = $(TARGET-zlib) -all-stagefeedback-zlib: configure-stagefeedback-zlib - @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start - @r=`${PWD_COMMAND}`; export r; \ +distclean-gdbsupport: + @: $(MAKE); $(unstage) + @[ -f ./gdbsupport/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEfeedback_TFLAGS)"; \ $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGEfeedback_CFLAGS)" \ - CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ - LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGEfeedback_TFLAGS)" \ - $(TARGET-stagefeedback-zlib) + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing distclean in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 -maybe-clean-stagefeedback-zlib: clean-stagefeedback-zlib -clean-stagefeedback: clean-stagefeedback-zlib -clean-stagefeedback-zlib: - @if [ $(current_stage) = stagefeedback ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stagefeedback-zlib/Makefile ] || exit 0; \ - $(MAKE) stagefeedback-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +@endif gdbsupport +.PHONY: maybe-maintainer-clean-gdbsupport maintainer-clean-gdbsupport +maybe-maintainer-clean-gdbsupport: +@if gdbsupport +maybe-maintainer-clean-gdbsupport: maintainer-clean-gdbsupport -.PHONY: all-stageautoprofile-zlib maybe-all-stageautoprofile-zlib -.PHONY: clean-stageautoprofile-zlib maybe-clean-stageautoprofile-zlib -maybe-all-stageautoprofile-zlib: -maybe-clean-stageautoprofile-zlib: -@if zlib-bootstrap -maybe-all-stageautoprofile-zlib: all-stageautoprofile-zlib -all-stageautoprofile: all-stageautoprofile-zlib -TARGET-stageautoprofile-zlib = $(TARGET-zlib) -all-stageautoprofile-zlib: configure-stageautoprofile-zlib - @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start - @r=`${PWD_COMMAND}`; export r; \ +maintainer-clean-gdbsupport: + @: $(MAKE); $(unstage) + @[ -f ./gdbsupport/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd 
$(HOST_SUBDIR)/zlib && \ - $$s/gcc/config/i386/$(AUTO_PROFILE) \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGEautoprofile_CFLAGS)" \ - CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ - LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGEautoprofile_TFLAGS)" \ - $(TARGET-stageautoprofile-zlib) + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing maintainer-clean in gdbsupport"; \ + (cd $(HOST_SUBDIR)/gdbsupport && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 -maybe-clean-stageautoprofile-zlib: clean-stageautoprofile-zlib -clean-stageautoprofile: clean-stageautoprofile-zlib -clean-stageautoprofile-zlib: - @if [ $(current_stage) = stageautoprofile ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stageautoprofile-zlib/Makefile ] || exit 0; \ - $(MAKE) stageautoprofile-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +@endif gdbsupport -.PHONY: all-stageautofeedback-zlib maybe-all-stageautofeedback-zlib -.PHONY: clean-stageautofeedback-zlib maybe-clean-stageautofeedback-zlib -maybe-all-stageautofeedback-zlib: -maybe-clean-stageautofeedback-zlib: -@if zlib-bootstrap -maybe-all-stageautofeedback-zlib: all-stageautofeedback-zlib -all-stageautofeedback: all-stageautofeedback-zlib -TARGET-stageautofeedback-zlib = $(TARGET-zlib) -all-stageautofeedback-zlib: configure-stageautofeedback-zlib - @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start + +.PHONY: configure-gdbserver maybe-configure-gdbserver +maybe-configure-gdbserver: +@if gcc-bootstrap +configure-gdbserver: stage_current +@endif gcc-bootstrap +@if gdbserver +maybe-configure-gdbserver: configure-gdbserver +configure-gdbserver: + @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - $(HOST_EXPORTS) \ - $(POSTSTAGE1_HOST_EXPORTS) \ - cd $(HOST_SUBDIR)/zlib && \ - \ - $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(STAGEautofeedback_CFLAGS)" \ - CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ - LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ - CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ - TFLAGS="$(STAGEautofeedback_TFLAGS)" PERF_DATA=perf.data \ - $(TARGET-stageautofeedback-zlib) + test ! 
-f $(HOST_SUBDIR)/gdbserver/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/gdbserver; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/gdbserver; \ + cd "$(HOST_SUBDIR)/gdbserver" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/gdbserver/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=gdbserver; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + || exit 1 +@endif gdbserver -maybe-clean-stageautofeedback-zlib: clean-stageautofeedback-zlib -clean-stageautofeedback: clean-stageautofeedback-zlib -clean-stageautofeedback-zlib: - @if [ $(current_stage) = stageautofeedback ]; then \ - [ -f $(HOST_SUBDIR)/zlib/Makefile ] || exit 0; \ - else \ - [ -f $(HOST_SUBDIR)/stageautofeedback-zlib/Makefile ] || exit 0; \ - $(MAKE) stageautofeedback-start; \ - fi; \ - cd $(HOST_SUBDIR)/zlib && \ - $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean -@endif zlib-bootstrap +.PHONY: all-gdbserver maybe-all-gdbserver +maybe-all-gdbserver: +@if gcc-bootstrap +all-gdbserver: stage_current +@endif gcc-bootstrap +@if gdbserver +TARGET-gdbserver=all +maybe-all-gdbserver: all-gdbserver +all-gdbserver: configure-gdbserver + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/gdbserver && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-gdbserver)) +@endif gdbserver -.PHONY: check-zlib maybe-check-zlib -maybe-check-zlib: -@if zlib -maybe-check-zlib: check-zlib -check-zlib: -@endif zlib -.PHONY: install-zlib maybe-install-zlib -maybe-install-zlib: -@if zlib -maybe-install-zlib: install-zlib +.PHONY: check-gdbserver maybe-check-gdbserver +maybe-check-gdbserver: +@if gdbserver +maybe-check-gdbserver: check-gdbserver -install-zlib: +check-gdbserver: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/gdbserver && \ + $(MAKE) $(FLAGS_TO_PASS) check) -@endif zlib +@endif gdbserver -.PHONY: install-strip-zlib maybe-install-strip-zlib -maybe-install-strip-zlib: -@if zlib -maybe-install-strip-zlib: install-strip-zlib +.PHONY: install-gdbserver maybe-install-gdbserver +maybe-install-gdbserver: +@if gdbserver +maybe-install-gdbserver: install-gdbserver -install-strip-zlib: +install-gdbserver: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/gdbserver && \ + $(MAKE) $(FLAGS_TO_PASS) install) -@endif zlib +@endif gdbserver + +.PHONY: install-strip-gdbserver maybe-install-strip-gdbserver +maybe-install-strip-gdbserver: +@if gdbserver +maybe-install-strip-gdbserver: install-strip-gdbserver + +install-strip-gdbserver: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/gdbserver && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) + +@endif gdbserver # Other targets (info, dvi, pdf, etc.) 
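Every per-goal rule that follows this comment (info, dvi, pdf, html, TAGS, install-info, install-pdf, install-html, installcheck, and the *clean goals) uses one shared recipe shape: skip quietly if the module has not been configured, turn each word of $(EXTRA_HOST_FLAGS) into an exported shell variable, then recurse into the module's build directory passing the host tools explicitly. A minimal sketch of that shape, with MODULE and GOAL as placeholders rather than names taken from the generated file:

# Hypothetical condensed form of the per-goal recipes below; MODULE stands
# for a host module directory (gdbserver, c++tools, ...) and GOAL for the
# sub-make goal (info, html, clean, ...).
GOAL-MODULE: configure-MODULE
	@: $(MAKE); $(unstage)
	@[ -f ./MODULE/Makefile ] || exit 0; \
	r=`${PWD_COMMAND}`; export r; \
	s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \
	$(HOST_EXPORTS) \
	for flag in $(EXTRA_HOST_FLAGS) ; do \
	  eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \
	done; \
	echo "Doing GOAL in MODULE"; \
	(cd $(HOST_SUBDIR)/MODULE && \
	 $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \
	   "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \
	   "RANLIB=$${RANLIB}" \
	   "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \
	   GOAL) \
	|| exit 1

The sed expression rewrites a word such as CC=gcc into CC='gcc'; export CC, so the eval both assigns and exports each flag inside the recipe's shell before the sub-make runs.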
-.PHONY: maybe-info-zlib info-zlib -maybe-info-zlib: -@if zlib -maybe-info-zlib: info-zlib +.PHONY: maybe-info-gdbserver info-gdbserver +maybe-info-gdbserver: +@if gdbserver +maybe-info-gdbserver: info-gdbserver -info-zlib: \ - configure-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +info-gdbserver: \ + configure-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing info in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing info in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32548,24 +35224,25 @@ info-zlib: \ info) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-dvi-zlib dvi-zlib -maybe-dvi-zlib: -@if zlib -maybe-dvi-zlib: dvi-zlib +.PHONY: maybe-dvi-gdbserver dvi-gdbserver +maybe-dvi-gdbserver: +@if gdbserver +maybe-dvi-gdbserver: dvi-gdbserver -dvi-zlib: \ - configure-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +dvi-gdbserver: \ + configure-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing dvi in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing dvi in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32573,24 +35250,25 @@ dvi-zlib: \ dvi) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-pdf-zlib pdf-zlib -maybe-pdf-zlib: -@if zlib -maybe-pdf-zlib: pdf-zlib +.PHONY: maybe-pdf-gdbserver pdf-gdbserver +maybe-pdf-gdbserver: +@if gdbserver +maybe-pdf-gdbserver: pdf-gdbserver -pdf-zlib: \ - configure-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +pdf-gdbserver: \ + configure-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing pdf in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing pdf in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32598,24 +35276,25 @@ pdf-zlib: \ pdf) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-html-zlib html-zlib -maybe-html-zlib: -@if zlib -maybe-html-zlib: html-zlib +.PHONY: maybe-html-gdbserver html-gdbserver +maybe-html-gdbserver: +@if gdbserver +maybe-html-gdbserver: html-gdbserver -html-zlib: \ - configure-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +html-gdbserver: \ + configure-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing html in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing html in 
gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32623,24 +35302,25 @@ html-zlib: \ html) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-TAGS-zlib TAGS-zlib -maybe-TAGS-zlib: -@if zlib -maybe-TAGS-zlib: TAGS-zlib +.PHONY: maybe-TAGS-gdbserver TAGS-gdbserver +maybe-TAGS-gdbserver: +@if gdbserver +maybe-TAGS-gdbserver: TAGS-gdbserver -TAGS-zlib: \ - configure-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +TAGS-gdbserver: \ + configure-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing TAGS in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing TAGS in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32648,25 +35328,26 @@ TAGS-zlib: \ TAGS) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-install-info-zlib install-info-zlib -maybe-install-info-zlib: -@if zlib -maybe-install-info-zlib: install-info-zlib +.PHONY: maybe-install-info-gdbserver install-info-gdbserver +maybe-install-info-gdbserver: +@if gdbserver +maybe-install-info-gdbserver: install-info-gdbserver -install-info-zlib: \ - configure-zlib \ - info-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +install-info-gdbserver: \ + configure-gdbserver \ + info-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-info in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing install-info in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32674,25 +35355,26 @@ install-info-zlib: \ install-info) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-install-pdf-zlib install-pdf-zlib -maybe-install-pdf-zlib: -@if zlib -maybe-install-pdf-zlib: install-pdf-zlib +.PHONY: maybe-install-pdf-gdbserver install-pdf-gdbserver +maybe-install-pdf-gdbserver: +@if gdbserver +maybe-install-pdf-gdbserver: install-pdf-gdbserver -install-pdf-zlib: \ - configure-zlib \ - pdf-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +install-pdf-gdbserver: \ + configure-gdbserver \ + pdf-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-pdf in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing install-pdf in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32700,25 +35382,26 @@ install-pdf-zlib: \ install-pdf) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-install-html-zlib install-html-zlib -maybe-install-html-zlib: -@if zlib 
-maybe-install-html-zlib: install-html-zlib +.PHONY: maybe-install-html-gdbserver install-html-gdbserver +maybe-install-html-gdbserver: +@if gdbserver +maybe-install-html-gdbserver: install-html-gdbserver -install-html-zlib: \ - configure-zlib \ - html-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +install-html-gdbserver: \ + configure-gdbserver \ + html-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing install-html in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing install-html in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32726,24 +35409,25 @@ install-html-zlib: \ install-html) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-installcheck-zlib installcheck-zlib -maybe-installcheck-zlib: -@if zlib -maybe-installcheck-zlib: installcheck-zlib +.PHONY: maybe-installcheck-gdbserver installcheck-gdbserver +maybe-installcheck-gdbserver: +@if gdbserver +maybe-installcheck-gdbserver: installcheck-gdbserver -installcheck-zlib: \ - configure-zlib - @[ -f ./zlib/Makefile ] || exit 0; \ +installcheck-gdbserver: \ + configure-gdbserver + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing installcheck in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing installcheck in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32751,23 +35435,24 @@ installcheck-zlib: \ installcheck) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-mostlyclean-zlib mostlyclean-zlib -maybe-mostlyclean-zlib: -@if zlib -maybe-mostlyclean-zlib: mostlyclean-zlib +.PHONY: maybe-mostlyclean-gdbserver mostlyclean-gdbserver +maybe-mostlyclean-gdbserver: +@if gdbserver +maybe-mostlyclean-gdbserver: mostlyclean-gdbserver -mostlyclean-zlib: - @[ -f ./zlib/Makefile ] || exit 0; \ +mostlyclean-gdbserver: + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing mostlyclean in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing mostlyclean in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32775,23 +35460,24 @@ mostlyclean-zlib: mostlyclean) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-clean-zlib clean-zlib -maybe-clean-zlib: -@if zlib -maybe-clean-zlib: clean-zlib +.PHONY: maybe-clean-gdbserver clean-gdbserver +maybe-clean-gdbserver: +@if gdbserver +maybe-clean-gdbserver: clean-gdbserver -clean-zlib: - @[ -f ./zlib/Makefile ] || exit 0; \ +clean-gdbserver: + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd 
$(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing clean in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing clean in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32799,23 +35485,24 @@ clean-zlib: clean) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-distclean-zlib distclean-zlib -maybe-distclean-zlib: -@if zlib -maybe-distclean-zlib: distclean-zlib +.PHONY: maybe-distclean-gdbserver distclean-gdbserver +maybe-distclean-gdbserver: +@if gdbserver +maybe-distclean-gdbserver: distclean-gdbserver -distclean-zlib: - @[ -f ./zlib/Makefile ] || exit 0; \ +distclean-gdbserver: + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing distclean in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing distclean in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32823,23 +35510,24 @@ distclean-zlib: distclean) \ || exit 1 -@endif zlib +@endif gdbserver -.PHONY: maybe-maintainer-clean-zlib maintainer-clean-zlib -maybe-maintainer-clean-zlib: -@if zlib -maybe-maintainer-clean-zlib: maintainer-clean-zlib +.PHONY: maybe-maintainer-clean-gdbserver maintainer-clean-gdbserver +maybe-maintainer-clean-gdbserver: +@if gdbserver +maybe-maintainer-clean-gdbserver: maintainer-clean-gdbserver -maintainer-clean-zlib: - @[ -f ./zlib/Makefile ] || exit 0; \ +maintainer-clean-gdbserver: + @: $(MAKE); $(unstage) + @[ -f ./gdbserver/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - echo "Doing maintainer-clean in zlib"; \ - (cd $(HOST_SUBDIR)/zlib && \ + echo "Doing maintainer-clean in gdbserver"; \ + (cd $(HOST_SUBDIR)/gdbserver && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -32847,7 +35535,7 @@ maintainer-clean-zlib: maintainer-clean) \ || exit 1 -@endif zlib +@endif gdbserver @@ -35425,6 +38113,447 @@ maintainer-clean-utils: +.PHONY: configure-c++tools maybe-configure-c++tools +maybe-configure-c++tools: +@if gcc-bootstrap +configure-c++tools: stage_current +@endif gcc-bootstrap +@if c++tools +maybe-configure-c++tools: configure-c++tools +configure-c++tools: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + test ! 
-f $(HOST_SUBDIR)/c++tools/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/c++tools; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/c++tools; \ + cd "$(HOST_SUBDIR)/c++tools" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/c++tools/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=c++tools; \ + $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + || exit 1 +@endif c++tools + + + + + +.PHONY: all-c++tools maybe-all-c++tools +maybe-all-c++tools: +@if gcc-bootstrap +all-c++tools: stage_current +@endif gcc-bootstrap +@if c++tools +TARGET-c++tools=all +maybe-all-c++tools: all-c++tools +all-c++tools: configure-c++tools + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-c++tools)) +@endif c++tools + + + + +.PHONY: check-c++tools maybe-check-c++tools +maybe-check-c++tools: +@if c++tools +maybe-check-c++tools: check-c++tools + +check-c++tools: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(FLAGS_TO_PASS) check) + +@endif c++tools + +.PHONY: install-c++tools maybe-install-c++tools +maybe-install-c++tools: +@if c++tools +maybe-install-c++tools: install-c++tools + +install-c++tools: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(FLAGS_TO_PASS) install) + +@endif c++tools + +.PHONY: install-strip-c++tools maybe-install-strip-c++tools +maybe-install-strip-c++tools: +@if c++tools +maybe-install-strip-c++tools: install-strip-c++tools + +install-strip-c++tools: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) + +@endif c++tools + +# Other targets (info, dvi, pdf, etc.) 
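configure-c++tools and all-c++tools above are instances of the same unstaged host-module template used for gdbserver earlier in the file: the configure rule creates the build directory under $(HOST_SUBDIR), derives topdir as a relative path back to $(srcdir) when srcdir is not absolute, and runs the subdirectory's own configure with $(HOST_CONFIGARGS); the all rule then recurses into that directory with $(BASE_FLAGS_TO_PASS). A condensed sketch under that reading (MODULE is a placeholder, and the real generated rules carry a few extra steps such as the module_srcdir indirection):

# Hypothetical condensed per-module template; one specialised copy is
# generated for each unstaged host module such as c++tools or gdbserver.
configure-MODULE:
	@: $(MAKE); $(unstage)
	@r=`${PWD_COMMAND}`; export r; \
	s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \
	test ! -f $(HOST_SUBDIR)/MODULE/Makefile || exit 0; \
	$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/MODULE; \
	$(HOST_EXPORTS) \
	cd "$(HOST_SUBDIR)/MODULE" || exit 1; \
	case $(srcdir) in \
	  /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \
	  *) topdir=`echo $(HOST_SUBDIR)/MODULE/ | \
	     sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \
	esac; \
	$(SHELL) $$s/MODULE/configure --srcdir=$${topdir}/MODULE \
	  $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \
	  --target=${target_alias} \
	  || exit 1

all-MODULE: configure-MODULE
	@: $(MAKE); $(unstage)
	@r=`${PWD_COMMAND}`; export r; \
	s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \
	$(HOST_EXPORTS) \
	(cd $(HOST_SUBDIR)/MODULE && \
	 $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) all)

In the relative case, the sed pipeline replaces each component of the module's build path with ../, so topdir climbs back out of the build subdirectory before descending into $(srcdir).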
+ +.PHONY: maybe-info-c++tools info-c++tools +maybe-info-c++tools: +@if c++tools +maybe-info-c++tools: info-c++tools + +info-c++tools: \ + configure-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing info in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + info) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-dvi-c++tools dvi-c++tools +maybe-dvi-c++tools: +@if c++tools +maybe-dvi-c++tools: dvi-c++tools + +dvi-c++tools: \ + configure-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing dvi in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + dvi) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-pdf-c++tools pdf-c++tools +maybe-pdf-c++tools: +@if c++tools +maybe-pdf-c++tools: pdf-c++tools + +pdf-c++tools: \ + configure-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing pdf in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + pdf) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-html-c++tools html-c++tools +maybe-html-c++tools: +@if c++tools +maybe-html-c++tools: html-c++tools + +html-c++tools: \ + configure-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing html in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + html) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-TAGS-c++tools TAGS-c++tools +maybe-TAGS-c++tools: +@if c++tools +maybe-TAGS-c++tools: TAGS-c++tools + +TAGS-c++tools: \ + configure-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing TAGS in 
c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + TAGS) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-install-info-c++tools install-info-c++tools +maybe-install-info-c++tools: +@if c++tools +maybe-install-info-c++tools: install-info-c++tools + +install-info-c++tools: \ + configure-c++tools \ + info-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-info in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-info) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-install-pdf-c++tools install-pdf-c++tools +maybe-install-pdf-c++tools: +@if c++tools +maybe-install-pdf-c++tools: install-pdf-c++tools + +install-pdf-c++tools: \ + configure-c++tools \ + pdf-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-pdf in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-pdf) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-install-html-c++tools install-html-c++tools +maybe-install-html-c++tools: +@if c++tools +maybe-install-html-c++tools: install-html-c++tools + +install-html-c++tools: \ + configure-c++tools \ + html-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing install-html in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-installcheck-c++tools installcheck-c++tools +maybe-installcheck-c++tools: +@if c++tools +maybe-installcheck-c++tools: installcheck-c++tools + +installcheck-c++tools: \ + configure-c++tools + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing installcheck in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + 
"RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-mostlyclean-c++tools mostlyclean-c++tools +maybe-mostlyclean-c++tools: +@if c++tools +maybe-mostlyclean-c++tools: mostlyclean-c++tools + +mostlyclean-c++tools: + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing mostlyclean in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-clean-c++tools clean-c++tools +maybe-clean-c++tools: +@if c++tools +maybe-clean-c++tools: clean-c++tools + +clean-c++tools: + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing clean in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-distclean-c++tools distclean-c++tools +maybe-distclean-c++tools: +@if c++tools +maybe-distclean-c++tools: distclean-c++tools + +distclean-c++tools: + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing distclean in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif c++tools + +.PHONY: maybe-maintainer-clean-c++tools maintainer-clean-c++tools +maybe-maintainer-clean-c++tools: +@if c++tools +maybe-maintainer-clean-c++tools: maintainer-clean-c++tools + +maintainer-clean-c++tools: + @: $(MAKE); $(unstage) + @[ -f ./c++tools/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + echo "Doing maintainer-clean in c++tools"; \ + (cd $(HOST_SUBDIR)/c++tools && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif c++tools + + + .PHONY: configure-gnattools maybe-configure-gnattools maybe-configure-gnattools: @if gcc-bootstrap @@ -36243,6 +39372,7 @@ all-stage1-lto-plugin: configure-stage1-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ 
CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE1_CXXFLAGS)" \ LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36287,6 +39417,7 @@ all-stage2-lto-plugin: configure-stage2-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE2_CXXFLAGS)" \ LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36329,6 +39460,7 @@ all-stage3-lto-plugin: configure-stage3-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE3_CXXFLAGS)" \ LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36371,6 +39503,7 @@ all-stage4-lto-plugin: configure-stage4-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE4_CXXFLAGS)" \ LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36413,6 +39546,7 @@ all-stageprofile-lto-plugin: configure-stageprofile-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36455,6 +39589,7 @@ all-stagetrain-lto-plugin: configure-stagetrain-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36497,6 +39632,7 @@ all-stagefeedback-lto-plugin: configure-stagefeedback-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36539,6 +39675,7 @@ all-stageautoprofile-lto-plugin: configure-stageautoprofile-lto-plugin $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -36581,6 +39718,7 @@ all-stageautofeedback-lto-plugin: configure-stageautofeedback-lto-plugin \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ @@ -37862,1290 +41000,1161 @@ maintainer-clean-gotools: -# --------------------------------------- -# Modules which run on the target machine -# --------------------------------------- - - - - -.PHONY: configure-target-libstdc++-v3 maybe-configure-target-libstdc++-v3 -maybe-configure-target-libstdc++-v3: +.PHONY: configure-libctf maybe-configure-libctf +maybe-configure-libctf: @if gcc-bootstrap -configure-target-libstdc++-v3: stage_current +configure-libctf: stage_current @endif gcc-bootstrap -@if target-libstdc++-v3 -maybe-configure-target-libstdc++-v3: configure-target-libstdc++-v3 -configure-target-libstdc++-v3: +@if libctf +maybe-configure-libctf: configure-libctf +configure-libctf: @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; 
export s; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo Configuring in $(TARGET_SUBDIR)/libstdc++-v3; \ - cd "$(TARGET_SUBDIR)/libstdc++-v3" || exit 1; \ + test ! -f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + $(HOST_EXPORTS) \ + echo Configuring in $(HOST_SUBDIR)/libctf; \ + cd "$(HOST_SUBDIR)/libctf" || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ - rm -f no-such-file || : ; \ - CONFIG_SITE=no-such-file $(SHELL) \ + module_srcdir=libctf; \ + $(SHELL) \ $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: configure-stage1-target-libstdc++-v3 maybe-configure-stage1-target-libstdc++-v3 -maybe-configure-stage1-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stage1-target-libstdc++-v3: configure-stage1-target-libstdc++-v3 -configure-stage1-target-libstdc++-v3: +.PHONY: configure-stage1-libctf maybe-configure-stage1-libctf +maybe-configure-stage1-libctf: +@if libctf-bootstrap +maybe-configure-stage1-libctf: configure-stage1-libctf +configure-stage1-libctf: @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! 
-f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 1 in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! -f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + CFLAGS="$(STAGE1_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE1_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(LIBCFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 1 in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ \ $(STAGE1_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: configure-stage2-target-libstdc++-v3 maybe-configure-stage2-target-libstdc++-v3 -maybe-configure-stage2-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stage2-target-libstdc++-v3: configure-stage2-target-libstdc++-v3 -configure-stage2-target-libstdc++-v3: +.PHONY: configure-stage2-libctf maybe-configure-stage2-libctf +maybe-configure-stage2-libctf: +@if libctf-bootstrap +maybe-configure-stage2-libctf: configure-stage2-libctf +configure-stage2-libctf: @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 2 in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE2_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE2_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE2_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 2 in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ - /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ - sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ - esac; \ - module_srcdir=libstdc++-v3; \ - $(SHELL) $$s/$$module_srcdir/configure \ - --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ - --target=${target_alias} \ - --with-build-libsubdir=$(HOST_SUBDIR) \ - $(STAGE2_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap - -.PHONY: configure-stage3-target-libstdc++-v3 maybe-configure-stage3-target-libstdc++-v3 -maybe-configure-stage3-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stage3-target-libstdc++-v3: configure-stage3-target-libstdc++-v3 -configure-stage3-target-libstdc++-v3: - @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 - @r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - TFLAGS="$(STAGE3_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 3 in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libctf; \ + $(SHELL) $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ + --target=${target_alias} \ + --with-build-libsubdir=$(HOST_SUBDIR) \ + $(STAGE2_CONFIGURE_FLAGS) +@endif libctf-bootstrap + +.PHONY: configure-stage3-libctf maybe-configure-stage3-libctf +maybe-configure-stage3-libctf: +@if libctf-bootstrap +maybe-configure-stage3-libctf: configure-stage3-libctf +configure-stage3-libctf: + @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + TFLAGS="$(STAGE3_TFLAGS)"; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE3_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE3_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE3_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 3 in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE3_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: configure-stage4-target-libstdc++-v3 maybe-configure-stage4-target-libstdc++-v3 -maybe-configure-stage4-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stage4-target-libstdc++-v3: configure-stage4-target-libstdc++-v3 -configure-stage4-target-libstdc++-v3: +.PHONY: configure-stage4-libctf maybe-configure-stage4-libctf +maybe-configure-stage4-libctf: +@if libctf-bootstrap +maybe-configure-stage4-libctf: configure-stage4-libctf +configure-stage4-libctf: @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 4 in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGE4_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGE4_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGE4_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage 4 in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE4_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: configure-stageprofile-target-libstdc++-v3 maybe-configure-stageprofile-target-libstdc++-v3 -maybe-configure-stageprofile-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stageprofile-target-libstdc++-v3: configure-stageprofile-target-libstdc++-v3 -configure-stageprofile-target-libstdc++-v3: +.PHONY: configure-stageprofile-libctf maybe-configure-stageprofile-libctf +maybe-configure-stageprofile-libctf: +@if libctf-bootstrap +maybe-configure-stageprofile-libctf: configure-stageprofile-libctf +configure-stageprofile-libctf: @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage profile in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEprofile_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEprofile_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEprofile_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage profile in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEprofile_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: configure-stagetrain-target-libstdc++-v3 maybe-configure-stagetrain-target-libstdc++-v3 -maybe-configure-stagetrain-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stagetrain-target-libstdc++-v3: configure-stagetrain-target-libstdc++-v3 -configure-stagetrain-target-libstdc++-v3: +.PHONY: configure-stagetrain-libctf maybe-configure-stagetrain-libctf +maybe-configure-stagetrain-libctf: +@if libctf-bootstrap +maybe-configure-stagetrain-libctf: configure-stagetrain-libctf +configure-stagetrain-libctf: @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage train in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEtrain_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEtrain_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEtrain_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage train in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEtrain_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: configure-stagefeedback-target-libstdc++-v3 maybe-configure-stagefeedback-target-libstdc++-v3 -maybe-configure-stagefeedback-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stagefeedback-target-libstdc++-v3: configure-stagefeedback-target-libstdc++-v3 -configure-stagefeedback-target-libstdc++-v3: +.PHONY: configure-stagefeedback-libctf maybe-configure-stagefeedback-libctf +maybe-configure-stagefeedback-libctf: +@if libctf-bootstrap +maybe-configure-stagefeedback-libctf: configure-stagefeedback-libctf +configure-stagefeedback-libctf: @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage feedback in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEfeedback_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEfeedback_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEfeedback_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage feedback in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEfeedback_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: configure-stageautoprofile-target-libstdc++-v3 maybe-configure-stageautoprofile-target-libstdc++-v3 -maybe-configure-stageautoprofile-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stageautoprofile-target-libstdc++-v3: configure-stageautoprofile-target-libstdc++-v3 -configure-stageautoprofile-target-libstdc++-v3: +.PHONY: configure-stageautoprofile-libctf maybe-configure-stageautoprofile-libctf +maybe-configure-stageautoprofile-libctf: +@if libctf-bootstrap +maybe-configure-stageautoprofile-libctf: configure-stageautoprofile-libctf +configure-stageautoprofile-libctf: @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage autoprofile in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEautoprofile_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEautoprofile_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage autoprofile in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautoprofile_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: configure-stageautofeedback-target-libstdc++-v3 maybe-configure-stageautofeedback-target-libstdc++-v3 -maybe-configure-stageautofeedback-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-configure-stageautofeedback-target-libstdc++-v3: configure-stageautofeedback-target-libstdc++-v3 -configure-stageautofeedback-target-libstdc++-v3: +.PHONY: configure-stageautofeedback-libctf maybe-configure-stageautofeedback-libctf +maybe-configure-stageautofeedback-libctf: +@if libctf-bootstrap +maybe-configure-stageautofeedback-libctf: configure-stageautofeedback-libctf +configure-stageautofeedback-libctf: @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 + @$(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - echo "Checking multilib configuration for libstdc++-v3..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ - else \ - rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - else \ - mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ - fi; \ - test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage autofeedback in $(TARGET_SUBDIR)/libstdc++-v3; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ + test ! 
-f $(HOST_SUBDIR)/libctf/Makefile || exit 0; \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + CFLAGS="$(STAGEautofeedback_CFLAGS)"; export CFLAGS; \ + CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)"; export CXXFLAGS; \ + LIBCFLAGS="$(STAGEautofeedback_CFLAGS)"; export LIBCFLAGS; \ + echo Configuring stage autofeedback in $(HOST_SUBDIR)/libctf; \ + $(SHELL) $(srcdir)/mkinstalldirs $(HOST_SUBDIR)/libctf; \ + cd $(HOST_SUBDIR)/libctf || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ + *) topdir=`echo $(HOST_SUBDIR)/libctf/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libstdc++-v3; \ + module_srcdir=libctf; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ - $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautofeedback_CONFIGURE_FLAGS) -@endif target-libstdc++-v3-bootstrap +@endif libctf-bootstrap -.PHONY: all-target-libstdc++-v3 maybe-all-target-libstdc++-v3 -maybe-all-target-libstdc++-v3: +.PHONY: all-libctf maybe-all-libctf +maybe-all-libctf: @if gcc-bootstrap -all-target-libstdc++-v3: stage_current +all-libctf: stage_current @endif gcc-bootstrap -@if target-libstdc++-v3 -TARGET-target-libstdc++-v3=all -maybe-all-target-libstdc++-v3: all-target-libstdc++-v3 -all-target-libstdc++-v3: configure-target-libstdc++-v3 +@if libctf +TARGET-libctf=all +maybe-all-libctf: all-libctf +all-libctf: configure-libctf @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ - $(TARGET-target-libstdc++-v3)) -@endif target-libstdc++-v3 + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_HOST_FLAGS) $(STAGE1_FLAGS_TO_PASS) \ + $(TARGET-libctf)) +@endif libctf -.PHONY: all-stage1-target-libstdc++-v3 maybe-all-stage1-target-libstdc++-v3 -.PHONY: clean-stage1-target-libstdc++-v3 maybe-clean-stage1-target-libstdc++-v3 -maybe-all-stage1-target-libstdc++-v3: -maybe-clean-stage1-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stage1-target-libstdc++-v3: all-stage1-target-libstdc++-v3 -all-stage1: all-stage1-target-libstdc++-v3 -TARGET-stage1-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stage1-target-libstdc++-v3: configure-stage1-target-libstdc++-v3 +.PHONY: all-stage1-libctf maybe-all-stage1-libctf +.PHONY: clean-stage1-libctf maybe-clean-stage1-libctf +maybe-all-stage1-libctf: +maybe-clean-stage1-libctf: +@if libctf-bootstrap +maybe-all-stage1-libctf: all-stage1-libctf +all-stage1: all-stage1-libctf +TARGET-stage1-libctf = $(TARGET-libctf) +all-stage1-libctf: configure-stage1-libctf @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGE1_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE1_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE1_CXXFLAGS)" \ + 
LIBCFLAGS="$(LIBCFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ - \ + $(EXTRA_HOST_FLAGS) \ + $(STAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE1_TFLAGS)" \ - $(TARGET-stage1-target-libstdc++-v3) + $(TARGET-stage1-libctf) -maybe-clean-stage1-target-libstdc++-v3: clean-stage1-target-libstdc++-v3 -clean-stage1: clean-stage1-target-libstdc++-v3 -clean-stage1-target-libstdc++-v3: +maybe-clean-stage1-libctf: clean-stage1-libctf +clean-stage1: clean-stage1-libctf +clean-stage1-libctf: @if [ $(current_stage) = stage1 ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage1-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage1-libctf/Makefile ] || exit 0; \ $(MAKE) stage1-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ - clean -@endif target-libstdc++-v3-bootstrap - - -.PHONY: all-stage2-target-libstdc++-v3 maybe-all-stage2-target-libstdc++-v3 -.PHONY: clean-stage2-target-libstdc++-v3 maybe-clean-stage2-target-libstdc++-v3 -maybe-all-stage2-target-libstdc++-v3: -maybe-clean-stage2-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stage2-target-libstdc++-v3: all-stage2-target-libstdc++-v3 -all-stage2: all-stage2-target-libstdc++-v3 -TARGET-stage2-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stage2-target-libstdc++-v3: configure-stage2-target-libstdc++-v3 + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) \ + $(STAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stage2-libctf maybe-all-stage2-libctf +.PHONY: clean-stage2-libctf maybe-clean-stage2-libctf +maybe-all-stage2-libctf: +maybe-clean-stage2-libctf: +@if libctf-bootstrap +maybe-all-stage2-libctf: all-stage2-libctf +all-stage2: all-stage2-libctf +TARGET-stage2-libctf = $(TARGET-libctf) +all-stage2-libctf: configure-stage2-libctf @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGE2_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE2_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE2_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE2_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE2_TFLAGS)" \ - $(TARGET-stage2-target-libstdc++-v3) + $(TARGET-stage2-libctf) -maybe-clean-stage2-target-libstdc++-v3: clean-stage2-target-libstdc++-v3 -clean-stage2: clean-stage2-target-libstdc++-v3 -clean-stage2-target-libstdc++-v3: +maybe-clean-stage2-libctf: clean-stage2-libctf +clean-stage2: clean-stage2-libctf +clean-stage2-libctf: @if [ $(current_stage) = stage2 ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f 
$(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage2-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage2-libctf/Makefile ] || exit 0; \ $(MAKE) stage2-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap - - -.PHONY: all-stage3-target-libstdc++-v3 maybe-all-stage3-target-libstdc++-v3 -.PHONY: clean-stage3-target-libstdc++-v3 maybe-clean-stage3-target-libstdc++-v3 -maybe-all-stage3-target-libstdc++-v3: -maybe-clean-stage3-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stage3-target-libstdc++-v3: all-stage3-target-libstdc++-v3 -all-stage3: all-stage3-target-libstdc++-v3 -TARGET-stage3-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stage3-target-libstdc++-v3: configure-stage3-target-libstdc++-v3 + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stage3-libctf maybe-all-stage3-libctf +.PHONY: clean-stage3-libctf maybe-clean-stage3-libctf +maybe-all-stage3-libctf: +maybe-clean-stage3-libctf: +@if libctf-bootstrap +maybe-all-stage3-libctf: all-stage3-libctf +all-stage3: all-stage3-libctf +TARGET-stage3-libctf = $(TARGET-libctf) +all-stage3-libctf: configure-stage3-libctf @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE3_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGE3_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE3_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE3_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE3_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE3_TFLAGS)" \ - $(TARGET-stage3-target-libstdc++-v3) + $(TARGET-stage3-libctf) -maybe-clean-stage3-target-libstdc++-v3: clean-stage3-target-libstdc++-v3 -clean-stage3: clean-stage3-target-libstdc++-v3 -clean-stage3-target-libstdc++-v3: +maybe-clean-stage3-libctf: clean-stage3-libctf +clean-stage3: clean-stage3-libctf +clean-stage3-libctf: @if [ $(current_stage) = stage3 ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage3-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage3-libctf/Makefile ] || exit 0; \ $(MAKE) stage3-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap - - -.PHONY: all-stage4-target-libstdc++-v3 maybe-all-stage4-target-libstdc++-v3 -.PHONY: clean-stage4-target-libstdc++-v3 maybe-clean-stage4-target-libstdc++-v3 -maybe-all-stage4-target-libstdc++-v3: -maybe-clean-stage4-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stage4-target-libstdc++-v3: all-stage4-target-libstdc++-v3 -all-stage4: all-stage4-target-libstdc++-v3 
-TARGET-stage4-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stage4-target-libstdc++-v3: configure-stage4-target-libstdc++-v3 + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stage4-libctf maybe-all-stage4-libctf +.PHONY: clean-stage4-libctf maybe-clean-stage4-libctf +maybe-all-stage4-libctf: +maybe-clean-stage4-libctf: +@if libctf-bootstrap +maybe-all-stage4-libctf: all-stage4-libctf +all-stage4: all-stage4-libctf +TARGET-stage4-libctf = $(TARGET-libctf) +all-stage4-libctf: configure-stage4-libctf @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGE4_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE4_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGE4_CXXFLAGS)" \ + LIBCFLAGS="$(STAGE4_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGE4_TFLAGS)" \ - $(TARGET-stage4-target-libstdc++-v3) + $(TARGET-stage4-libctf) -maybe-clean-stage4-target-libstdc++-v3: clean-stage4-target-libstdc++-v3 -clean-stage4: clean-stage4-target-libstdc++-v3 -clean-stage4-target-libstdc++-v3: +maybe-clean-stage4-libctf: clean-stage4-libctf +clean-stage4: clean-stage4-libctf +clean-stage4-libctf: @if [ $(current_stage) = stage4 ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage4-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stage4-libctf/Makefile ] || exit 0; \ $(MAKE) stage4-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap - - -.PHONY: all-stageprofile-target-libstdc++-v3 maybe-all-stageprofile-target-libstdc++-v3 -.PHONY: clean-stageprofile-target-libstdc++-v3 maybe-clean-stageprofile-target-libstdc++-v3 -maybe-all-stageprofile-target-libstdc++-v3: -maybe-clean-stageprofile-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stageprofile-target-libstdc++-v3: all-stageprofile-target-libstdc++-v3 -all-stageprofile: all-stageprofile-target-libstdc++-v3 -TARGET-stageprofile-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stageprofile-target-libstdc++-v3: configure-stageprofile-target-libstdc++-v3 + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stageprofile-libctf maybe-all-stageprofile-libctf +.PHONY: clean-stageprofile-libctf maybe-clean-stageprofile-libctf +maybe-all-stageprofile-libctf: +maybe-clean-stageprofile-libctf: +@if libctf-bootstrap +maybe-all-stageprofile-libctf: all-stageprofile-libctf +all-stageprofile: all-stageprofile-libctf +TARGET-stageprofile-libctf = $(TARGET-libctf) +all-stageprofile-libctf: configure-stageprofile-libctf @[ $(current_stage) = stageprofile ] || 
$(MAKE) stageprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGEprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEprofile_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEprofile_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEprofile_TFLAGS)" \ - $(TARGET-stageprofile-target-libstdc++-v3) + $(TARGET-stageprofile-libctf) -maybe-clean-stageprofile-target-libstdc++-v3: clean-stageprofile-target-libstdc++-v3 -clean-stageprofile: clean-stageprofile-target-libstdc++-v3 -clean-stageprofile-target-libstdc++-v3: +maybe-clean-stageprofile-libctf: clean-stageprofile-libctf +clean-stageprofile: clean-stageprofile-libctf +clean-stageprofile-libctf: @if [ $(current_stage) = stageprofile ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageprofile-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stageprofile-libctf/Makefile ] || exit 0; \ $(MAKE) stageprofile-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap - - -.PHONY: all-stagetrain-target-libstdc++-v3 maybe-all-stagetrain-target-libstdc++-v3 -.PHONY: clean-stagetrain-target-libstdc++-v3 maybe-clean-stagetrain-target-libstdc++-v3 -maybe-all-stagetrain-target-libstdc++-v3: -maybe-clean-stagetrain-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stagetrain-target-libstdc++-v3: all-stagetrain-target-libstdc++-v3 -all-stagetrain: all-stagetrain-target-libstdc++-v3 -TARGET-stagetrain-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stagetrain-target-libstdc++-v3: configure-stagetrain-target-libstdc++-v3 + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stagetrain-libctf maybe-all-stagetrain-libctf +.PHONY: clean-stagetrain-libctf maybe-clean-stagetrain-libctf +maybe-all-stagetrain-libctf: +maybe-clean-stagetrain-libctf: +@if libctf-bootstrap +maybe-all-stagetrain-libctf: all-stagetrain-libctf +all-stagetrain: all-stagetrain-libctf +TARGET-stagetrain-libctf = $(TARGET-libctf) +all-stagetrain-libctf: configure-stagetrain-libctf @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGEtrain_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEtrain_GENERATOR_CFLAGS)" \ + 
CXXFLAGS="$(STAGEtrain_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEtrain_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEtrain_TFLAGS)" \ - $(TARGET-stagetrain-target-libstdc++-v3) + $(TARGET-stagetrain-libctf) -maybe-clean-stagetrain-target-libstdc++-v3: clean-stagetrain-target-libstdc++-v3 -clean-stagetrain: clean-stagetrain-target-libstdc++-v3 -clean-stagetrain-target-libstdc++-v3: +maybe-clean-stagetrain-libctf: clean-stagetrain-libctf +clean-stagetrain: clean-stagetrain-libctf +clean-stagetrain-libctf: @if [ $(current_stage) = stagetrain ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stagetrain-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stagetrain-libctf/Makefile ] || exit 0; \ $(MAKE) stagetrain-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap - - -.PHONY: all-stagefeedback-target-libstdc++-v3 maybe-all-stagefeedback-target-libstdc++-v3 -.PHONY: clean-stagefeedback-target-libstdc++-v3 maybe-clean-stagefeedback-target-libstdc++-v3 -maybe-all-stagefeedback-target-libstdc++-v3: -maybe-clean-stagefeedback-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stagefeedback-target-libstdc++-v3: all-stagefeedback-target-libstdc++-v3 -all-stagefeedback: all-stagefeedback-target-libstdc++-v3 -TARGET-stagefeedback-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stagefeedback-target-libstdc++-v3: configure-stagefeedback-target-libstdc++-v3 + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stagefeedback-libctf maybe-all-stagefeedback-libctf +.PHONY: clean-stagefeedback-libctf maybe-clean-stagefeedback-libctf +maybe-all-stagefeedback-libctf: +maybe-clean-stagefeedback-libctf: +@if libctf-bootstrap +maybe-all-stagefeedback-libctf: all-stagefeedback-libctf +all-stagefeedback: all-stagefeedback-libctf +TARGET-stagefeedback-libctf = $(TARGET-libctf) +all-stagefeedback-libctf: configure-stagefeedback-libctf @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGEfeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEfeedback_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEfeedback_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEfeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEfeedback_TFLAGS)" \ - $(TARGET-stagefeedback-target-libstdc++-v3) - -maybe-clean-stagefeedback-target-libstdc++-v3: 
clean-stagefeedback-target-libstdc++-v3 -clean-stagefeedback: clean-stagefeedback-target-libstdc++-v3 -clean-stagefeedback-target-libstdc++-v3: - @if [ $(current_stage) = stagefeedback ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ - else \ - [ -f $(TARGET_SUBDIR)/stagefeedback-libstdc++-v3/Makefile ] || exit 0; \ - $(MAKE) stagefeedback-start; \ - fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap + $(TARGET-stagefeedback-libctf) - -.PHONY: all-stageautoprofile-target-libstdc++-v3 maybe-all-stageautoprofile-target-libstdc++-v3 -.PHONY: clean-stageautoprofile-target-libstdc++-v3 maybe-clean-stageautoprofile-target-libstdc++-v3 -maybe-all-stageautoprofile-target-libstdc++-v3: -maybe-clean-stageautoprofile-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stageautoprofile-target-libstdc++-v3: all-stageautoprofile-target-libstdc++-v3 -all-stageautoprofile: all-stageautoprofile-target-libstdc++-v3 -TARGET-stageautoprofile-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stageautoprofile-target-libstdc++-v3: configure-stageautoprofile-target-libstdc++-v3 +maybe-clean-stagefeedback-libctf: clean-stagefeedback-libctf +clean-stagefeedback: clean-stagefeedback-libctf +clean-stagefeedback-libctf: + @if [ $(current_stage) = stagefeedback ]; then \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ + else \ + [ -f $(HOST_SUBDIR)/stagefeedback-libctf/Makefile ] || exit 0; \ + $(MAKE) stagefeedback-start; \ + fi; \ + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stageautoprofile-libctf maybe-all-stageautoprofile-libctf +.PHONY: clean-stageautoprofile-libctf maybe-clean-stageautoprofile-libctf +maybe-all-stageautoprofile-libctf: +maybe-clean-stageautoprofile-libctf: +@if libctf-bootstrap +maybe-all-stageautoprofile-libctf: all-stageautoprofile-libctf +all-stageautoprofile: all-stageautoprofile-libctf +TARGET-stageautoprofile-libctf = $(TARGET-libctf) +all-stageautoprofile-libctf: configure-stageautoprofile-libctf @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGEautoprofile_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEautoprofile_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEautoprofile_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEautoprofile_TFLAGS)" \ - $(TARGET-stageautoprofile-target-libstdc++-v3) + $(TARGET-stageautoprofile-libctf) -maybe-clean-stageautoprofile-target-libstdc++-v3: clean-stageautoprofile-target-libstdc++-v3 -clean-stageautoprofile: clean-stageautoprofile-target-libstdc++-v3 -clean-stageautoprofile-target-libstdc++-v3: 
+maybe-clean-stageautoprofile-libctf: clean-stageautoprofile-libctf +clean-stageautoprofile: clean-stageautoprofile-libctf +clean-stageautoprofile-libctf: @if [ $(current_stage) = stageautoprofile ]; then \ - [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageautoprofile-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stageautoprofile-libctf/Makefile ] || exit 0; \ $(MAKE) stageautoprofile-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap - - -.PHONY: all-stageautofeedback-target-libstdc++-v3 maybe-all-stageautofeedback-target-libstdc++-v3 -.PHONY: clean-stageautofeedback-target-libstdc++-v3 maybe-clean-stageautofeedback-target-libstdc++-v3 -maybe-all-stageautofeedback-target-libstdc++-v3: -maybe-clean-stageautofeedback-target-libstdc++-v3: -@if target-libstdc++-v3-bootstrap -maybe-all-stageautofeedback-target-libstdc++-v3: all-stageautofeedback-target-libstdc++-v3 -all-stageautofeedback: all-stageautofeedback-target-libstdc++-v3 -TARGET-stageautofeedback-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) -all-stageautofeedback-target-libstdc++-v3: configure-stageautofeedback-target-libstdc++-v3 + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap + + +.PHONY: all-stageautofeedback-libctf maybe-all-stageautofeedback-libctf +.PHONY: clean-stageautofeedback-libctf maybe-clean-stageautofeedback-libctf +maybe-all-stageautofeedback-libctf: +maybe-clean-stageautofeedback-libctf: +@if libctf-bootstrap +maybe-all-stageautofeedback-libctf: all-stageautofeedback-libctf +all-stageautofeedback: all-stageautofeedback-libctf +TARGET-stageautofeedback-libctf = $(TARGET-libctf) +all-stageautofeedback-libctf: configure-stageautofeedback-libctf @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - $(RAW_CXX_TARGET_EXPORTS) \ - \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + $(HOST_EXPORTS) \ + $(POSTSTAGE1_HOST_EXPORTS) \ + cd $(HOST_SUBDIR)/libctf && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ - CFLAGS="$(CFLAGS_FOR_TARGET)" \ - CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ - LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)" \ + CFLAGS="$(STAGEautofeedback_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGEautofeedback_GENERATOR_CFLAGS)" \ + CXXFLAGS="$(STAGEautofeedback_CXXFLAGS)" \ + LIBCFLAGS="$(STAGEautofeedback_CFLAGS)" \ CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) \ TFLAGS="$(STAGEautofeedback_TFLAGS)" PERF_DATA=perf.data \ - $(TARGET-stageautofeedback-target-libstdc++-v3) + $(TARGET-stageautofeedback-libctf) -maybe-clean-stageautofeedback-target-libstdc++-v3: clean-stageautofeedback-target-libstdc++-v3 -clean-stageautofeedback: clean-stageautofeedback-target-libstdc++-v3 -clean-stageautofeedback-target-libstdc++-v3: +maybe-clean-stageautofeedback-libctf: clean-stageautofeedback-libctf +clean-stageautofeedback: clean-stageautofeedback-libctf +clean-stageautofeedback-libctf: @if [ $(current_stage) = stageautofeedback ]; then \ - [ -f 
$(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/libctf/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageautofeedback-libstdc++-v3/Makefile ] || exit 0; \ + [ -f $(HOST_SUBDIR)/stageautofeedback-libctf/Makefile ] || exit 0; \ $(MAKE) stageautofeedback-start; \ fi; \ - cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libstdc++-v3-bootstrap - + cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(EXTRA_HOST_FLAGS) $(POSTSTAGE1_FLAGS_TO_PASS) clean +@endif libctf-bootstrap -.PHONY: check-target-libstdc++-v3 maybe-check-target-libstdc++-v3 -maybe-check-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-check-target-libstdc++-v3: check-target-libstdc++-v3 +.PHONY: check-libctf maybe-check-libctf +maybe-check-libctf: +@if libctf +maybe-check-libctf: check-libctf -check-target-libstdc++-v3: +check-libctf: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(TARGET_FLAGS_TO_PASS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' check) + $(HOST_EXPORTS) $(EXTRA_HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(FLAGS_TO_PASS) $(EXTRA_BOOTSTRAP_FLAGS) check) -@endif target-libstdc++-v3 +@endif libctf -.PHONY: install-target-libstdc++-v3 maybe-install-target-libstdc++-v3 -maybe-install-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-install-target-libstdc++-v3: install-target-libstdc++-v3 +.PHONY: install-libctf maybe-install-libctf +maybe-install-libctf: +@if libctf +maybe-install-libctf: install-libctf -install-target-libstdc++-v3: installdirs +install-libctf: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(TARGET_FLAGS_TO_PASS) install) + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(FLAGS_TO_PASS) install) -@endif target-libstdc++-v3 +@endif libctf -.PHONY: install-strip-target-libstdc++-v3 maybe-install-strip-target-libstdc++-v3 -maybe-install-strip-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-install-strip-target-libstdc++-v3: install-strip-target-libstdc++-v3 +.PHONY: install-strip-libctf maybe-install-strip-libctf +maybe-install-strip-libctf: +@if libctf +maybe-install-strip-libctf: install-strip-libctf -install-strip-target-libstdc++-v3: installdirs +install-strip-libctf: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ - $(MAKE) $(TARGET_FLAGS_TO_PASS) install-strip) + $(HOST_EXPORTS) \ + (cd $(HOST_SUBDIR)/libctf && \ + $(MAKE) $(FLAGS_TO_PASS) install-strip) -@endif target-libstdc++-v3 +@endif libctf # Other targets (info, dvi, pdf, etc.) 
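# The libctf hooks below all instantiate one generated pattern: each hook now
# runs in ./libctf under $(HOST_EXPORTS) with $(EXTRA_HOST_FLAGS), instead of
# the old target-module form that ran in $(TARGET_SUBDIR)/libstdc++-v3 under
# $(RAW_CXX_TARGET_EXPORTS) with $(EXTRA_TARGET_FLAGS) after $(unstage).
# Condensed sketch of that shape; DOCTARGET is an illustrative placeholder
# (standing in for info, dvi, pdf, html, TAGS, install-info, install-pdf,
# install-html), not a variable that appears in the real Makefile.in:
#
# .PHONY: maybe-DOCTARGET-libctf DOCTARGET-libctf
# maybe-DOCTARGET-libctf:
# @if libctf
# maybe-DOCTARGET-libctf: DOCTARGET-libctf
#
# DOCTARGET-libctf: configure-libctf
# 	@[ -f ./libctf/Makefile ] || exit 0; \
# 	r=`${PWD_COMMAND}`; export r; \
# 	s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \
# 	$(HOST_EXPORTS) \
# 	for flag in $(EXTRA_HOST_FLAGS) ; do \
# 	  eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \
# 	done; \
# 	echo "Doing DOCTARGET in libctf"; \
# 	(cd $(HOST_SUBDIR)/libctf && \
# 	  $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \
# 	    "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \
# 	    "RANLIB=$${RANLIB}" \
# 	    "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \
# 	    DOCTARGET) \
# 	  || exit 1
# @endif libctf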
-.PHONY: maybe-info-target-libstdc++-v3 info-target-libstdc++-v3 -maybe-info-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-info-target-libstdc++-v3: info-target-libstdc++-v3 +.PHONY: maybe-info-libctf info-libctf +maybe-info-libctf: +@if libctf +maybe-info-libctf: info-libctf -info-target-libstdc++-v3: \ - configure-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +info-libctf: \ + configure-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing info in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing info in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - info) \ + info) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-dvi-target-libstdc++-v3 dvi-target-libstdc++-v3 -maybe-dvi-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-dvi-target-libstdc++-v3: dvi-target-libstdc++-v3 +.PHONY: maybe-dvi-libctf dvi-libctf +maybe-dvi-libctf: +@if libctf +maybe-dvi-libctf: dvi-libctf -dvi-target-libstdc++-v3: \ - configure-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +dvi-libctf: \ + configure-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing dvi in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing dvi in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - dvi) \ + dvi) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-pdf-target-libstdc++-v3 pdf-target-libstdc++-v3 -maybe-pdf-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-pdf-target-libstdc++-v3: pdf-target-libstdc++-v3 +.PHONY: maybe-pdf-libctf pdf-libctf +maybe-pdf-libctf: +@if libctf +maybe-pdf-libctf: pdf-libctf -pdf-target-libstdc++-v3: \ - configure-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +pdf-libctf: \ + configure-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing pdf in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing pdf in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" 
"WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - pdf) \ + pdf) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-html-target-libstdc++-v3 html-target-libstdc++-v3 -maybe-html-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-html-target-libstdc++-v3: html-target-libstdc++-v3 +.PHONY: maybe-html-libctf html-libctf +maybe-html-libctf: +@if libctf +maybe-html-libctf: html-libctf -html-target-libstdc++-v3: \ - configure-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +html-libctf: \ + configure-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing html in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing html in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - html) \ + html) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-TAGS-target-libstdc++-v3 TAGS-target-libstdc++-v3 -maybe-TAGS-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-TAGS-target-libstdc++-v3: TAGS-target-libstdc++-v3 +.PHONY: maybe-TAGS-libctf TAGS-libctf +maybe-TAGS-libctf: +@if libctf +maybe-TAGS-libctf: TAGS-libctf -TAGS-target-libstdc++-v3: \ - configure-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +TAGS-libctf: \ + configure-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing TAGS in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing TAGS in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - TAGS) \ + TAGS) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-install-info-target-libstdc++-v3 install-info-target-libstdc++-v3 -maybe-install-info-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-install-info-target-libstdc++-v3: install-info-target-libstdc++-v3 +.PHONY: maybe-install-info-libctf install-info-libctf +maybe-install-info-libctf: +@if libctf +maybe-install-info-libctf: install-info-libctf -install-info-target-libstdc++-v3: \ - configure-target-libstdc++-v3 \ - info-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +install-info-libctf: \ + configure-libctf \ + info-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing install-info in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e 
"s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing install-info in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-info) \ + install-info) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-install-pdf-target-libstdc++-v3 install-pdf-target-libstdc++-v3 -maybe-install-pdf-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-install-pdf-target-libstdc++-v3: install-pdf-target-libstdc++-v3 +.PHONY: maybe-install-pdf-libctf install-pdf-libctf +maybe-install-pdf-libctf: +@if libctf +maybe-install-pdf-libctf: install-pdf-libctf -install-pdf-target-libstdc++-v3: \ - configure-target-libstdc++-v3 \ - pdf-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +install-pdf-libctf: \ + configure-libctf \ + pdf-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing install-pdf in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing install-pdf in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-pdf) \ + install-pdf) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-install-html-target-libstdc++-v3 install-html-target-libstdc++-v3 -maybe-install-html-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-install-html-target-libstdc++-v3: install-html-target-libstdc++-v3 +.PHONY: maybe-install-html-libctf install-html-libctf +maybe-install-html-libctf: +@if libctf +maybe-install-html-libctf: install-html-libctf -install-html-target-libstdc++-v3: \ - configure-target-libstdc++-v3 \ - html-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +install-html-libctf: \ + configure-libctf \ + html-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing install-html in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing install-html in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-html) \ + install-html) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-installcheck-target-libstdc++-v3 installcheck-target-libstdc++-v3 -maybe-installcheck-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-installcheck-target-libstdc++-v3: installcheck-target-libstdc++-v3 +.PHONY: maybe-installcheck-libctf installcheck-libctf +maybe-installcheck-libctf: 
+@if libctf +maybe-installcheck-libctf: installcheck-libctf -installcheck-target-libstdc++-v3: \ - configure-target-libstdc++-v3 - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +installcheck-libctf: \ + configure-libctf + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing installcheck in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing installcheck in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - installcheck) \ + installcheck) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-mostlyclean-target-libstdc++-v3 mostlyclean-target-libstdc++-v3 -maybe-mostlyclean-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-mostlyclean-target-libstdc++-v3: mostlyclean-target-libstdc++-v3 +.PHONY: maybe-mostlyclean-libctf mostlyclean-libctf +maybe-mostlyclean-libctf: +@if libctf +maybe-mostlyclean-libctf: mostlyclean-libctf -mostlyclean-target-libstdc++-v3: - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +mostlyclean-libctf: + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing mostlyclean in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing mostlyclean in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - mostlyclean) \ + mostlyclean) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-clean-target-libstdc++-v3 clean-target-libstdc++-v3 -maybe-clean-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-clean-target-libstdc++-v3: clean-target-libstdc++-v3 +.PHONY: maybe-clean-libctf clean-libctf +maybe-clean-libctf: +@if libctf +maybe-clean-libctf: clean-libctf -clean-target-libstdc++-v3: - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +clean-libctf: + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing clean in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing clean in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - clean) \ + clean) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: 
maybe-distclean-target-libstdc++-v3 distclean-target-libstdc++-v3 -maybe-distclean-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-distclean-target-libstdc++-v3: distclean-target-libstdc++-v3 +.PHONY: maybe-distclean-libctf distclean-libctf +maybe-distclean-libctf: +@if libctf +maybe-distclean-libctf: distclean-libctf -distclean-target-libstdc++-v3: - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +distclean-libctf: + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing distclean in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing distclean in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - distclean) \ + distclean) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf -.PHONY: maybe-maintainer-clean-target-libstdc++-v3 maintainer-clean-target-libstdc++-v3 -maybe-maintainer-clean-target-libstdc++-v3: -@if target-libstdc++-v3 -maybe-maintainer-clean-target-libstdc++-v3: maintainer-clean-target-libstdc++-v3 +.PHONY: maybe-maintainer-clean-libctf maintainer-clean-libctf +maybe-maintainer-clean-libctf: +@if libctf +maybe-maintainer-clean-libctf: maintainer-clean-libctf -maintainer-clean-target-libstdc++-v3: - @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ +maintainer-clean-libctf: + @[ -f ./libctf/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing maintainer-clean in $(TARGET_SUBDIR)/libstdc++-v3"; \ - for flag in $(EXTRA_TARGET_FLAGS); do \ + $(HOST_EXPORTS) \ + for flag in $(EXTRA_HOST_FLAGS) ; do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ + echo "Doing maintainer-clean in libctf"; \ + (cd $(HOST_SUBDIR)/libctf && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - maintainer-clean) \ + maintainer-clean) \ || exit 1 -@endif target-libstdc++-v3 +@endif libctf +# --------------------------------------- +# Modules which run on the target machine +# --------------------------------------- -.PHONY: configure-target-libsanitizer maybe-configure-target-libsanitizer -maybe-configure-target-libsanitizer: + + +.PHONY: configure-target-libstdc++-v3 maybe-configure-target-libstdc++-v3 +maybe-configure-target-libstdc++-v3: @if gcc-bootstrap -configure-target-libsanitizer: stage_current +configure-target-libstdc++-v3: stage_current @endif gcc-bootstrap -@if target-libsanitizer -maybe-configure-target-libsanitizer: configure-target-libsanitizer -configure-target-libsanitizer: +@if target-libstdc++-v3 +maybe-configure-target-libstdc++-v3: configure-target-libstdc++-v3 +configure-target-libstdc++-v3: @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(SHELL) $(srcdir)/mkinstalldirs 
$(TARGET_SUBDIR)/libsanitizer; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo Configuring in $(TARGET_SUBDIR)/libsanitizer; \ - cd "$(TARGET_SUBDIR)/libsanitizer" || exit 1; \ + echo Configuring in $(TARGET_SUBDIR)/libstdc++-v3; \ + cd "$(TARGET_SUBDIR)/libstdc++-v3" || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ rm -f no-such-file || : ; \ CONFIG_SITE=no-such-file $(SHELL) \ $$s/$$module_srcdir/configure \ @@ -39153,452 +42162,452 @@ configure-target-libsanitizer: $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: configure-stage1-target-libsanitizer maybe-configure-stage1-target-libsanitizer -maybe-configure-stage1-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stage1-target-libsanitizer: configure-stage1-target-libsanitizer -configure-stage1-target-libsanitizer: +.PHONY: configure-stage1-target-libstdc++-v3 maybe-configure-stage1-target-libstdc++-v3 +maybe-configure-stage1-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stage1-target-libstdc++-v3: configure-stage1-target-libstdc++-v3 +configure-stage1-target-libstdc++-v3: @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s 
$(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 1 in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage 1 in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ \ $(STAGE1_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stage2-target-libsanitizer maybe-configure-stage2-target-libsanitizer -maybe-configure-stage2-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stage2-target-libsanitizer: configure-stage2-target-libsanitizer -configure-stage2-target-libsanitizer: +.PHONY: configure-stage2-target-libstdc++-v3 maybe-configure-stage2-target-libstdc++-v3 +maybe-configure-stage2-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stage2-target-libstdc++-v3: configure-stage2-target-libstdc++-v3 +configure-stage2-target-libstdc++-v3: @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo 
"Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 2 in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage 2 in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE2_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stage3-target-libsanitizer maybe-configure-stage3-target-libsanitizer -maybe-configure-stage3-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stage3-target-libsanitizer: configure-stage3-target-libsanitizer -configure-stage3-target-libsanitizer: +.PHONY: configure-stage3-target-libstdc++-v3 maybe-configure-stage3-target-libstdc++-v3 +maybe-configure-stage3-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stage3-target-libstdc++-v3: configure-stage3-target-libstdc++-v3 +configure-stage3-target-libstdc++-v3: @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE3_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > 
$(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 3 in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage 3 in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE3_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stage4-target-libsanitizer maybe-configure-stage4-target-libsanitizer -maybe-configure-stage4-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stage4-target-libsanitizer: configure-stage4-target-libsanitizer -configure-stage4-target-libsanitizer: +.PHONY: configure-stage4-target-libstdc++-v3 maybe-configure-stage4-target-libstdc++-v3 +maybe-configure-stage4-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stage4-target-libstdc++-v3: configure-stage4-target-libstdc++-v3 +configure-stage4-target-libstdc++-v3: @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; 
then \ + if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 4 in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage 4 in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE4_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stageprofile-target-libsanitizer maybe-configure-stageprofile-target-libsanitizer -maybe-configure-stageprofile-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stageprofile-target-libsanitizer: configure-stageprofile-target-libsanitizer -configure-stageprofile-target-libsanitizer: +.PHONY: configure-stageprofile-target-libstdc++-v3 maybe-configure-stageprofile-target-libstdc++-v3 +maybe-configure-stageprofile-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stageprofile-target-libstdc++-v3: configure-stageprofile-target-libstdc++-v3 +configure-stageprofile-target-libstdc++-v3: @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s 
$(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage profile in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage profile in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEprofile_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stagetrain-target-libsanitizer maybe-configure-stagetrain-target-libsanitizer -maybe-configure-stagetrain-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stagetrain-target-libsanitizer: configure-stagetrain-target-libsanitizer -configure-stagetrain-target-libsanitizer: +.PHONY: configure-stagetrain-target-libstdc++-v3 maybe-configure-stagetrain-target-libstdc++-v3 +maybe-configure-stagetrain-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stagetrain-target-libstdc++-v3: configure-stagetrain-target-libstdc++-v3 +configure-stagetrain-target-libstdc++-v3: @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s 
$(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage train in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage train in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEtrain_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stagefeedback-target-libsanitizer maybe-configure-stagefeedback-target-libsanitizer -maybe-configure-stagefeedback-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stagefeedback-target-libsanitizer: configure-stagefeedback-target-libsanitizer -configure-stagefeedback-target-libsanitizer: +.PHONY: configure-stagefeedback-target-libstdc++-v3 maybe-configure-stagefeedback-target-libstdc++-v3 +maybe-configure-stagefeedback-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stagefeedback-target-libstdc++-v3: configure-stagefeedback-target-libstdc++-v3 +configure-stagefeedback-target-libstdc++-v3: @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s 
$(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage feedback in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage feedback in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEfeedback_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stageautoprofile-target-libsanitizer maybe-configure-stageautoprofile-target-libsanitizer -maybe-configure-stageautoprofile-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stageautoprofile-target-libsanitizer: configure-stageautoprofile-target-libsanitizer -configure-stageautoprofile-target-libsanitizer: +.PHONY: configure-stageautoprofile-target-libstdc++-v3 maybe-configure-stageautoprofile-target-libstdc++-v3 +maybe-configure-stageautoprofile-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stageautoprofile-target-libstdc++-v3: configure-stageautoprofile-target-libstdc++-v3 +configure-stageautoprofile-target-libstdc++-v3: @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r 
$(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage autoprofile in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage autoprofile in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautoprofile_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: configure-stageautofeedback-target-libsanitizer maybe-configure-stageautofeedback-target-libsanitizer -maybe-configure-stageautofeedback-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-configure-stageautofeedback-target-libsanitizer: configure-stageautofeedback-target-libsanitizer -configure-stageautofeedback-target-libsanitizer: +.PHONY: configure-stageautofeedback-target-libstdc++-v3 maybe-configure-stageautofeedback-target-libstdc++-v3 +maybe-configure-stageautofeedback-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-configure-stageautofeedback-target-libstdc++-v3: configure-stageautofeedback-target-libstdc++-v3 +configure-stageautofeedback-target-libstdc++-v3: @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - echo "Checking multilib configuration for libsanitizer..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ + echo "Checking multilib configuration for libstdc++-v3..."; \ + $(CC_FOR_TARGET) 
--print-multi-lib > $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ + mv $(TARGET_SUBDIR)/libstdc++-v3/multilib.tmp $(TARGET_SUBDIR)/libstdc++-v3/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + test ! -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile || exit 0; \ $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage autofeedback in $(TARGET_SUBDIR)/libsanitizer; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ - cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ + echo Configuring stage autofeedback in $(TARGET_SUBDIR)/libstdc++-v3; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libstdc++-v3; \ + cd $(TARGET_SUBDIR)/libstdc++-v3 || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libstdc++-v3/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libsanitizer; \ + module_srcdir=libstdc++-v3; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautofeedback_CONFIGURE_FLAGS) -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-target-libsanitizer maybe-all-target-libsanitizer -maybe-all-target-libsanitizer: +.PHONY: all-target-libstdc++-v3 maybe-all-target-libstdc++-v3 +maybe-all-target-libstdc++-v3: @if gcc-bootstrap -all-target-libsanitizer: stage_current +all-target-libstdc++-v3: stage_current @endif gcc-bootstrap -@if target-libsanitizer -TARGET-target-libsanitizer=all -maybe-all-target-libsanitizer: all-target-libsanitizer -all-target-libsanitizer: configure-target-libsanitizer +@if target-libstdc++-v3 +TARGET-target-libstdc++-v3=all +maybe-all-target-libstdc++-v3: all-target-libstdc++-v3 +all-target-libstdc++-v3: configure-target-libstdc++-v3 @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ - $(TARGET-target-libsanitizer)) -@endif target-libsanitizer + $(TARGET-target-libstdc++-v3)) +@endif target-libstdc++-v3 -.PHONY: all-stage1-target-libsanitizer maybe-all-stage1-target-libsanitizer -.PHONY: clean-stage1-target-libsanitizer maybe-clean-stage1-target-libsanitizer -maybe-all-stage1-target-libsanitizer: -maybe-clean-stage1-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stage1-target-libsanitizer: all-stage1-target-libsanitizer 
-all-stage1: all-stage1-target-libsanitizer -TARGET-stage1-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stage1-target-libsanitizer: configure-stage1-target-libsanitizer +.PHONY: all-stage1-target-libstdc++-v3 maybe-all-stage1-target-libstdc++-v3 +.PHONY: clean-stage1-target-libstdc++-v3 maybe-clean-stage1-target-libstdc++-v3 +maybe-all-stage1-target-libstdc++-v3: +maybe-clean-stage1-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stage1-target-libstdc++-v3: all-stage1-target-libstdc++-v3 +all-stage1: all-stage1-target-libstdc++-v3 +TARGET-stage1-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stage1-target-libstdc++-v3: configure-stage1-target-libstdc++-v3 @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39610,39 +42619,39 @@ all-stage1-target-libsanitizer: configure-stage1-target-libsanitizer $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ \ TFLAGS="$(STAGE1_TFLAGS)" \ - $(TARGET-stage1-target-libsanitizer) + $(TARGET-stage1-target-libstdc++-v3) -maybe-clean-stage1-target-libsanitizer: clean-stage1-target-libsanitizer -clean-stage1: clean-stage1-target-libsanitizer -clean-stage1-target-libsanitizer: +maybe-clean-stage1-target-libstdc++-v3: clean-stage1-target-libstdc++-v3 +clean-stage1: clean-stage1-target-libstdc++-v3 +clean-stage1-target-libstdc++-v3: @if [ $(current_stage) = stage1 ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage1-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage1-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stage1-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stage2-target-libsanitizer maybe-all-stage2-target-libsanitizer -.PHONY: clean-stage2-target-libsanitizer maybe-clean-stage2-target-libsanitizer -maybe-all-stage2-target-libsanitizer: -maybe-clean-stage2-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stage2-target-libsanitizer: all-stage2-target-libsanitizer -all-stage2: all-stage2-target-libsanitizer -TARGET-stage2-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stage2-target-libsanitizer: configure-stage2-target-libsanitizer +.PHONY: all-stage2-target-libstdc++-v3 maybe-all-stage2-target-libstdc++-v3 +.PHONY: clean-stage2-target-libstdc++-v3 maybe-clean-stage2-target-libstdc++-v3 +maybe-all-stage2-target-libstdc++-v3: +maybe-clean-stage2-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stage2-target-libstdc++-v3: all-stage2-target-libstdc++-v3 +all-stage2: all-stage2-target-libstdc++-v3 +TARGET-stage2-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stage2-target-libstdc++-v3: configure-stage2-target-libstdc++-v3 @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd 
$(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39653,38 +42662,38 @@ all-stage2-target-libsanitizer: configure-stage2-target-libsanitizer LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGE2_TFLAGS)" \ - $(TARGET-stage2-target-libsanitizer) + $(TARGET-stage2-target-libstdc++-v3) -maybe-clean-stage2-target-libsanitizer: clean-stage2-target-libsanitizer -clean-stage2: clean-stage2-target-libsanitizer -clean-stage2-target-libsanitizer: +maybe-clean-stage2-target-libstdc++-v3: clean-stage2-target-libstdc++-v3 +clean-stage2: clean-stage2-target-libstdc++-v3 +clean-stage2-target-libstdc++-v3: @if [ $(current_stage) = stage2 ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage2-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage2-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stage2-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stage3-target-libsanitizer maybe-all-stage3-target-libsanitizer -.PHONY: clean-stage3-target-libsanitizer maybe-clean-stage3-target-libsanitizer -maybe-all-stage3-target-libsanitizer: -maybe-clean-stage3-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stage3-target-libsanitizer: all-stage3-target-libsanitizer -all-stage3: all-stage3-target-libsanitizer -TARGET-stage3-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stage3-target-libsanitizer: configure-stage3-target-libsanitizer +.PHONY: all-stage3-target-libstdc++-v3 maybe-all-stage3-target-libstdc++-v3 +.PHONY: clean-stage3-target-libstdc++-v3 maybe-clean-stage3-target-libstdc++-v3 +maybe-all-stage3-target-libstdc++-v3: +maybe-clean-stage3-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stage3-target-libstdc++-v3: all-stage3-target-libstdc++-v3 +all-stage3: all-stage3-target-libstdc++-v3 +TARGET-stage3-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stage3-target-libstdc++-v3: configure-stage3-target-libstdc++-v3 @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE3_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39695,38 +42704,38 @@ all-stage3-target-libsanitizer: configure-stage3-target-libsanitizer LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGE3_TFLAGS)" \ - $(TARGET-stage3-target-libsanitizer) + $(TARGET-stage3-target-libstdc++-v3) -maybe-clean-stage3-target-libsanitizer: clean-stage3-target-libsanitizer -clean-stage3: clean-stage3-target-libsanitizer -clean-stage3-target-libsanitizer: +maybe-clean-stage3-target-libstdc++-v3: clean-stage3-target-libstdc++-v3 +clean-stage3: clean-stage3-target-libstdc++-v3 +clean-stage3-target-libstdc++-v3: @if [ $(current_stage) = stage3 ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || 
exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage3-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage3-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stage3-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stage4-target-libsanitizer maybe-all-stage4-target-libsanitizer -.PHONY: clean-stage4-target-libsanitizer maybe-clean-stage4-target-libsanitizer -maybe-all-stage4-target-libsanitizer: -maybe-clean-stage4-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stage4-target-libsanitizer: all-stage4-target-libsanitizer -all-stage4: all-stage4-target-libsanitizer -TARGET-stage4-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stage4-target-libsanitizer: configure-stage4-target-libsanitizer +.PHONY: all-stage4-target-libstdc++-v3 maybe-all-stage4-target-libstdc++-v3 +.PHONY: clean-stage4-target-libstdc++-v3 maybe-clean-stage4-target-libstdc++-v3 +maybe-all-stage4-target-libstdc++-v3: +maybe-clean-stage4-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stage4-target-libstdc++-v3: all-stage4-target-libstdc++-v3 +all-stage4: all-stage4-target-libstdc++-v3 +TARGET-stage4-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stage4-target-libstdc++-v3: configure-stage4-target-libstdc++-v3 @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39737,38 +42746,38 @@ all-stage4-target-libsanitizer: configure-stage4-target-libsanitizer LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGE4_TFLAGS)" \ - $(TARGET-stage4-target-libsanitizer) + $(TARGET-stage4-target-libstdc++-v3) -maybe-clean-stage4-target-libsanitizer: clean-stage4-target-libsanitizer -clean-stage4: clean-stage4-target-libsanitizer -clean-stage4-target-libsanitizer: +maybe-clean-stage4-target-libstdc++-v3: clean-stage4-target-libstdc++-v3 +clean-stage4: clean-stage4-target-libstdc++-v3 +clean-stage4-target-libstdc++-v3: @if [ $(current_stage) = stage4 ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage4-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage4-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stage4-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stageprofile-target-libsanitizer maybe-all-stageprofile-target-libsanitizer -.PHONY: clean-stageprofile-target-libsanitizer maybe-clean-stageprofile-target-libsanitizer -maybe-all-stageprofile-target-libsanitizer: -maybe-clean-stageprofile-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stageprofile-target-libsanitizer: all-stageprofile-target-libsanitizer -all-stageprofile: all-stageprofile-target-libsanitizer 
-TARGET-stageprofile-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stageprofile-target-libsanitizer: configure-stageprofile-target-libsanitizer +.PHONY: all-stageprofile-target-libstdc++-v3 maybe-all-stageprofile-target-libstdc++-v3 +.PHONY: clean-stageprofile-target-libstdc++-v3 maybe-clean-stageprofile-target-libstdc++-v3 +maybe-all-stageprofile-target-libstdc++-v3: +maybe-clean-stageprofile-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stageprofile-target-libstdc++-v3: all-stageprofile-target-libstdc++-v3 +all-stageprofile: all-stageprofile-target-libstdc++-v3 +TARGET-stageprofile-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stageprofile-target-libstdc++-v3: configure-stageprofile-target-libstdc++-v3 @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39779,38 +42788,38 @@ all-stageprofile-target-libsanitizer: configure-stageprofile-target-libsanitizer LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEprofile_TFLAGS)" \ - $(TARGET-stageprofile-target-libsanitizer) + $(TARGET-stageprofile-target-libstdc++-v3) -maybe-clean-stageprofile-target-libsanitizer: clean-stageprofile-target-libsanitizer -clean-stageprofile: clean-stageprofile-target-libsanitizer -clean-stageprofile-target-libsanitizer: +maybe-clean-stageprofile-target-libstdc++-v3: clean-stageprofile-target-libstdc++-v3 +clean-stageprofile: clean-stageprofile-target-libstdc++-v3 +clean-stageprofile-target-libstdc++-v3: @if [ $(current_stage) = stageprofile ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageprofile-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stageprofile-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stageprofile-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stagetrain-target-libsanitizer maybe-all-stagetrain-target-libsanitizer -.PHONY: clean-stagetrain-target-libsanitizer maybe-clean-stagetrain-target-libsanitizer -maybe-all-stagetrain-target-libsanitizer: -maybe-clean-stagetrain-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stagetrain-target-libsanitizer: all-stagetrain-target-libsanitizer -all-stagetrain: all-stagetrain-target-libsanitizer -TARGET-stagetrain-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stagetrain-target-libsanitizer: configure-stagetrain-target-libsanitizer +.PHONY: all-stagetrain-target-libstdc++-v3 maybe-all-stagetrain-target-libstdc++-v3 +.PHONY: clean-stagetrain-target-libstdc++-v3 maybe-clean-stagetrain-target-libstdc++-v3 +maybe-all-stagetrain-target-libstdc++-v3: +maybe-clean-stagetrain-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stagetrain-target-libstdc++-v3: all-stagetrain-target-libstdc++-v3 +all-stagetrain: all-stagetrain-target-libstdc++-v3 +TARGET-stagetrain-target-libstdc++-v3 = 
$(TARGET-target-libstdc++-v3) +all-stagetrain-target-libstdc++-v3: configure-stagetrain-target-libstdc++-v3 @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39821,38 +42830,38 @@ all-stagetrain-target-libsanitizer: configure-stagetrain-target-libsanitizer LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEtrain_TFLAGS)" \ - $(TARGET-stagetrain-target-libsanitizer) + $(TARGET-stagetrain-target-libstdc++-v3) -maybe-clean-stagetrain-target-libsanitizer: clean-stagetrain-target-libsanitizer -clean-stagetrain: clean-stagetrain-target-libsanitizer -clean-stagetrain-target-libsanitizer: +maybe-clean-stagetrain-target-libstdc++-v3: clean-stagetrain-target-libstdc++-v3 +clean-stagetrain: clean-stagetrain-target-libstdc++-v3 +clean-stagetrain-target-libstdc++-v3: @if [ $(current_stage) = stagetrain ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stagetrain-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stagetrain-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stagetrain-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stagefeedback-target-libsanitizer maybe-all-stagefeedback-target-libsanitizer -.PHONY: clean-stagefeedback-target-libsanitizer maybe-clean-stagefeedback-target-libsanitizer -maybe-all-stagefeedback-target-libsanitizer: -maybe-clean-stagefeedback-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stagefeedback-target-libsanitizer: all-stagefeedback-target-libsanitizer -all-stagefeedback: all-stagefeedback-target-libsanitizer -TARGET-stagefeedback-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stagefeedback-target-libsanitizer: configure-stagefeedback-target-libsanitizer +.PHONY: all-stagefeedback-target-libstdc++-v3 maybe-all-stagefeedback-target-libstdc++-v3 +.PHONY: clean-stagefeedback-target-libstdc++-v3 maybe-clean-stagefeedback-target-libstdc++-v3 +maybe-all-stagefeedback-target-libstdc++-v3: +maybe-clean-stagefeedback-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stagefeedback-target-libstdc++-v3: all-stagefeedback-target-libstdc++-v3 +all-stagefeedback: all-stagefeedback-target-libstdc++-v3 +TARGET-stagefeedback-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stagefeedback-target-libstdc++-v3: configure-stagefeedback-target-libstdc++-v3 @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39863,38 +42872,38 @@ all-stagefeedback-target-libsanitizer: configure-stagefeedback-target-libsanitiz LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ 
$(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEfeedback_TFLAGS)" \ - $(TARGET-stagefeedback-target-libsanitizer) + $(TARGET-stagefeedback-target-libstdc++-v3) -maybe-clean-stagefeedback-target-libsanitizer: clean-stagefeedback-target-libsanitizer -clean-stagefeedback: clean-stagefeedback-target-libsanitizer -clean-stagefeedback-target-libsanitizer: +maybe-clean-stagefeedback-target-libstdc++-v3: clean-stagefeedback-target-libstdc++-v3 +clean-stagefeedback: clean-stagefeedback-target-libstdc++-v3 +clean-stagefeedback-target-libstdc++-v3: @if [ $(current_stage) = stagefeedback ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stagefeedback-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stagefeedback-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stagefeedback-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stageautoprofile-target-libsanitizer maybe-all-stageautoprofile-target-libsanitizer -.PHONY: clean-stageautoprofile-target-libsanitizer maybe-clean-stageautoprofile-target-libsanitizer -maybe-all-stageautoprofile-target-libsanitizer: -maybe-clean-stageautoprofile-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stageautoprofile-target-libsanitizer: all-stageautoprofile-target-libsanitizer -all-stageautoprofile: all-stageautoprofile-target-libsanitizer -TARGET-stageautoprofile-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stageautoprofile-target-libsanitizer: configure-stageautoprofile-target-libsanitizer +.PHONY: all-stageautoprofile-target-libstdc++-v3 maybe-all-stageautoprofile-target-libstdc++-v3 +.PHONY: clean-stageautoprofile-target-libstdc++-v3 maybe-clean-stageautoprofile-target-libstdc++-v3 +maybe-all-stageautoprofile-target-libstdc++-v3: +maybe-clean-stageautoprofile-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stageautoprofile-target-libstdc++-v3: all-stageautoprofile-target-libstdc++-v3 +all-stageautoprofile: all-stageautoprofile-target-libstdc++-v3 +TARGET-stageautoprofile-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stageautoprofile-target-libstdc++-v3: configure-stageautoprofile-target-libstdc++-v3 @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39905,38 +42914,38 @@ all-stageautoprofile-target-libsanitizer: configure-stageautoprofile-target-libs LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEautoprofile_TFLAGS)" \ - $(TARGET-stageautoprofile-target-libsanitizer) + $(TARGET-stageautoprofile-target-libstdc++-v3) -maybe-clean-stageautoprofile-target-libsanitizer: clean-stageautoprofile-target-libsanitizer -clean-stageautoprofile: clean-stageautoprofile-target-libsanitizer -clean-stageautoprofile-target-libsanitizer: 
+maybe-clean-stageautoprofile-target-libstdc++-v3: clean-stageautoprofile-target-libstdc++-v3 +clean-stageautoprofile: clean-stageautoprofile-target-libstdc++-v3 +clean-stageautoprofile-target-libstdc++-v3: @if [ $(current_stage) = stageautoprofile ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageautoprofile-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stageautoprofile-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stageautoprofile-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: all-stageautofeedback-target-libsanitizer maybe-all-stageautofeedback-target-libsanitizer -.PHONY: clean-stageautofeedback-target-libsanitizer maybe-clean-stageautofeedback-target-libsanitizer -maybe-all-stageautofeedback-target-libsanitizer: -maybe-clean-stageautofeedback-target-libsanitizer: -@if target-libsanitizer-bootstrap -maybe-all-stageautofeedback-target-libsanitizer: all-stageautofeedback-target-libsanitizer -all-stageautofeedback: all-stageautofeedback-target-libsanitizer -TARGET-stageautofeedback-target-libsanitizer = $(TARGET-target-libsanitizer) -all-stageautofeedback-target-libsanitizer: configure-stageautofeedback-target-libsanitizer +.PHONY: all-stageautofeedback-target-libstdc++-v3 maybe-all-stageautofeedback-target-libstdc++-v3 +.PHONY: clean-stageautofeedback-target-libstdc++-v3 maybe-clean-stageautofeedback-target-libstdc++-v3 +maybe-all-stageautofeedback-target-libstdc++-v3: +maybe-clean-stageautofeedback-target-libstdc++-v3: +@if target-libstdc++-v3-bootstrap +maybe-all-stageautofeedback-target-libstdc++-v3: all-stageautofeedback-target-libstdc++-v3 +all-stageautofeedback: all-stageautofeedback-target-libstdc++-v3 +TARGET-stageautofeedback-target-libstdc++-v3 = $(TARGET-target-libstdc++-v3) +all-stageautofeedback-target-libstdc++-v3: configure-stageautofeedback-target-libstdc++-v3 @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -39947,90 +42956,90 @@ all-stageautofeedback-target-libsanitizer: configure-stageautofeedback-target-li LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEautofeedback_TFLAGS)" PERF_DATA=perf.data \ - $(TARGET-stageautofeedback-target-libsanitizer) + $(TARGET-stageautofeedback-target-libstdc++-v3) -maybe-clean-stageautofeedback-target-libsanitizer: clean-stageautofeedback-target-libsanitizer -clean-stageautofeedback: clean-stageautofeedback-target-libsanitizer -clean-stageautofeedback-target-libsanitizer: +maybe-clean-stageautofeedback-target-libstdc++-v3: clean-stageautofeedback-target-libstdc++-v3 +clean-stageautofeedback: clean-stageautofeedback-target-libstdc++-v3 +clean-stageautofeedback-target-libstdc++-v3: @if [ $(current_stage) = stageautofeedback ]; then \ - [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 
0; \ else \ - [ -f $(TARGET_SUBDIR)/stageautofeedback-libsanitizer/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stageautofeedback-libstdc++-v3/Makefile ] || exit 0; \ $(MAKE) stageautofeedback-start; \ fi; \ - cd $(TARGET_SUBDIR)/libsanitizer && \ + cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean -@endif target-libsanitizer-bootstrap +@endif target-libstdc++-v3-bootstrap -.PHONY: check-target-libsanitizer maybe-check-target-libsanitizer -maybe-check-target-libsanitizer: -@if target-libsanitizer -maybe-check-target-libsanitizer: check-target-libsanitizer +.PHONY: check-target-libstdc++-v3 maybe-check-target-libstdc++-v3 +maybe-check-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-check-target-libstdc++-v3: check-target-libstdc++-v3 -check-target-libsanitizer: +check-target-libstdc++-v3: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(TARGET_FLAGS_TO_PASS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' check) -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: install-target-libsanitizer maybe-install-target-libsanitizer -maybe-install-target-libsanitizer: -@if target-libsanitizer -maybe-install-target-libsanitizer: install-target-libsanitizer +.PHONY: install-target-libstdc++-v3 maybe-install-target-libstdc++-v3 +maybe-install-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-install-target-libstdc++-v3: install-target-libstdc++-v3 -install-target-libsanitizer: installdirs +install-target-libstdc++-v3: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(TARGET_FLAGS_TO_PASS) install) -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: install-strip-target-libsanitizer maybe-install-strip-target-libsanitizer -maybe-install-strip-target-libsanitizer: -@if target-libsanitizer -maybe-install-strip-target-libsanitizer: install-strip-target-libsanitizer +.PHONY: install-strip-target-libstdc++-v3 maybe-install-strip-target-libstdc++-v3 +maybe-install-strip-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-install-strip-target-libstdc++-v3: install-strip-target-libstdc++-v3 -install-strip-target-libsanitizer: installdirs +install-strip-target-libstdc++-v3: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(TARGET_FLAGS_TO_PASS) install-strip) -@endif target-libsanitizer +@endif target-libstdc++-v3 # Other targets (info, dvi, pdf, etc.) 
-.PHONY: maybe-info-target-libsanitizer info-target-libsanitizer -maybe-info-target-libsanitizer: -@if target-libsanitizer -maybe-info-target-libsanitizer: info-target-libsanitizer +.PHONY: maybe-info-target-libstdc++-v3 info-target-libstdc++-v3 +maybe-info-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-info-target-libstdc++-v3: info-target-libstdc++-v3 -info-target-libsanitizer: \ - configure-target-libsanitizer +info-target-libstdc++-v3: \ + configure-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing info in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing info in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40038,25 +43047,25 @@ info-target-libsanitizer: \ info) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-dvi-target-libsanitizer dvi-target-libsanitizer -maybe-dvi-target-libsanitizer: -@if target-libsanitizer -maybe-dvi-target-libsanitizer: dvi-target-libsanitizer +.PHONY: maybe-dvi-target-libstdc++-v3 dvi-target-libstdc++-v3 +maybe-dvi-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-dvi-target-libstdc++-v3: dvi-target-libstdc++-v3 -dvi-target-libsanitizer: \ - configure-target-libsanitizer +dvi-target-libstdc++-v3: \ + configure-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing dvi in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing dvi in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40064,25 +43073,25 @@ dvi-target-libsanitizer: \ dvi) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-pdf-target-libsanitizer pdf-target-libsanitizer -maybe-pdf-target-libsanitizer: -@if target-libsanitizer -maybe-pdf-target-libsanitizer: pdf-target-libsanitizer +.PHONY: maybe-pdf-target-libstdc++-v3 pdf-target-libstdc++-v3 +maybe-pdf-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-pdf-target-libstdc++-v3: pdf-target-libstdc++-v3 -pdf-target-libsanitizer: \ - configure-target-libsanitizer +pdf-target-libstdc++-v3: \ + configure-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing pdf in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing pdf in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export 
\1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40090,25 +43099,25 @@ pdf-target-libsanitizer: \ pdf) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-html-target-libsanitizer html-target-libsanitizer -maybe-html-target-libsanitizer: -@if target-libsanitizer -maybe-html-target-libsanitizer: html-target-libsanitizer +.PHONY: maybe-html-target-libstdc++-v3 html-target-libstdc++-v3 +maybe-html-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-html-target-libstdc++-v3: html-target-libstdc++-v3 -html-target-libsanitizer: \ - configure-target-libsanitizer +html-target-libstdc++-v3: \ + configure-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing html in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing html in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40116,25 +43125,25 @@ html-target-libsanitizer: \ html) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-TAGS-target-libsanitizer TAGS-target-libsanitizer -maybe-TAGS-target-libsanitizer: -@if target-libsanitizer -maybe-TAGS-target-libsanitizer: TAGS-target-libsanitizer +.PHONY: maybe-TAGS-target-libstdc++-v3 TAGS-target-libstdc++-v3 +maybe-TAGS-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-TAGS-target-libstdc++-v3: TAGS-target-libstdc++-v3 -TAGS-target-libsanitizer: \ - configure-target-libsanitizer +TAGS-target-libstdc++-v3: \ + configure-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing TAGS in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing TAGS in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40142,26 +43151,26 @@ TAGS-target-libsanitizer: \ TAGS) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-install-info-target-libsanitizer install-info-target-libsanitizer -maybe-install-info-target-libsanitizer: -@if target-libsanitizer -maybe-install-info-target-libsanitizer: install-info-target-libsanitizer +.PHONY: maybe-install-info-target-libstdc++-v3 install-info-target-libstdc++-v3 +maybe-install-info-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-install-info-target-libstdc++-v3: install-info-target-libstdc++-v3 -install-info-target-libsanitizer: \ - configure-target-libsanitizer \ - info-target-libsanitizer +install-info-target-libstdc++-v3: 
\ + configure-target-libstdc++-v3 \ + info-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing install-info in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing install-info in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40169,26 +43178,26 @@ install-info-target-libsanitizer: \ install-info) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-install-pdf-target-libsanitizer install-pdf-target-libsanitizer -maybe-install-pdf-target-libsanitizer: -@if target-libsanitizer -maybe-install-pdf-target-libsanitizer: install-pdf-target-libsanitizer +.PHONY: maybe-install-pdf-target-libstdc++-v3 install-pdf-target-libstdc++-v3 +maybe-install-pdf-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-install-pdf-target-libstdc++-v3: install-pdf-target-libstdc++-v3 -install-pdf-target-libsanitizer: \ - configure-target-libsanitizer \ - pdf-target-libsanitizer +install-pdf-target-libstdc++-v3: \ + configure-target-libstdc++-v3 \ + pdf-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing install-pdf in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing install-pdf in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40196,26 +43205,26 @@ install-pdf-target-libsanitizer: \ install-pdf) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-install-html-target-libsanitizer install-html-target-libsanitizer -maybe-install-html-target-libsanitizer: -@if target-libsanitizer -maybe-install-html-target-libsanitizer: install-html-target-libsanitizer +.PHONY: maybe-install-html-target-libstdc++-v3 install-html-target-libstdc++-v3 +maybe-install-html-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-install-html-target-libstdc++-v3: install-html-target-libstdc++-v3 -install-html-target-libsanitizer: \ - configure-target-libsanitizer \ - html-target-libsanitizer +install-html-target-libstdc++-v3: \ + configure-target-libstdc++-v3 \ + html-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing install-html in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing install-html in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd 
$(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40223,25 +43232,25 @@ install-html-target-libsanitizer: \ install-html) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-installcheck-target-libsanitizer installcheck-target-libsanitizer -maybe-installcheck-target-libsanitizer: -@if target-libsanitizer -maybe-installcheck-target-libsanitizer: installcheck-target-libsanitizer +.PHONY: maybe-installcheck-target-libstdc++-v3 installcheck-target-libstdc++-v3 +maybe-installcheck-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-installcheck-target-libstdc++-v3: installcheck-target-libstdc++-v3 -installcheck-target-libsanitizer: \ - configure-target-libsanitizer +installcheck-target-libstdc++-v3: \ + configure-target-libstdc++-v3 @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing installcheck in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing installcheck in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40249,24 +43258,24 @@ installcheck-target-libsanitizer: \ installcheck) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-mostlyclean-target-libsanitizer mostlyclean-target-libsanitizer -maybe-mostlyclean-target-libsanitizer: -@if target-libsanitizer -maybe-mostlyclean-target-libsanitizer: mostlyclean-target-libsanitizer +.PHONY: maybe-mostlyclean-target-libstdc++-v3 mostlyclean-target-libstdc++-v3 +maybe-mostlyclean-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-mostlyclean-target-libstdc++-v3: mostlyclean-target-libstdc++-v3 -mostlyclean-target-libsanitizer: +mostlyclean-target-libstdc++-v3: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing mostlyclean in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing mostlyclean in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40274,24 +43283,24 @@ mostlyclean-target-libsanitizer: mostlyclean) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-clean-target-libsanitizer clean-target-libsanitizer -maybe-clean-target-libsanitizer: -@if target-libsanitizer -maybe-clean-target-libsanitizer: clean-target-libsanitizer +.PHONY: maybe-clean-target-libstdc++-v3 clean-target-libstdc++-v3 +maybe-clean-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-clean-target-libstdc++-v3: clean-target-libstdc++-v3 -clean-target-libsanitizer: 
+clean-target-libstdc++-v3: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing clean in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing clean in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40299,24 +43308,24 @@ clean-target-libsanitizer: clean) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-distclean-target-libsanitizer distclean-target-libsanitizer -maybe-distclean-target-libsanitizer: -@if target-libsanitizer -maybe-distclean-target-libsanitizer: distclean-target-libsanitizer +.PHONY: maybe-distclean-target-libstdc++-v3 distclean-target-libstdc++-v3 +maybe-distclean-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-distclean-target-libstdc++-v3: distclean-target-libstdc++-v3 -distclean-target-libsanitizer: +distclean-target-libstdc++-v3: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing distclean in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing distclean in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40324,24 +43333,24 @@ distclean-target-libsanitizer: distclean) \ || exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: maybe-maintainer-clean-target-libsanitizer maintainer-clean-target-libsanitizer -maybe-maintainer-clean-target-libsanitizer: -@if target-libsanitizer -maybe-maintainer-clean-target-libsanitizer: maintainer-clean-target-libsanitizer +.PHONY: maybe-maintainer-clean-target-libstdc++-v3 maintainer-clean-target-libstdc++-v3 +maybe-maintainer-clean-target-libstdc++-v3: +@if target-libstdc++-v3 +maybe-maintainer-clean-target-libstdc++-v3: maintainer-clean-target-libstdc++-v3 -maintainer-clean-target-libsanitizer: +maintainer-clean-target-libstdc++-v3: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libstdc++-v3/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(RAW_CXX_TARGET_EXPORTS) \ - echo "Doing maintainer-clean in $(TARGET_SUBDIR)/libsanitizer"; \ + echo "Doing maintainer-clean in $(TARGET_SUBDIR)/libstdc++-v3"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libsanitizer && \ + (cd $(TARGET_SUBDIR)/libstdc++-v3 && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -40349,46 +43358,46 @@ maintainer-clean-target-libsanitizer: maintainer-clean) \ || 
exit 1 -@endif target-libsanitizer +@endif target-libstdc++-v3 -.PHONY: configure-target-libmpx maybe-configure-target-libmpx -maybe-configure-target-libmpx: +.PHONY: configure-target-libsanitizer maybe-configure-target-libsanitizer +maybe-configure-target-libsanitizer: @if gcc-bootstrap -configure-target-libmpx: stage_current +configure-target-libsanitizer: stage_current @endif gcc-bootstrap -@if target-libmpx -maybe-configure-target-libmpx: configure-target-libmpx -configure-target-libmpx: +@if target-libsanitizer +maybe-configure-target-libsanitizer: configure-target-libsanitizer +configure-target-libsanitizer: @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - echo "Checking multilib configuration for libmpx..."; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - $(NORMAL_TARGET_EXPORTS) \ - echo Configuring in $(TARGET_SUBDIR)/libmpx; \ - cd "$(TARGET_SUBDIR)/libmpx" || exit 1; \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo Configuring in $(TARGET_SUBDIR)/libsanitizer; \ + cd "$(TARGET_SUBDIR)/libsanitizer" || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ rm -f no-such-file || : ; \ CONFIG_SITE=no-such-file $(SHELL) \ $$s/$$module_srcdir/configure \ @@ -40396,452 +43405,452 @@ configure-target-libmpx: $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: configure-stage1-target-libmpx maybe-configure-stage1-target-libmpx -maybe-configure-stage1-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stage1-target-libmpx: configure-stage1-target-libmpx -configure-stage1-target-libmpx: +.PHONY: configure-stage1-target-libsanitizer maybe-configure-stage1-target-libsanitizer +maybe-configure-stage1-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stage1-target-libsanitizer: configure-stage1-target-libsanitizer +configure-stage1-target-libsanitizer: @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 1 in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage 1 in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ \ $(STAGE1_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stage2-target-libmpx maybe-configure-stage2-target-libmpx -maybe-configure-stage2-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stage2-target-libmpx: configure-stage2-target-libmpx -configure-stage2-target-libmpx: +.PHONY: configure-stage2-target-libsanitizer maybe-configure-stage2-target-libsanitizer +maybe-configure-stage2-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stage2-target-libsanitizer: configure-stage2-target-libsanitizer +configure-stage2-target-libsanitizer: @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 2 in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage 2 in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE2_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stage3-target-libmpx maybe-configure-stage3-target-libmpx -maybe-configure-stage3-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stage3-target-libmpx: configure-stage3-target-libmpx -configure-stage3-target-libmpx: +.PHONY: configure-stage3-target-libsanitizer maybe-configure-stage3-target-libsanitizer +maybe-configure-stage3-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stage3-target-libsanitizer: configure-stage3-target-libsanitizer +configure-stage3-target-libsanitizer: @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE3_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 3 in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage 3 in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE3_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stage4-target-libmpx maybe-configure-stage4-target-libmpx -maybe-configure-stage4-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stage4-target-libmpx: configure-stage4-target-libmpx -configure-stage4-target-libmpx: +.PHONY: configure-stage4-target-libsanitizer maybe-configure-stage4-target-libsanitizer +maybe-configure-stage4-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stage4-target-libsanitizer: configure-stage4-target-libsanitizer +configure-stage4-target-libsanitizer: @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage 4 in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage 4 in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE4_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stageprofile-target-libmpx maybe-configure-stageprofile-target-libmpx -maybe-configure-stageprofile-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stageprofile-target-libmpx: configure-stageprofile-target-libmpx -configure-stageprofile-target-libmpx: +.PHONY: configure-stageprofile-target-libsanitizer maybe-configure-stageprofile-target-libsanitizer +maybe-configure-stageprofile-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stageprofile-target-libsanitizer: configure-stageprofile-target-libsanitizer +configure-stageprofile-target-libsanitizer: @[ $(current_stage) = stageprofile ] || $(MAKE) stageprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage profile in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage profile in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEprofile_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stagetrain-target-libmpx maybe-configure-stagetrain-target-libmpx -maybe-configure-stagetrain-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stagetrain-target-libmpx: configure-stagetrain-target-libmpx -configure-stagetrain-target-libmpx: +.PHONY: configure-stagetrain-target-libsanitizer maybe-configure-stagetrain-target-libsanitizer +maybe-configure-stagetrain-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stagetrain-target-libsanitizer: configure-stagetrain-target-libsanitizer +configure-stagetrain-target-libsanitizer: @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage train in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage train in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEtrain_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stagefeedback-target-libmpx maybe-configure-stagefeedback-target-libmpx -maybe-configure-stagefeedback-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stagefeedback-target-libmpx: configure-stagefeedback-target-libmpx -configure-stagefeedback-target-libmpx: +.PHONY: configure-stagefeedback-target-libsanitizer maybe-configure-stagefeedback-target-libsanitizer +maybe-configure-stagefeedback-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stagefeedback-target-libsanitizer: configure-stagefeedback-target-libsanitizer +configure-stagefeedback-target-libsanitizer: @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage feedback in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage feedback in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEfeedback_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stageautoprofile-target-libmpx maybe-configure-stageautoprofile-target-libmpx -maybe-configure-stageautoprofile-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stageautoprofile-target-libmpx: configure-stageautoprofile-target-libmpx -configure-stageautoprofile-target-libmpx: +.PHONY: configure-stageautoprofile-target-libsanitizer maybe-configure-stageautoprofile-target-libsanitizer +maybe-configure-stageautoprofile-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stageautoprofile-target-libsanitizer: configure-stageautoprofile-target-libsanitizer +configure-stageautoprofile-target-libsanitizer: @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage autoprofile in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage autoprofile in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautoprofile_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: configure-stageautofeedback-target-libmpx maybe-configure-stageautofeedback-target-libmpx -maybe-configure-stageautofeedback-target-libmpx: -@if target-libmpx-bootstrap -maybe-configure-stageautofeedback-target-libmpx: configure-stageautofeedback-target-libmpx -configure-stageautofeedback-target-libmpx: +.PHONY: configure-stageautofeedback-target-libsanitizer maybe-configure-stageautofeedback-target-libsanitizer +maybe-configure-stageautofeedback-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-configure-stageautofeedback-target-libsanitizer: configure-stageautofeedback-target-libsanitizer +configure-stageautofeedback-target-libsanitizer: @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start - @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx + @$(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - echo "Checking multilib configuration for libmpx..."; \ - $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libmpx/multilib.tmp 2> /dev/null; \ - if test -r $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - if cmp -s $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; then \ - rm -f $(TARGET_SUBDIR)/libmpx/multilib.tmp; \ + echo "Checking multilib configuration for libsanitizer..."; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libsanitizer/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libsanitizer/multilib.tmp; \ else \ - rm -f $(TARGET_SUBDIR)/libmpx/Makefile; \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + rm -f $(TARGET_SUBDIR)/libsanitizer/Makefile; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ else \ - mv $(TARGET_SUBDIR)/libmpx/multilib.tmp $(TARGET_SUBDIR)/libmpx/multilib.out; \ + mv $(TARGET_SUBDIR)/libsanitizer/multilib.tmp $(TARGET_SUBDIR)/libsanitizer/multilib.out; \ fi; \ - test ! -f $(TARGET_SUBDIR)/libmpx/Makefile || exit 0; \ - $(NORMAL_TARGET_EXPORTS) \ + test ! 
-f $(TARGET_SUBDIR)/libsanitizer/Makefile || exit 0; \ + $(RAW_CXX_TARGET_EXPORTS) \ \ CFLAGS="$(CFLAGS_FOR_TARGET)"; export CFLAGS; \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"; export LIBCFLAGS; \ - echo Configuring stage autofeedback in $(TARGET_SUBDIR)/libmpx; \ - $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libmpx; \ - cd $(TARGET_SUBDIR)/libmpx || exit 1; \ + echo Configuring stage autofeedback in $(TARGET_SUBDIR)/libsanitizer; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libsanitizer; \ + cd $(TARGET_SUBDIR)/libsanitizer || exit 1; \ case $(srcdir) in \ /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ - *) topdir=`echo $(TARGET_SUBDIR)/libmpx/ | \ + *) topdir=`echo $(TARGET_SUBDIR)/libsanitizer/ | \ sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ esac; \ - module_srcdir=libmpx; \ + module_srcdir=libsanitizer; \ $(SHELL) $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautofeedback_CONFIGURE_FLAGS) -@endif target-libmpx-bootstrap +@endif target-libsanitizer-bootstrap -.PHONY: all-target-libmpx maybe-all-target-libmpx -maybe-all-target-libmpx: +.PHONY: all-target-libsanitizer maybe-all-target-libsanitizer +maybe-all-target-libsanitizer: @if gcc-bootstrap -all-target-libmpx: stage_current +all-target-libsanitizer: stage_current @endif gcc-bootstrap -@if target-libmpx -TARGET-target-libmpx=all -maybe-all-target-libmpx: all-target-libmpx -all-target-libmpx: configure-target-libmpx +@if target-libsanitizer +TARGET-target-libsanitizer=all +maybe-all-target-libsanitizer: all-target-libsanitizer +all-target-libsanitizer: configure-target-libsanitizer @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_TARGET_FLAGS) \ - $(TARGET-target-libmpx)) -@endif target-libmpx + $(RAW_CXX_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ + $(TARGET-target-libsanitizer)) +@endif target-libsanitizer -.PHONY: all-stage1-target-libmpx maybe-all-stage1-target-libmpx -.PHONY: clean-stage1-target-libmpx maybe-clean-stage1-target-libmpx -maybe-all-stage1-target-libmpx: -maybe-clean-stage1-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stage1-target-libmpx: all-stage1-target-libmpx -all-stage1: all-stage1-target-libmpx -TARGET-stage1-target-libmpx = $(TARGET-target-libmpx) -all-stage1-target-libmpx: configure-stage1-target-libmpx +.PHONY: all-stage1-target-libsanitizer maybe-all-stage1-target-libsanitizer +.PHONY: clean-stage1-target-libsanitizer maybe-clean-stage1-target-libsanitizer +maybe-all-stage1-target-libsanitizer: +maybe-clean-stage1-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stage1-target-libsanitizer: all-stage1-target-libsanitizer +all-stage1: all-stage1-target-libsanitizer +TARGET-stage1-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stage1-target-libsanitizer: configure-stage1-target-libsanitizer @[ $(current_stage) = stage1 ] || $(MAKE) stage1-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE1_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ - cd $(TARGET_SUBDIR)/libmpx && \ + $(RAW_CXX_TARGET_EXPORTS) \ + cd 
$(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -40850,42 +43859,42 @@ all-stage1-target-libmpx: configure-stage1-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ \ TFLAGS="$(STAGE1_TFLAGS)" \ - $(TARGET-stage1-target-libmpx) + $(TARGET-stage1-target-libsanitizer) -maybe-clean-stage1-target-libmpx: clean-stage1-target-libmpx -clean-stage1: clean-stage1-target-libmpx -clean-stage1-target-libmpx: +maybe-clean-stage1-target-libsanitizer: clean-stage1-target-libsanitizer +clean-stage1: clean-stage1-target-libsanitizer +clean-stage1-target-libsanitizer: @if [ $(current_stage) = stage1 ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage1-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage1-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stage1-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) \ + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stage2-target-libmpx maybe-all-stage2-target-libmpx -.PHONY: clean-stage2-target-libmpx maybe-clean-stage2-target-libmpx -maybe-all-stage2-target-libmpx: -maybe-clean-stage2-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stage2-target-libmpx: all-stage2-target-libmpx -all-stage2: all-stage2-target-libmpx -TARGET-stage2-target-libmpx = $(TARGET-target-libmpx) -all-stage2-target-libmpx: configure-stage2-target-libmpx +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stage2-target-libsanitizer maybe-all-stage2-target-libsanitizer +.PHONY: clean-stage2-target-libsanitizer maybe-clean-stage2-target-libsanitizer +maybe-all-stage2-target-libsanitizer: +maybe-clean-stage2-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stage2-target-libsanitizer: all-stage2-target-libsanitizer +all-stage2: all-stage2-target-libsanitizer +TARGET-stage2-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stage2-target-libsanitizer: configure-stage2-target-libsanitizer @[ $(current_stage) = stage2 ] || $(MAKE) stage2-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE2_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -40894,40 +43903,40 @@ all-stage2-target-libmpx: configure-stage2-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGE2_TFLAGS)" \ - $(TARGET-stage2-target-libmpx) + $(TARGET-stage2-target-libsanitizer) -maybe-clean-stage2-target-libmpx: clean-stage2-target-libmpx -clean-stage2: clean-stage2-target-libmpx -clean-stage2-target-libmpx: +maybe-clean-stage2-target-libsanitizer: clean-stage2-target-libsanitizer +clean-stage2: clean-stage2-target-libsanitizer +clean-stage2-target-libsanitizer: @if [ $(current_stage) = stage2 ]; 
then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage2-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage2-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stage2-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stage3-target-libmpx maybe-all-stage3-target-libmpx -.PHONY: clean-stage3-target-libmpx maybe-clean-stage3-target-libmpx -maybe-all-stage3-target-libmpx: -maybe-clean-stage3-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stage3-target-libmpx: all-stage3-target-libmpx -all-stage3: all-stage3-target-libmpx -TARGET-stage3-target-libmpx = $(TARGET-target-libmpx) -all-stage3-target-libmpx: configure-stage3-target-libmpx + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stage3-target-libsanitizer maybe-all-stage3-target-libsanitizer +.PHONY: clean-stage3-target-libsanitizer maybe-clean-stage3-target-libsanitizer +maybe-all-stage3-target-libsanitizer: +maybe-clean-stage3-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stage3-target-libsanitizer: all-stage3-target-libsanitizer +all-stage3: all-stage3-target-libsanitizer +TARGET-stage3-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stage3-target-libsanitizer: configure-stage3-target-libsanitizer @[ $(current_stage) = stage3 ] || $(MAKE) stage3-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE3_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -40936,40 +43945,40 @@ all-stage3-target-libmpx: configure-stage3-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGE3_TFLAGS)" \ - $(TARGET-stage3-target-libmpx) + $(TARGET-stage3-target-libsanitizer) -maybe-clean-stage3-target-libmpx: clean-stage3-target-libmpx -clean-stage3: clean-stage3-target-libmpx -clean-stage3-target-libmpx: +maybe-clean-stage3-target-libsanitizer: clean-stage3-target-libsanitizer +clean-stage3: clean-stage3-target-libsanitizer +clean-stage3-target-libsanitizer: @if [ $(current_stage) = stage3 ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage3-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage3-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stage3-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stage4-target-libmpx maybe-all-stage4-target-libmpx -.PHONY: clean-stage4-target-libmpx maybe-clean-stage4-target-libmpx -maybe-all-stage4-target-libmpx: -maybe-clean-stage4-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stage4-target-libmpx: all-stage4-target-libmpx -all-stage4: all-stage4-target-libmpx -TARGET-stage4-target-libmpx = $(TARGET-target-libmpx) -all-stage4-target-libmpx: configure-stage4-target-libmpx + cd 
$(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stage4-target-libsanitizer maybe-all-stage4-target-libsanitizer +.PHONY: clean-stage4-target-libsanitizer maybe-clean-stage4-target-libsanitizer +maybe-all-stage4-target-libsanitizer: +maybe-clean-stage4-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stage4-target-libsanitizer: all-stage4-target-libsanitizer +all-stage4: all-stage4-target-libsanitizer +TARGET-stage4-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stage4-target-libsanitizer: configure-stage4-target-libsanitizer @[ $(current_stage) = stage4 ] || $(MAKE) stage4-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGE4_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -40978,40 +43987,40 @@ all-stage4-target-libmpx: configure-stage4-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGE4_TFLAGS)" \ - $(TARGET-stage4-target-libmpx) + $(TARGET-stage4-target-libsanitizer) -maybe-clean-stage4-target-libmpx: clean-stage4-target-libmpx -clean-stage4: clean-stage4-target-libmpx -clean-stage4-target-libmpx: +maybe-clean-stage4-target-libsanitizer: clean-stage4-target-libsanitizer +clean-stage4: clean-stage4-target-libsanitizer +clean-stage4-target-libsanitizer: @if [ $(current_stage) = stage4 ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stage4-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stage4-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stage4-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stageprofile-target-libmpx maybe-all-stageprofile-target-libmpx -.PHONY: clean-stageprofile-target-libmpx maybe-clean-stageprofile-target-libmpx -maybe-all-stageprofile-target-libmpx: -maybe-clean-stageprofile-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stageprofile-target-libmpx: all-stageprofile-target-libmpx -all-stageprofile: all-stageprofile-target-libmpx -TARGET-stageprofile-target-libmpx = $(TARGET-target-libmpx) -all-stageprofile-target-libmpx: configure-stageprofile-target-libmpx + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stageprofile-target-libsanitizer maybe-all-stageprofile-target-libsanitizer +.PHONY: clean-stageprofile-target-libsanitizer maybe-clean-stageprofile-target-libsanitizer +maybe-all-stageprofile-target-libsanitizer: +maybe-clean-stageprofile-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stageprofile-target-libsanitizer: all-stageprofile-target-libsanitizer +all-stageprofile: all-stageprofile-target-libsanitizer +TARGET-stageprofile-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stageprofile-target-libsanitizer: configure-stageprofile-target-libsanitizer @[ 
$(current_stage) = stageprofile ] || $(MAKE) stageprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEprofile_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -41020,40 +44029,40 @@ all-stageprofile-target-libmpx: configure-stageprofile-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEprofile_TFLAGS)" \ - $(TARGET-stageprofile-target-libmpx) + $(TARGET-stageprofile-target-libsanitizer) -maybe-clean-stageprofile-target-libmpx: clean-stageprofile-target-libmpx -clean-stageprofile: clean-stageprofile-target-libmpx -clean-stageprofile-target-libmpx: +maybe-clean-stageprofile-target-libsanitizer: clean-stageprofile-target-libsanitizer +clean-stageprofile: clean-stageprofile-target-libsanitizer +clean-stageprofile-target-libsanitizer: @if [ $(current_stage) = stageprofile ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageprofile-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stageprofile-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stageprofile-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stagetrain-target-libmpx maybe-all-stagetrain-target-libmpx -.PHONY: clean-stagetrain-target-libmpx maybe-clean-stagetrain-target-libmpx -maybe-all-stagetrain-target-libmpx: -maybe-clean-stagetrain-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stagetrain-target-libmpx: all-stagetrain-target-libmpx -all-stagetrain: all-stagetrain-target-libmpx -TARGET-stagetrain-target-libmpx = $(TARGET-target-libmpx) -all-stagetrain-target-libmpx: configure-stagetrain-target-libmpx + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stagetrain-target-libsanitizer maybe-all-stagetrain-target-libsanitizer +.PHONY: clean-stagetrain-target-libsanitizer maybe-clean-stagetrain-target-libsanitizer +maybe-all-stagetrain-target-libsanitizer: +maybe-clean-stagetrain-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stagetrain-target-libsanitizer: all-stagetrain-target-libsanitizer +all-stagetrain: all-stagetrain-target-libsanitizer +TARGET-stagetrain-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stagetrain-target-libsanitizer: configure-stagetrain-target-libsanitizer @[ $(current_stage) = stagetrain ] || $(MAKE) stagetrain-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEtrain_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -41062,40 +44071,40 @@ all-stagetrain-target-libmpx: configure-stagetrain-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + 
$(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEtrain_TFLAGS)" \ - $(TARGET-stagetrain-target-libmpx) + $(TARGET-stagetrain-target-libsanitizer) -maybe-clean-stagetrain-target-libmpx: clean-stagetrain-target-libmpx -clean-stagetrain: clean-stagetrain-target-libmpx -clean-stagetrain-target-libmpx: +maybe-clean-stagetrain-target-libsanitizer: clean-stagetrain-target-libsanitizer +clean-stagetrain: clean-stagetrain-target-libsanitizer +clean-stagetrain-target-libsanitizer: @if [ $(current_stage) = stagetrain ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stagetrain-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stagetrain-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stagetrain-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stagefeedback-target-libmpx maybe-all-stagefeedback-target-libmpx -.PHONY: clean-stagefeedback-target-libmpx maybe-clean-stagefeedback-target-libmpx -maybe-all-stagefeedback-target-libmpx: -maybe-clean-stagefeedback-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stagefeedback-target-libmpx: all-stagefeedback-target-libmpx -all-stagefeedback: all-stagefeedback-target-libmpx -TARGET-stagefeedback-target-libmpx = $(TARGET-target-libmpx) -all-stagefeedback-target-libmpx: configure-stagefeedback-target-libmpx + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stagefeedback-target-libsanitizer maybe-all-stagefeedback-target-libsanitizer +.PHONY: clean-stagefeedback-target-libsanitizer maybe-clean-stagefeedback-target-libsanitizer +maybe-all-stagefeedback-target-libsanitizer: +maybe-clean-stagefeedback-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stagefeedback-target-libsanitizer: all-stagefeedback-target-libsanitizer +all-stagefeedback: all-stagefeedback-target-libsanitizer +TARGET-stagefeedback-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stagefeedback-target-libsanitizer: configure-stagefeedback-target-libsanitizer @[ $(current_stage) = stagefeedback ] || $(MAKE) stagefeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEfeedback_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -41104,40 +44113,40 @@ all-stagefeedback-target-libmpx: configure-stagefeedback-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEfeedback_TFLAGS)" \ - $(TARGET-stagefeedback-target-libmpx) + $(TARGET-stagefeedback-target-libsanitizer) -maybe-clean-stagefeedback-target-libmpx: clean-stagefeedback-target-libmpx -clean-stagefeedback: clean-stagefeedback-target-libmpx -clean-stagefeedback-target-libmpx: +maybe-clean-stagefeedback-target-libsanitizer: clean-stagefeedback-target-libsanitizer +clean-stagefeedback: clean-stagefeedback-target-libsanitizer +clean-stagefeedback-target-libsanitizer: @if [ 
$(current_stage) = stagefeedback ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stagefeedback-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stagefeedback-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stagefeedback-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stageautoprofile-target-libmpx maybe-all-stageautoprofile-target-libmpx -.PHONY: clean-stageautoprofile-target-libmpx maybe-clean-stageautoprofile-target-libmpx -maybe-all-stageautoprofile-target-libmpx: -maybe-clean-stageautoprofile-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stageautoprofile-target-libmpx: all-stageautoprofile-target-libmpx -all-stageautoprofile: all-stageautoprofile-target-libmpx -TARGET-stageautoprofile-target-libmpx = $(TARGET-target-libmpx) -all-stageautoprofile-target-libmpx: configure-stageautoprofile-target-libmpx + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stageautoprofile-target-libsanitizer maybe-all-stageautoprofile-target-libsanitizer +.PHONY: clean-stageautoprofile-target-libsanitizer maybe-clean-stageautoprofile-target-libsanitizer +maybe-all-stageautoprofile-target-libsanitizer: +maybe-clean-stageautoprofile-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stageautoprofile-target-libsanitizer: all-stageautoprofile-target-libsanitizer +all-stageautoprofile: all-stageautoprofile-target-libsanitizer +TARGET-stageautoprofile-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stageautoprofile-target-libsanitizer: configure-stageautoprofile-target-libsanitizer @[ $(current_stage) = stageautoprofile ] || $(MAKE) stageautoprofile-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautoprofile_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ $$s/gcc/config/i386/$(AUTO_PROFILE) \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -41146,40 +44155,40 @@ all-stageautoprofile-target-libmpx: configure-stageautoprofile-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEautoprofile_TFLAGS)" \ - $(TARGET-stageautoprofile-target-libmpx) + $(TARGET-stageautoprofile-target-libsanitizer) -maybe-clean-stageautoprofile-target-libmpx: clean-stageautoprofile-target-libmpx -clean-stageautoprofile: clean-stageautoprofile-target-libmpx -clean-stageautoprofile-target-libmpx: +maybe-clean-stageautoprofile-target-libsanitizer: clean-stageautoprofile-target-libsanitizer +clean-stageautoprofile: clean-stageautoprofile-target-libsanitizer +clean-stageautoprofile-target-libsanitizer: @if [ $(current_stage) = stageautoprofile ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageautoprofile-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stageautoprofile-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stageautoprofile-start; \ fi; \ - cd 
$(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap - - -.PHONY: all-stageautofeedback-target-libmpx maybe-all-stageautofeedback-target-libmpx -.PHONY: clean-stageautofeedback-target-libmpx maybe-clean-stageautofeedback-target-libmpx -maybe-all-stageautofeedback-target-libmpx: -maybe-clean-stageautofeedback-target-libmpx: -@if target-libmpx-bootstrap -maybe-all-stageautofeedback-target-libmpx: all-stageautofeedback-target-libmpx -all-stageautofeedback: all-stageautofeedback-target-libmpx -TARGET-stageautofeedback-target-libmpx = $(TARGET-target-libmpx) -all-stageautofeedback-target-libmpx: configure-stageautofeedback-target-libmpx + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap + + +.PHONY: all-stageautofeedback-target-libsanitizer maybe-all-stageautofeedback-target-libsanitizer +.PHONY: clean-stageautofeedback-target-libsanitizer maybe-clean-stageautofeedback-target-libsanitizer +maybe-all-stageautofeedback-target-libsanitizer: +maybe-clean-stageautofeedback-target-libsanitizer: +@if target-libsanitizer-bootstrap +maybe-all-stageautofeedback-target-libsanitizer: all-stageautofeedback-target-libsanitizer +all-stageautofeedback: all-stageautofeedback-target-libsanitizer +TARGET-stageautofeedback-target-libsanitizer = $(TARGET-target-libsanitizer) +all-stageautofeedback-target-libsanitizer: configure-stageautofeedback-target-libsanitizer @[ $(current_stage) = stageautofeedback ] || $(MAKE) stageautofeedback-start @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ TFLAGS="$(STAGEautofeedback_TFLAGS)"; \ - $(NORMAL_TARGET_EXPORTS) \ + $(RAW_CXX_TARGET_EXPORTS) \ \ - cd $(TARGET_SUBDIR)/libmpx && \ + cd $(TARGET_SUBDIR)/libsanitizer && \ \ $(MAKE) $(BASE_FLAGS_TO_PASS) \ CFLAGS="$(CFLAGS_FOR_TARGET)" \ @@ -41188,92 +44197,92 @@ all-stageautofeedback-target-libmpx: configure-stageautofeedback-target-libmpx CFLAGS_FOR_TARGET="$(CFLAGS_FOR_TARGET)" \ CXXFLAGS_FOR_TARGET="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS_FOR_TARGET="$(LIBCFLAGS_FOR_TARGET)" \ - $(EXTRA_TARGET_FLAGS) \ + $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' \ TFLAGS="$(STAGEautofeedback_TFLAGS)" PERF_DATA=perf.data \ - $(TARGET-stageautofeedback-target-libmpx) + $(TARGET-stageautofeedback-target-libsanitizer) -maybe-clean-stageautofeedback-target-libmpx: clean-stageautofeedback-target-libmpx -clean-stageautofeedback: clean-stageautofeedback-target-libmpx -clean-stageautofeedback-target-libmpx: +maybe-clean-stageautofeedback-target-libsanitizer: clean-stageautofeedback-target-libsanitizer +clean-stageautofeedback: clean-stageautofeedback-target-libsanitizer +clean-stageautofeedback-target-libsanitizer: @if [ $(current_stage) = stageautofeedback ]; then \ - [ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ else \ - [ -f $(TARGET_SUBDIR)/stageautofeedback-libmpx/Makefile ] || exit 0; \ + [ -f $(TARGET_SUBDIR)/stageautofeedback-libsanitizer/Makefile ] || exit 0; \ $(MAKE) stageautofeedback-start; \ fi; \ - cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(EXTRA_TARGET_FLAGS) clean -@endif target-libmpx-bootstrap + cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(EXTRA_TARGET_FLAGS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' clean +@endif target-libsanitizer-bootstrap -.PHONY: check-target-libmpx 
maybe-check-target-libmpx -maybe-check-target-libmpx: -@if target-libmpx -maybe-check-target-libmpx: check-target-libmpx +.PHONY: check-target-libsanitizer maybe-check-target-libsanitizer +maybe-check-target-libsanitizer: +@if target-libsanitizer +maybe-check-target-libsanitizer: check-target-libsanitizer -check-target-libmpx: +check-target-libsanitizer: @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libmpx && \ - $(MAKE) $(TARGET_FLAGS_TO_PASS) check) + $(RAW_CXX_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ + $(MAKE) $(TARGET_FLAGS_TO_PASS) 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' check) -@endif target-libmpx +@endif target-libsanitizer -.PHONY: install-target-libmpx maybe-install-target-libmpx -maybe-install-target-libmpx: -@if target-libmpx -maybe-install-target-libmpx: install-target-libmpx +.PHONY: install-target-libsanitizer maybe-install-target-libsanitizer +maybe-install-target-libsanitizer: +@if target-libsanitizer +maybe-install-target-libsanitizer: install-target-libsanitizer -install-target-libmpx: installdirs +install-target-libsanitizer: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libmpx && \ + $(RAW_CXX_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(TARGET_FLAGS_TO_PASS) install) -@endif target-libmpx +@endif target-libsanitizer -.PHONY: install-strip-target-libmpx maybe-install-strip-target-libmpx -maybe-install-strip-target-libmpx: -@if target-libmpx -maybe-install-strip-target-libmpx: install-strip-target-libmpx +.PHONY: install-strip-target-libsanitizer maybe-install-strip-target-libsanitizer +maybe-install-strip-target-libsanitizer: +@if target-libsanitizer +maybe-install-strip-target-libsanitizer: install-strip-target-libsanitizer -install-strip-target-libmpx: installdirs +install-strip-target-libsanitizer: installdirs @: $(MAKE); $(unstage) @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - (cd $(TARGET_SUBDIR)/libmpx && \ + $(RAW_CXX_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(TARGET_FLAGS_TO_PASS) install-strip) -@endif target-libmpx +@endif target-libsanitizer # Other targets (info, dvi, pdf, etc.) 
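The libsanitizer rules above switch every sub-make from $(NORMAL_TARGET_EXPORTS) to $(RAW_CXX_TARGET_EXPORTS) and add 'CXX=$$(RAW_CXX_FOR_TARGET)' 'CXX_FOR_TARGET=$$(RAW_CXX_FOR_TARGET)' to the $(MAKE) invocations. The doubled dollar signs and the single quotes are what make that override work: make turns '$$' into a single '$' for the shell, the quotes stop the shell from expanding it, and the sub-make therefore receives the literal command-line assignment CXX=$(RAW_CXX_FOR_TARGET), which it expands against its own definitions. A minimal standalone sketch of that mechanism follows; the file name and the value given to RAW_CXX_FOR_TARGET are made up for illustration and are not taken from the patch.

# raw-cxx-demo.mk -- illustration only; the value below is a placeholder,
# not GCC's real definition of RAW_CXX_FOR_TARGET.
RAW_CXX_FOR_TARGET = placeholder-raw-c++

.PHONY: outer inner
outer:
	$(MAKE) -f raw-cxx-demo.mk inner 'CXX=$$(RAW_CXX_FOR_TARGET)'

# The sub-make receives the literal text CXX=$(RAW_CXX_FOR_TARGET) as a
# command-line override and expands it with its own variables.
inner:
	@echo "CXX seen by sub-make: $(CXX)"

Running 'make -f raw-cxx-demo.mk outer' prints the placeholder value, showing that the expansion happens inside the sub-make rather than in the outer make or the shell — which is exactly what lets the per-module override pick up whatever RAW_CXX_FOR_TARGET is defined to be at that point in the build.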
-.PHONY: maybe-info-target-libmpx info-target-libmpx -maybe-info-target-libmpx: -@if target-libmpx -maybe-info-target-libmpx: info-target-libmpx +.PHONY: maybe-info-target-libsanitizer info-target-libsanitizer +maybe-info-target-libsanitizer: +@if target-libsanitizer +maybe-info-target-libsanitizer: info-target-libsanitizer -info-target-libmpx: \ - configure-target-libmpx +info-target-libsanitizer: \ + configure-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing info in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing info in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41281,25 +44290,25 @@ info-target-libmpx: \ info) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-dvi-target-libmpx dvi-target-libmpx -maybe-dvi-target-libmpx: -@if target-libmpx -maybe-dvi-target-libmpx: dvi-target-libmpx +.PHONY: maybe-dvi-target-libsanitizer dvi-target-libsanitizer +maybe-dvi-target-libsanitizer: +@if target-libsanitizer +maybe-dvi-target-libsanitizer: dvi-target-libsanitizer -dvi-target-libmpx: \ - configure-target-libmpx +dvi-target-libsanitizer: \ + configure-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing dvi in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing dvi in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41307,25 +44316,25 @@ dvi-target-libmpx: \ dvi) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-pdf-target-libmpx pdf-target-libmpx -maybe-pdf-target-libmpx: -@if target-libmpx -maybe-pdf-target-libmpx: pdf-target-libmpx +.PHONY: maybe-pdf-target-libsanitizer pdf-target-libsanitizer +maybe-pdf-target-libsanitizer: +@if target-libsanitizer +maybe-pdf-target-libsanitizer: pdf-target-libsanitizer -pdf-target-libmpx: \ - configure-target-libmpx +pdf-target-libsanitizer: \ + configure-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing pdf in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing pdf in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) 
"AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41333,25 +44342,25 @@ pdf-target-libmpx: \ pdf) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-html-target-libmpx html-target-libmpx -maybe-html-target-libmpx: -@if target-libmpx -maybe-html-target-libmpx: html-target-libmpx +.PHONY: maybe-html-target-libsanitizer html-target-libsanitizer +maybe-html-target-libsanitizer: +@if target-libsanitizer +maybe-html-target-libsanitizer: html-target-libsanitizer -html-target-libmpx: \ - configure-target-libmpx +html-target-libsanitizer: \ + configure-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing html in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing html in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41359,25 +44368,25 @@ html-target-libmpx: \ html) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-TAGS-target-libmpx TAGS-target-libmpx -maybe-TAGS-target-libmpx: -@if target-libmpx -maybe-TAGS-target-libmpx: TAGS-target-libmpx +.PHONY: maybe-TAGS-target-libsanitizer TAGS-target-libsanitizer +maybe-TAGS-target-libsanitizer: +@if target-libsanitizer +maybe-TAGS-target-libsanitizer: TAGS-target-libsanitizer -TAGS-target-libmpx: \ - configure-target-libmpx +TAGS-target-libsanitizer: \ + configure-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing TAGS in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing TAGS in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41385,26 +44394,26 @@ TAGS-target-libmpx: \ TAGS) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-install-info-target-libmpx install-info-target-libmpx -maybe-install-info-target-libmpx: -@if target-libmpx -maybe-install-info-target-libmpx: install-info-target-libmpx +.PHONY: maybe-install-info-target-libsanitizer install-info-target-libsanitizer +maybe-install-info-target-libsanitizer: +@if target-libsanitizer +maybe-install-info-target-libsanitizer: install-info-target-libsanitizer -install-info-target-libmpx: \ - configure-target-libmpx \ - info-target-libmpx +install-info-target-libsanitizer: \ + configure-target-libsanitizer \ + info-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - 
$(NORMAL_TARGET_EXPORTS) \ - echo "Doing install-info in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing install-info in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41412,26 +44421,26 @@ install-info-target-libmpx: \ install-info) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-install-pdf-target-libmpx install-pdf-target-libmpx -maybe-install-pdf-target-libmpx: -@if target-libmpx -maybe-install-pdf-target-libmpx: install-pdf-target-libmpx +.PHONY: maybe-install-pdf-target-libsanitizer install-pdf-target-libsanitizer +maybe-install-pdf-target-libsanitizer: +@if target-libsanitizer +maybe-install-pdf-target-libsanitizer: install-pdf-target-libsanitizer -install-pdf-target-libmpx: \ - configure-target-libmpx \ - pdf-target-libmpx +install-pdf-target-libsanitizer: \ + configure-target-libsanitizer \ + pdf-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing install-pdf in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing install-pdf in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41439,26 +44448,26 @@ install-pdf-target-libmpx: \ install-pdf) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-install-html-target-libmpx install-html-target-libmpx -maybe-install-html-target-libmpx: -@if target-libmpx -maybe-install-html-target-libmpx: install-html-target-libmpx +.PHONY: maybe-install-html-target-libsanitizer install-html-target-libsanitizer +maybe-install-html-target-libsanitizer: +@if target-libsanitizer +maybe-install-html-target-libsanitizer: install-html-target-libsanitizer -install-html-target-libmpx: \ - configure-target-libmpx \ - html-target-libmpx +install-html-target-libsanitizer: \ + configure-target-libsanitizer \ + html-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing install-html in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing install-html in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41466,25 +44475,25 @@ install-html-target-libmpx: \ install-html) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-installcheck-target-libmpx 
installcheck-target-libmpx -maybe-installcheck-target-libmpx: -@if target-libmpx -maybe-installcheck-target-libmpx: installcheck-target-libmpx +.PHONY: maybe-installcheck-target-libsanitizer installcheck-target-libsanitizer +maybe-installcheck-target-libsanitizer: +@if target-libsanitizer +maybe-installcheck-target-libsanitizer: installcheck-target-libsanitizer -installcheck-target-libmpx: \ - configure-target-libmpx +installcheck-target-libsanitizer: \ + configure-target-libsanitizer @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing installcheck in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing installcheck in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41492,24 +44501,24 @@ installcheck-target-libmpx: \ installcheck) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-mostlyclean-target-libmpx mostlyclean-target-libmpx -maybe-mostlyclean-target-libmpx: -@if target-libmpx -maybe-mostlyclean-target-libmpx: mostlyclean-target-libmpx +.PHONY: maybe-mostlyclean-target-libsanitizer mostlyclean-target-libsanitizer +maybe-mostlyclean-target-libsanitizer: +@if target-libsanitizer +maybe-mostlyclean-target-libsanitizer: mostlyclean-target-libsanitizer -mostlyclean-target-libmpx: +mostlyclean-target-libsanitizer: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing mostlyclean in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing mostlyclean in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41517,24 +44526,24 @@ mostlyclean-target-libmpx: mostlyclean) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-clean-target-libmpx clean-target-libmpx -maybe-clean-target-libmpx: -@if target-libmpx -maybe-clean-target-libmpx: clean-target-libmpx +.PHONY: maybe-clean-target-libsanitizer clean-target-libsanitizer +maybe-clean-target-libsanitizer: +@if target-libsanitizer +maybe-clean-target-libsanitizer: clean-target-libsanitizer -clean-target-libmpx: +clean-target-libsanitizer: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing clean in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing clean in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ 
done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41542,24 +44551,24 @@ clean-target-libmpx: clean) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-distclean-target-libmpx distclean-target-libmpx -maybe-distclean-target-libmpx: -@if target-libmpx -maybe-distclean-target-libmpx: distclean-target-libmpx +.PHONY: maybe-distclean-target-libsanitizer distclean-target-libsanitizer +maybe-distclean-target-libsanitizer: +@if target-libsanitizer +maybe-distclean-target-libsanitizer: distclean-target-libsanitizer -distclean-target-libmpx: +distclean-target-libsanitizer: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing distclean in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing distclean in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41567,24 +44576,24 @@ distclean-target-libmpx: distclean) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer -.PHONY: maybe-maintainer-clean-target-libmpx maintainer-clean-target-libmpx -maybe-maintainer-clean-target-libmpx: -@if target-libmpx -maybe-maintainer-clean-target-libmpx: maintainer-clean-target-libmpx +.PHONY: maybe-maintainer-clean-target-libsanitizer maintainer-clean-target-libsanitizer +maybe-maintainer-clean-target-libsanitizer: +@if target-libsanitizer +maybe-maintainer-clean-target-libsanitizer: maintainer-clean-target-libsanitizer -maintainer-clean-target-libmpx: +maintainer-clean-target-libsanitizer: @: $(MAKE); $(unstage) - @[ -f $(TARGET_SUBDIR)/libmpx/Makefile ] || exit 0; \ + @[ -f $(TARGET_SUBDIR)/libsanitizer/Makefile ] || exit 0; \ r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(NORMAL_TARGET_EXPORTS) \ - echo "Doing maintainer-clean in $(TARGET_SUBDIR)/libmpx"; \ + $(RAW_CXX_TARGET_EXPORTS) \ + echo "Doing maintainer-clean in $(TARGET_SUBDIR)/libsanitizer"; \ for flag in $(EXTRA_TARGET_FLAGS); do \ eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ done; \ - (cd $(TARGET_SUBDIR)/libmpx && \ + (cd $(TARGET_SUBDIR)/libsanitizer && \ $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ "RANLIB=$${RANLIB}" \ @@ -41592,7 +44601,7 @@ maintainer-clean-target-libmpx: maintainer-clean) \ || exit 1 -@endif target-libmpx +@endif target-libsanitizer @@ -48201,6 +51210,464 @@ maintainer-clean-target-libhsail-rt: +.PHONY: configure-target-libphobos maybe-configure-target-libphobos +maybe-configure-target-libphobos: +@if gcc-bootstrap +configure-target-libphobos: stage_current +@endif gcc-bootstrap +@if target-libphobos +maybe-configure-target-libphobos: configure-target-libphobos +configure-target-libphobos: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + echo "Checking multilib configuration for libphobos..."; \ + $(SHELL) 
$(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libphobos; \ + $(CC_FOR_TARGET) --print-multi-lib > $(TARGET_SUBDIR)/libphobos/multilib.tmp 2> /dev/null; \ + if test -r $(TARGET_SUBDIR)/libphobos/multilib.out; then \ + if cmp -s $(TARGET_SUBDIR)/libphobos/multilib.tmp $(TARGET_SUBDIR)/libphobos/multilib.out; then \ + rm -f $(TARGET_SUBDIR)/libphobos/multilib.tmp; \ + else \ + rm -f $(TARGET_SUBDIR)/libphobos/Makefile; \ + mv $(TARGET_SUBDIR)/libphobos/multilib.tmp $(TARGET_SUBDIR)/libphobos/multilib.out; \ + fi; \ + else \ + mv $(TARGET_SUBDIR)/libphobos/multilib.tmp $(TARGET_SUBDIR)/libphobos/multilib.out; \ + fi; \ + test ! -f $(TARGET_SUBDIR)/libphobos/Makefile || exit 0; \ + $(SHELL) $(srcdir)/mkinstalldirs $(TARGET_SUBDIR)/libphobos; \ + $(NORMAL_TARGET_EXPORTS) \ + echo Configuring in $(TARGET_SUBDIR)/libphobos; \ + cd "$(TARGET_SUBDIR)/libphobos" || exit 1; \ + case $(srcdir) in \ + /* | [A-Za-z]:[\\/]*) topdir=$(srcdir) ;; \ + *) topdir=`echo $(TARGET_SUBDIR)/libphobos/ | \ + sed -e 's,\./,,g' -e 's,[^/]*/,../,g' `$(srcdir) ;; \ + esac; \ + module_srcdir=libphobos; \ + rm -f no-such-file || : ; \ + CONFIG_SITE=no-such-file $(SHELL) \ + $$s/$$module_srcdir/configure \ + --srcdir=$${topdir}/$$module_srcdir \ + $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ + --target=${target_alias} \ + || exit 1 +@endif target-libphobos + + + + + +.PHONY: all-target-libphobos maybe-all-target-libphobos +maybe-all-target-libphobos: +@if gcc-bootstrap +all-target-libphobos: stage_current +@endif gcc-bootstrap +@if target-libphobos +TARGET-target-libphobos=all +maybe-all-target-libphobos: all-target-libphobos +all-target-libphobos: configure-target-libphobos + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) $(EXTRA_TARGET_FLAGS) \ + $(TARGET-target-libphobos)) +@endif target-libphobos + + + + + +.PHONY: check-target-libphobos maybe-check-target-libphobos +maybe-check-target-libphobos: +@if target-libphobos +maybe-check-target-libphobos: check-target-libphobos + +check-target-libphobos: + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(TARGET_FLAGS_TO_PASS) check) + +@endif target-libphobos + +.PHONY: install-target-libphobos maybe-install-target-libphobos +maybe-install-target-libphobos: +@if target-libphobos +maybe-install-target-libphobos: install-target-libphobos + +install-target-libphobos: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(TARGET_FLAGS_TO_PASS) install) + +@endif target-libphobos + +.PHONY: install-strip-target-libphobos maybe-install-strip-target-libphobos +maybe-install-strip-target-libphobos: +@if target-libphobos +maybe-install-strip-target-libphobos: install-strip-target-libphobos + +install-strip-target-libphobos: installdirs + @: $(MAKE); $(unstage) + @r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(TARGET_FLAGS_TO_PASS) install-strip) + +@endif target-libphobos + +# Other targets (info, dvi, pdf, etc.) 
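The new configure-target-libphobos rule above guards configuration with the usual multilib check before configure ever runs. Stripped of the make-level quoting and backslash continuations, the shell logic of that recipe amounts to the sketch below; TARGET_SUBDIR, CC_FOR_TARGET and the paths are placeholders standing in for the corresponding make variables, and this is a distilled illustration rather than text from the patch.

#!/bin/sh
# Distilled multilib check from the configure-target-libphobos recipe.
TARGET_SUBDIR=./example-target   # placeholder
CC_FOR_TARGET=gcc                # placeholder
subdir=$TARGET_SUBDIR/libphobos

mkdir -p "$subdir"
$CC_FOR_TARGET --print-multi-lib > "$subdir/multilib.tmp" 2>/dev/null
if test -r "$subdir/multilib.out"; then
  if cmp -s "$subdir/multilib.tmp" "$subdir/multilib.out"; then
    rm -f "$subdir/multilib.tmp"        # multilib set unchanged; keep cached copy
  else
    rm -f "$subdir/Makefile"            # multilib set changed; force a reconfigure
    mv "$subdir/multilib.tmp" "$subdir/multilib.out"
  fi
else
  mv "$subdir/multilib.tmp" "$subdir/multilib.out"   # first run; record the set
fi
# If a Makefile is already present here, the directory is configured and up to
# date, so the real recipe exits early instead of re-running configure.
test ! -f "$subdir/Makefile" || exit 0

Deleting the subdirectory Makefile when the multilib list changes is what makes the final guard fall through so the directory gets reconfigured; when nothing changed and a Makefile already exists, the recipe stops early and configure is skipped.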
+ +.PHONY: maybe-info-target-libphobos info-target-libphobos +maybe-info-target-libphobos: +@if target-libphobos +maybe-info-target-libphobos: info-target-libphobos + +info-target-libphobos: \ + configure-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing info in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + info) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-dvi-target-libphobos dvi-target-libphobos +maybe-dvi-target-libphobos: +@if target-libphobos +maybe-dvi-target-libphobos: dvi-target-libphobos + +dvi-target-libphobos: \ + configure-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing dvi in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + dvi) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-pdf-target-libphobos pdf-target-libphobos +maybe-pdf-target-libphobos: +@if target-libphobos +maybe-pdf-target-libphobos: pdf-target-libphobos + +pdf-target-libphobos: \ + configure-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing pdf in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + pdf) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-html-target-libphobos html-target-libphobos +maybe-html-target-libphobos: +@if target-libphobos +maybe-html-target-libphobos: html-target-libphobos + +html-target-libphobos: \ + configure-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing html in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + html) \ + || exit 1 + +@endif target-libphobos + +.PHONY: 
maybe-TAGS-target-libphobos TAGS-target-libphobos +maybe-TAGS-target-libphobos: +@if target-libphobos +maybe-TAGS-target-libphobos: TAGS-target-libphobos + +TAGS-target-libphobos: \ + configure-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing TAGS in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + TAGS) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-install-info-target-libphobos install-info-target-libphobos +maybe-install-info-target-libphobos: +@if target-libphobos +maybe-install-info-target-libphobos: install-info-target-libphobos + +install-info-target-libphobos: \ + configure-target-libphobos \ + info-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing install-info in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-info) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-install-pdf-target-libphobos install-pdf-target-libphobos +maybe-install-pdf-target-libphobos: +@if target-libphobos +maybe-install-pdf-target-libphobos: install-pdf-target-libphobos + +install-pdf-target-libphobos: \ + configure-target-libphobos \ + pdf-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing install-pdf in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-pdf) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-install-html-target-libphobos install-html-target-libphobos +maybe-install-html-target-libphobos: +@if target-libphobos +maybe-install-html-target-libphobos: install-html-target-libphobos + +install-html-target-libphobos: \ + configure-target-libphobos \ + html-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing install-html in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + 
$(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + install-html) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-installcheck-target-libphobos installcheck-target-libphobos +maybe-installcheck-target-libphobos: +@if target-libphobos +maybe-installcheck-target-libphobos: installcheck-target-libphobos + +installcheck-target-libphobos: \ + configure-target-libphobos + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing installcheck in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + installcheck) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-mostlyclean-target-libphobos mostlyclean-target-libphobos +maybe-mostlyclean-target-libphobos: +@if target-libphobos +maybe-mostlyclean-target-libphobos: mostlyclean-target-libphobos + +mostlyclean-target-libphobos: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing mostlyclean in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + mostlyclean) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-clean-target-libphobos clean-target-libphobos +maybe-clean-target-libphobos: +@if target-libphobos +maybe-clean-target-libphobos: clean-target-libphobos + +clean-target-libphobos: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing clean in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + clean) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-distclean-target-libphobos distclean-target-libphobos +maybe-distclean-target-libphobos: +@if target-libphobos +maybe-distclean-target-libphobos: distclean-target-libphobos + +distclean-target-libphobos: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing distclean in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e 
"s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + distclean) \ + || exit 1 + +@endif target-libphobos + +.PHONY: maybe-maintainer-clean-target-libphobos maintainer-clean-target-libphobos +maybe-maintainer-clean-target-libphobos: +@if target-libphobos +maybe-maintainer-clean-target-libphobos: maintainer-clean-target-libphobos + +maintainer-clean-target-libphobos: + @: $(MAKE); $(unstage) + @[ -f $(TARGET_SUBDIR)/libphobos/Makefile ] || exit 0; \ + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(NORMAL_TARGET_EXPORTS) \ + echo "Doing maintainer-clean in $(TARGET_SUBDIR)/libphobos"; \ + for flag in $(EXTRA_TARGET_FLAGS); do \ + eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ + done; \ + (cd $(TARGET_SUBDIR)/libphobos && \ + $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ + "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ + "RANLIB=$${RANLIB}" \ + "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ + maintainer-clean) \ + || exit 1 + +@endif target-libphobos + + + + + .PHONY: configure-target-libtermcap maybe-configure-target-libtermcap maybe-configure-target-libtermcap: @if gcc-bootstrap @@ -53489,6 +56956,10 @@ maintainer-clean-target-libatomic: check-target-libgomp-c++: $(MAKE) RUNTESTFLAGS="$(RUNTESTFLAGS) c++.exp" check-target-libgomp +.PHONY: check-target-libgomp-fortran +check-target-libgomp-fortran: + $(MAKE) RUNTESTFLAGS="$(RUNTESTFLAGS) fortran.exp" check-target-libgomp + @endif target-libgomp @if target-libitm @@ -53540,7 +57011,7 @@ check-gcc-fortran: s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ $(HOST_EXPORTS) \ (cd gcc && $(MAKE) $(GCC_FLAGS_TO_PASS) check-fortran); -check-fortran: check-gcc-fortran check-target-libquadmath check-target-libgfortran +check-fortran: check-gcc-fortran check-target-libquadmath check-target-libgfortran check-target-libgomp-fortran .PHONY: check-gcc-ada check-ada check-gcc-ada: @@ -53582,6 +57053,14 @@ check-gcc-brig: (cd gcc && $(MAKE) $(GCC_FLAGS_TO_PASS) check-brig); check-brig: check-gcc-brig check-target-libhsail-rt +.PHONY: check-gcc-d check-d +check-gcc-d: + r=`${PWD_COMMAND}`; export r; \ + s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ + $(HOST_EXPORTS) \ + (cd gcc && $(MAKE) $(GCC_FLAGS_TO_PASS) check-d); +check-d: check-gcc-d check-target-libphobos + # The gcc part of install-no-fixedincludes, which relies on an intimate # knowledge of how a number of gcc internal targets (inter)operate. Delegate. 
@@ -53734,6 +57213,11 @@ stage1-start:: mkdir stage1-libcpp; \ mv stage1-libcpp libcpp @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stage1-libcody ] || \ + mkdir stage1-libcody; \ + mv stage1-libcody libcody +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stage1-libdecnumber ] || \ mkdir stage1-libdecnumber; \ @@ -53764,6 +57248,11 @@ stage1-start:: mkdir stage1-lto-plugin; \ mv stage1-lto-plugin lto-plugin @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stage1-libctf ] || \ + mkdir stage1-libctf; \ + mv stage1-libctf libctf +@endif libctf @[ -d stage1-$(TARGET_SUBDIR) ] || \ mkdir stage1-$(TARGET_SUBDIR); \ mv stage1-$(TARGET_SUBDIR) $(TARGET_SUBDIR) @@ -53849,6 +57338,11 @@ stage1-end:: cd $(HOST_SUBDIR); mv libcpp stage1-libcpp; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stage1-libcody; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stage1-libdecnumber; \ @@ -53879,6 +57373,11 @@ stage1-end:: cd $(HOST_SUBDIR); mv lto-plugin stage1-lto-plugin; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stage1-libctf; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stage1-$(TARGET_SUBDIR); \ fi @@ -54023,6 +57522,12 @@ stage2-start:: mv stage2-libcpp libcpp; \ mv stage1-libcpp prev-libcpp || test -f stage1-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stage2-libcody ] || \ + mkdir stage2-libcody; \ + mv stage2-libcody libcody; \ + mv stage1-libcody prev-libcody || test -f stage1-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stage2-libdecnumber ] || \ mkdir stage2-libdecnumber; \ @@ -54059,6 +57564,12 @@ stage2-start:: mv stage2-lto-plugin lto-plugin; \ mv stage1-lto-plugin prev-lto-plugin || test -f stage1-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stage2-libctf ] || \ + mkdir stage2-libctf; \ + mv stage2-libctf libctf; \ + mv stage1-libctf prev-libctf || test -f stage1-lean +@endif libctf @[ -d stage2-$(TARGET_SUBDIR) ] || \ mkdir stage2-$(TARGET_SUBDIR); \ mv stage2-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -54161,6 +57672,12 @@ stage2-end:: mv prev-libcpp stage1-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stage2-libcody; \ + mv prev-libcody stage1-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stage2-libdecnumber; \ @@ -54197,6 +57714,12 @@ stage2-end:: mv prev-lto-plugin stage1-lto-plugin; : ; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stage2-libctf; \ + mv prev-libctf stage1-libctf; : ; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stage2-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stage1-$(TARGET_SUBDIR); : ; \ @@ -54365,6 +57888,12 @@ stage3-start:: mv stage3-libcpp libcpp; \ mv stage2-libcpp prev-libcpp || test -f stage2-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stage3-libcody ] || \ + mkdir stage3-libcody; \ + mv stage3-libcody libcody; \ + mv stage2-libcody prev-libcody || test -f stage2-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stage3-libdecnumber ] || \ mkdir stage3-libdecnumber; \ @@ -54401,6 +57930,12 @@ stage3-start:: mv stage3-lto-plugin lto-plugin; \ mv stage2-lto-plugin 
prev-lto-plugin || test -f stage2-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stage3-libctf ] || \ + mkdir stage3-libctf; \ + mv stage3-libctf libctf; \ + mv stage2-libctf prev-libctf || test -f stage2-lean +@endif libctf @[ -d stage3-$(TARGET_SUBDIR) ] || \ mkdir stage3-$(TARGET_SUBDIR); \ mv stage3-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -54503,6 +58038,12 @@ stage3-end:: mv prev-libcpp stage2-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stage3-libcody; \ + mv prev-libcody stage2-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stage3-libdecnumber; \ @@ -54539,6 +58080,12 @@ stage3-end:: mv prev-lto-plugin stage2-lto-plugin; : ; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stage3-libctf; \ + mv prev-libctf stage2-libctf; : ; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stage3-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stage2-$(TARGET_SUBDIR); : ; \ @@ -54584,7 +58131,7 @@ compare: sed=`echo stage3 | sed 's,^stage,,;s,.,.,g'`; \ files=`find stage3-* -name "*$(objext)" -print | \ sed -n s,^stage$$sed-,,p`; \ - for file in $${files}; do \ + for file in $${files} ${extra-compare}; do \ f1=$$r/stage2-$$file; f2=$$r/stage3-$$file; \ if test ! -f $$f1; then continue; fi; \ $(do-compare) > /dev/null 2>&1; \ @@ -54763,6 +58310,12 @@ stage4-start:: mv stage4-libcpp libcpp; \ mv stage3-libcpp prev-libcpp || test -f stage3-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stage4-libcody ] || \ + mkdir stage4-libcody; \ + mv stage4-libcody libcody; \ + mv stage3-libcody prev-libcody || test -f stage3-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stage4-libdecnumber ] || \ mkdir stage4-libdecnumber; \ @@ -54799,6 +58352,12 @@ stage4-start:: mv stage4-lto-plugin lto-plugin; \ mv stage3-lto-plugin prev-lto-plugin || test -f stage3-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stage4-libctf ] || \ + mkdir stage4-libctf; \ + mv stage4-libctf libctf; \ + mv stage3-libctf prev-libctf || test -f stage3-lean +@endif libctf @[ -d stage4-$(TARGET_SUBDIR) ] || \ mkdir stage4-$(TARGET_SUBDIR); \ mv stage4-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -54901,6 +58460,12 @@ stage4-end:: mv prev-libcpp stage3-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stage4-libcody; \ + mv prev-libcody stage3-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stage4-libdecnumber; \ @@ -54937,6 +58502,12 @@ stage4-end:: mv prev-lto-plugin stage3-lto-plugin; : ; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stage4-libctf; \ + mv prev-libctf stage3-libctf; : ; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stage4-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stage3-$(TARGET_SUBDIR); : ; \ @@ -54982,7 +58553,7 @@ compare3: sed=`echo stage4 | sed 's,^stage,,;s,.,.,g'`; \ files=`find stage4-* -name "*$(objext)" -print | \ sed -n s,^stage$$sed-,,p`; \ - for file in $${files}; do \ + for file in $${files} ${extra-compare}; do \ f1=$$r/stage3-$$file; f2=$$r/stage4-$$file; \ if test ! 
-f $$f1; then continue; fi; \ $(do-compare3) > /dev/null 2>&1; \ @@ -55149,6 +58720,12 @@ stageprofile-start:: mv stageprofile-libcpp libcpp; \ mv stage1-libcpp prev-libcpp || test -f stage1-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stageprofile-libcody ] || \ + mkdir stageprofile-libcody; \ + mv stageprofile-libcody libcody; \ + mv stage1-libcody prev-libcody || test -f stage1-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stageprofile-libdecnumber ] || \ mkdir stageprofile-libdecnumber; \ @@ -55185,6 +58762,12 @@ stageprofile-start:: mv stageprofile-lto-plugin lto-plugin; \ mv stage1-lto-plugin prev-lto-plugin || test -f stage1-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stageprofile-libctf ] || \ + mkdir stageprofile-libctf; \ + mv stageprofile-libctf libctf; \ + mv stage1-libctf prev-libctf || test -f stage1-lean +@endif libctf @[ -d stageprofile-$(TARGET_SUBDIR) ] || \ mkdir stageprofile-$(TARGET_SUBDIR); \ mv stageprofile-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -55287,6 +58870,12 @@ stageprofile-end:: mv prev-libcpp stage1-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stageprofile-libcody; \ + mv prev-libcody stage1-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stageprofile-libdecnumber; \ @@ -55323,6 +58912,12 @@ stageprofile-end:: mv prev-lto-plugin stage1-lto-plugin; : ; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stageprofile-libctf; \ + mv prev-libctf stage1-libctf; : ; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stageprofile-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stage1-$(TARGET_SUBDIR); : ; \ @@ -55468,6 +59063,12 @@ stagetrain-start:: mv stagetrain-libcpp libcpp; \ mv stageprofile-libcpp prev-libcpp || test -f stageprofile-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stagetrain-libcody ] || \ + mkdir stagetrain-libcody; \ + mv stagetrain-libcody libcody; \ + mv stageprofile-libcody prev-libcody || test -f stageprofile-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stagetrain-libdecnumber ] || \ mkdir stagetrain-libdecnumber; \ @@ -55504,6 +59105,12 @@ stagetrain-start:: mv stagetrain-lto-plugin lto-plugin; \ mv stageprofile-lto-plugin prev-lto-plugin || test -f stageprofile-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stagetrain-libctf ] || \ + mkdir stagetrain-libctf; \ + mv stagetrain-libctf libctf; \ + mv stageprofile-libctf prev-libctf || test -f stageprofile-lean +@endif libctf @[ -d stagetrain-$(TARGET_SUBDIR) ] || \ mkdir stagetrain-$(TARGET_SUBDIR); \ mv stagetrain-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -55606,6 +59213,12 @@ stagetrain-end:: mv prev-libcpp stageprofile-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stagetrain-libcody; \ + mv prev-libcody stageprofile-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stagetrain-libdecnumber; \ @@ -55642,6 +59255,12 @@ stagetrain-end:: mv prev-lto-plugin stageprofile-lto-plugin; : ; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stagetrain-libctf; \ + mv prev-libctf stageprofile-libctf; : ; \ + fi +@endif libctf @if test -d 
$(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stagetrain-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stageprofile-$(TARGET_SUBDIR); : ; \ @@ -55787,6 +59406,12 @@ stagefeedback-start:: mv stagefeedback-libcpp libcpp; \ mv stagetrain-libcpp prev-libcpp || test -f stagetrain-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stagefeedback-libcody ] || \ + mkdir stagefeedback-libcody; \ + mv stagefeedback-libcody libcody; \ + mv stagetrain-libcody prev-libcody || test -f stagetrain-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stagefeedback-libdecnumber ] || \ mkdir stagefeedback-libdecnumber; \ @@ -55823,6 +59448,12 @@ stagefeedback-start:: mv stagefeedback-lto-plugin lto-plugin; \ mv stagetrain-lto-plugin prev-lto-plugin || test -f stagetrain-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stagefeedback-libctf ] || \ + mkdir stagefeedback-libctf; \ + mv stagefeedback-libctf libctf; \ + mv stagetrain-libctf prev-libctf || test -f stagetrain-lean +@endif libctf @[ -d stagefeedback-$(TARGET_SUBDIR) ] || \ mkdir stagefeedback-$(TARGET_SUBDIR); \ mv stagefeedback-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -55925,6 +59556,12 @@ stagefeedback-end:: mv prev-libcpp stagetrain-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stagefeedback-libcody; \ + mv prev-libcody stagetrain-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stagefeedback-libdecnumber; \ @@ -55961,6 +59598,12 @@ stagefeedback-end:: mv prev-lto-plugin stagetrain-lto-plugin; : ; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stagefeedback-libctf; \ + mv prev-libctf stagetrain-libctf; : ; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stagefeedback-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stagetrain-$(TARGET_SUBDIR); : ; \ @@ -56129,6 +59772,12 @@ stageautoprofile-start:: mv stageautoprofile-libcpp libcpp; \ mv stage1-libcpp prev-libcpp || test -f stage1-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stageautoprofile-libcody ] || \ + mkdir stageautoprofile-libcody; \ + mv stageautoprofile-libcody libcody; \ + mv stage1-libcody prev-libcody || test -f stage1-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stageautoprofile-libdecnumber ] || \ mkdir stageautoprofile-libdecnumber; \ @@ -56165,6 +59814,12 @@ stageautoprofile-start:: mv stageautoprofile-lto-plugin lto-plugin; \ mv stage1-lto-plugin prev-lto-plugin || test -f stage1-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stageautoprofile-libctf ] || \ + mkdir stageautoprofile-libctf; \ + mv stageautoprofile-libctf libctf; \ + mv stage1-libctf prev-libctf || test -f stage1-lean +@endif libctf @[ -d stageautoprofile-$(TARGET_SUBDIR) ] || \ mkdir stageautoprofile-$(TARGET_SUBDIR); \ mv stageautoprofile-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -56267,6 +59922,12 @@ stageautoprofile-end:: mv prev-libcpp stage1-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stageautoprofile-libcody; \ + mv prev-libcody stage1-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stageautoprofile-libdecnumber; \ @@ -56303,6 +59964,12 @@ stageautoprofile-end:: mv prev-lto-plugin stage1-lto-plugin; : ; \ fi @endif lto-plugin 
+@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stageautoprofile-libctf; \ + mv prev-libctf stage1-libctf; : ; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stageautoprofile-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stage1-$(TARGET_SUBDIR); : ; \ @@ -56448,6 +60115,12 @@ stageautofeedback-start:: mv stageautofeedback-libcpp libcpp; \ mv stageautoprofile-libcpp prev-libcpp || test -f stageautoprofile-lean @endif libcpp +@if libcody + @cd $(HOST_SUBDIR); [ -d stageautofeedback-libcody ] || \ + mkdir stageautofeedback-libcody; \ + mv stageautofeedback-libcody libcody; \ + mv stageautoprofile-libcody prev-libcody || test -f stageautoprofile-lean +@endif libcody @if libdecnumber @cd $(HOST_SUBDIR); [ -d stageautofeedback-libdecnumber ] || \ mkdir stageautofeedback-libdecnumber; \ @@ -56484,6 +60157,12 @@ stageautofeedback-start:: mv stageautofeedback-lto-plugin lto-plugin; \ mv stageautoprofile-lto-plugin prev-lto-plugin || test -f stageautoprofile-lean @endif lto-plugin +@if libctf + @cd $(HOST_SUBDIR); [ -d stageautofeedback-libctf ] || \ + mkdir stageautofeedback-libctf; \ + mv stageautofeedback-libctf libctf; \ + mv stageautoprofile-libctf prev-libctf || test -f stageautoprofile-lean +@endif libctf @[ -d stageautofeedback-$(TARGET_SUBDIR) ] || \ mkdir stageautofeedback-$(TARGET_SUBDIR); \ mv stageautofeedback-$(TARGET_SUBDIR) $(TARGET_SUBDIR); \ @@ -56586,6 +60265,12 @@ stageautofeedback-end:: mv prev-libcpp stageautoprofile-libcpp; : ; \ fi @endif libcpp +@if libcody + @if test -d $(HOST_SUBDIR)/libcody; then \ + cd $(HOST_SUBDIR); mv libcody stageautofeedback-libcody; \ + mv prev-libcody stageautoprofile-libcody; : ; \ + fi +@endif libcody @if libdecnumber @if test -d $(HOST_SUBDIR)/libdecnumber; then \ cd $(HOST_SUBDIR); mv libdecnumber stageautofeedback-libdecnumber; \ @@ -56622,6 +60307,12 @@ stageautofeedback-end:: mv prev-lto-plugin stageautoprofile-lto-plugin; : ; \ fi @endif lto-plugin +@if libctf + @if test -d $(HOST_SUBDIR)/libctf; then \ + cd $(HOST_SUBDIR); mv libctf stageautofeedback-libctf; \ + mv prev-libctf stageautoprofile-libctf; : ; \ + fi +@endif libctf @if test -d $(TARGET_SUBDIR); then \ mv $(TARGET_SUBDIR) stageautofeedback-$(TARGET_SUBDIR); \ mv prev-$(TARGET_SUBDIR) stageautoprofile-$(TARGET_SUBDIR); : ; \ @@ -56694,8 +60385,8 @@ stageprofile-end:: stagefeedback-start:: @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - for i in stageprofile-*; do \ - j=`echo $$i | sed s/^stageprofile-//`; \ + for i in prev-*; do \ + j=`echo $$i | sed s/^prev-//`; \ cd $$r/$$i && \ { find . -type d | sort | sed 's,.*,$(SHELL) '"$$s"'/mkinstalldirs "../'$$j'/&",' | $(SHELL); } && \ { find . 
-name '*.*da' | sed 's,.*,$(LN) -f "&" "../'$$j'/&",' | $(SHELL); }; \ @@ -56747,15 +60438,6 @@ configure-stagetrain-target-libsanitizer: maybe-all-stagetrain-gcc configure-stagefeedback-target-libsanitizer: maybe-all-stagefeedback-gcc configure-stageautoprofile-target-libsanitizer: maybe-all-stageautoprofile-gcc configure-stageautofeedback-target-libsanitizer: maybe-all-stageautofeedback-gcc -configure-stage1-target-libmpx: maybe-all-stage1-gcc -configure-stage2-target-libmpx: maybe-all-stage2-gcc -configure-stage3-target-libmpx: maybe-all-stage3-gcc -configure-stage4-target-libmpx: maybe-all-stage4-gcc -configure-stageprofile-target-libmpx: maybe-all-stageprofile-gcc -configure-stagetrain-target-libmpx: maybe-all-stagetrain-gcc -configure-stagefeedback-target-libmpx: maybe-all-stagefeedback-gcc -configure-stageautoprofile-target-libmpx: maybe-all-stageautoprofile-gcc -configure-stageautofeedback-target-libmpx: maybe-all-stageautofeedback-gcc configure-stage1-target-libvtv: maybe-all-stage1-gcc configure-stage2-target-libvtv: maybe-all-stage2-gcc configure-stage3-target-libvtv: maybe-all-stage3-gcc @@ -56783,6 +60465,7 @@ configure-target-libgfortran: stage_last configure-target-libobjc: stage_last configure-target-libgo: stage_last configure-target-libhsail-rt: stage_last +configure-target-libphobos: stage_last configure-target-libtermcap: stage_last configure-target-winsup: stage_last configure-target-libgloss: stage_last @@ -56806,7 +60489,6 @@ configure-target-libatomic: stage_last @if gcc-no-bootstrap configure-target-libstdc++-v3: maybe-all-gcc configure-target-libsanitizer: maybe-all-gcc -configure-target-libmpx: maybe-all-gcc configure-target-libvtv: maybe-all-gcc configure-target-liboffloadmic: maybe-all-gcc configure-target-libssp: maybe-all-gcc @@ -56818,6 +60500,7 @@ configure-target-libgfortran: maybe-all-gcc configure-target-libobjc: maybe-all-gcc configure-target-libgo: maybe-all-gcc configure-target-libhsail-rt: maybe-all-gcc +configure-target-libphobos: maybe-all-gcc configure-target-libtermcap: maybe-all-gcc configure-target-winsup: maybe-all-gcc configure-target-libgloss: maybe-all-gcc @@ -56857,7 +60540,6 @@ all-build-m4: maybe-all-build-texinfo all-build-fixincludes: maybe-all-build-libiberty all-build-libcpp: maybe-all-build-libiberty configure-gcc: maybe-configure-intl - configure-stage1-gcc: maybe-configure-stage1-intl configure-stage2-gcc: maybe-configure-stage2-intl configure-stage3-gcc: maybe-configure-stage3-intl @@ -56868,7 +60550,6 @@ configure-stagefeedback-gcc: maybe-configure-stagefeedback-intl configure-stageautoprofile-gcc: maybe-configure-stageautoprofile-intl configure-stageautofeedback-gcc: maybe-configure-stageautofeedback-intl configure-gcc: maybe-all-gmp - configure-stage1-gcc: maybe-all-stage1-gmp configure-stage2-gcc: maybe-all-stage2-gmp configure-stage3-gcc: maybe-all-stage3-gmp @@ -56879,7 +60560,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-gmp configure-stageautoprofile-gcc: maybe-all-stageautoprofile-gmp configure-stageautofeedback-gcc: maybe-all-stageautofeedback-gmp configure-gcc: maybe-all-mpfr - configure-stage1-gcc: maybe-all-stage1-mpfr configure-stage2-gcc: maybe-all-stage2-mpfr configure-stage3-gcc: maybe-all-stage3-mpfr @@ -56890,7 +60570,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-mpfr configure-stageautoprofile-gcc: maybe-all-stageautoprofile-mpfr configure-stageautofeedback-gcc: maybe-all-stageautofeedback-mpfr configure-gcc: maybe-all-mpc - configure-stage1-gcc: maybe-all-stage1-mpc 
configure-stage2-gcc: maybe-all-stage2-mpc configure-stage3-gcc: maybe-all-stage3-mpc @@ -56901,7 +60580,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-mpc configure-stageautoprofile-gcc: maybe-all-stageautoprofile-mpc configure-stageautofeedback-gcc: maybe-all-stageautofeedback-mpc configure-gcc: maybe-all-isl - configure-stage1-gcc: maybe-all-stage1-isl configure-stage2-gcc: maybe-all-stage2-isl configure-stage3-gcc: maybe-all-stage3-isl @@ -56912,7 +60590,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-isl configure-stageautoprofile-gcc: maybe-all-stageautoprofile-isl configure-stageautofeedback-gcc: maybe-all-stageautofeedback-isl configure-gcc: maybe-all-lto-plugin - configure-stage1-gcc: maybe-all-stage1-lto-plugin configure-stage2-gcc: maybe-all-stage2-lto-plugin configure-stage3-gcc: maybe-all-stage3-lto-plugin @@ -56923,7 +60600,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-lto-plugin configure-stageautoprofile-gcc: maybe-all-stageautoprofile-lto-plugin configure-stageautofeedback-gcc: maybe-all-stageautofeedback-lto-plugin configure-gcc: maybe-all-binutils - configure-stage1-gcc: maybe-all-stage1-binutils configure-stage2-gcc: maybe-all-stage2-binutils configure-stage3-gcc: maybe-all-stage3-binutils @@ -56934,7 +60610,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-binutils configure-stageautoprofile-gcc: maybe-all-stageautoprofile-binutils configure-stageautofeedback-gcc: maybe-all-stageautofeedback-binutils configure-gcc: maybe-all-gas - configure-stage1-gcc: maybe-all-stage1-gas configure-stage2-gcc: maybe-all-stage2-gas configure-stage3-gcc: maybe-all-stage3-gas @@ -56945,7 +60620,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-gas configure-stageautoprofile-gcc: maybe-all-stageautoprofile-gas configure-stageautofeedback-gcc: maybe-all-stageautofeedback-gas configure-gcc: maybe-all-ld - configure-stage1-gcc: maybe-all-stage1-ld configure-stage2-gcc: maybe-all-stage2-ld configure-stage3-gcc: maybe-all-stage3-ld @@ -56956,7 +60630,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-ld configure-stageautoprofile-gcc: maybe-all-stageautoprofile-ld configure-stageautofeedback-gcc: maybe-all-stageautofeedback-ld configure-gcc: maybe-all-gold - configure-stage1-gcc: maybe-all-stage1-gold configure-stage2-gcc: maybe-all-stage2-gold configure-stage3-gcc: maybe-all-stage3-gold @@ -56967,7 +60640,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-gold configure-stageautoprofile-gcc: maybe-all-stageautoprofile-gold configure-stageautofeedback-gcc: maybe-all-stageautofeedback-gold configure-gcc: maybe-all-libelf - configure-stage1-gcc: maybe-all-stage1-libelf configure-stage2-gcc: maybe-all-stage2-libelf configure-stage3-gcc: maybe-all-stage3-libelf @@ -56978,7 +60650,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-libelf configure-stageautoprofile-gcc: maybe-all-stageautoprofile-libelf configure-stageautofeedback-gcc: maybe-all-stageautofeedback-libelf configure-gcc: maybe-all-libiconv - configure-stage1-gcc: maybe-all-stage1-libiconv configure-stage2-gcc: maybe-all-stage2-libiconv configure-stage3-gcc: maybe-all-stage3-libiconv @@ -56989,7 +60660,6 @@ configure-stagefeedback-gcc: maybe-all-stagefeedback-libiconv configure-stageautoprofile-gcc: maybe-all-stageautoprofile-libiconv configure-stageautofeedback-gcc: maybe-all-stageautofeedback-libiconv all-gcc: all-libiberty - all-stage1-gcc: all-stage1-libiberty all-stage2-gcc: all-stage2-libiberty all-stage3-gcc: all-stage3-libiberty @@ -57000,7 +60670,6 @@ 
all-stagefeedback-gcc: all-stagefeedback-libiberty all-stageautoprofile-gcc: all-stageautoprofile-libiberty all-stageautofeedback-gcc: all-stageautofeedback-libiberty all-gcc: maybe-all-intl - all-stage1-gcc: maybe-all-stage1-intl all-stage2-gcc: maybe-all-stage2-intl all-stage3-gcc: maybe-all-stage3-intl @@ -57011,7 +60680,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-intl all-stageautoprofile-gcc: maybe-all-stageautoprofile-intl all-stageautofeedback-gcc: maybe-all-stageautofeedback-intl all-gcc: maybe-all-mpfr - all-stage1-gcc: maybe-all-stage1-mpfr all-stage2-gcc: maybe-all-stage2-mpfr all-stage3-gcc: maybe-all-stage3-mpfr @@ -57022,7 +60690,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-mpfr all-stageautoprofile-gcc: maybe-all-stageautoprofile-mpfr all-stageautofeedback-gcc: maybe-all-stageautofeedback-mpfr all-gcc: maybe-all-mpc - all-stage1-gcc: maybe-all-stage1-mpc all-stage2-gcc: maybe-all-stage2-mpc all-stage3-gcc: maybe-all-stage3-mpc @@ -57033,7 +60700,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-mpc all-stageautoprofile-gcc: maybe-all-stageautoprofile-mpc all-stageautofeedback-gcc: maybe-all-stageautofeedback-mpc all-gcc: maybe-all-isl - all-stage1-gcc: maybe-all-stage1-isl all-stage2-gcc: maybe-all-stage2-isl all-stage3-gcc: maybe-all-stage3-isl @@ -57044,7 +60710,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-isl all-stageautoprofile-gcc: maybe-all-stageautoprofile-isl all-stageautofeedback-gcc: maybe-all-stageautofeedback-isl all-gcc: maybe-all-build-texinfo - all-stage1-gcc: maybe-all-build-texinfo all-stage2-gcc: maybe-all-build-texinfo all-stage3-gcc: maybe-all-build-texinfo @@ -57055,7 +60720,6 @@ all-stagefeedback-gcc: maybe-all-build-texinfo all-stageautoprofile-gcc: maybe-all-build-texinfo all-stageautofeedback-gcc: maybe-all-build-texinfo all-gcc: maybe-all-build-bison - all-stage1-gcc: maybe-all-build-bison all-stage2-gcc: maybe-all-build-bison all-stage3-gcc: maybe-all-build-bison @@ -57066,7 +60730,6 @@ all-stagefeedback-gcc: maybe-all-build-bison all-stageautoprofile-gcc: maybe-all-build-bison all-stageautofeedback-gcc: maybe-all-build-bison all-gcc: maybe-all-build-flex - all-stage1-gcc: maybe-all-build-flex all-stage2-gcc: maybe-all-build-flex all-stage3-gcc: maybe-all-build-flex @@ -57077,7 +60740,6 @@ all-stagefeedback-gcc: maybe-all-build-flex all-stageautoprofile-gcc: maybe-all-build-flex all-stageautofeedback-gcc: maybe-all-build-flex all-gcc: maybe-all-build-libiberty - all-stage1-gcc: maybe-all-build-libiberty all-stage2-gcc: maybe-all-build-libiberty all-stage3-gcc: maybe-all-build-libiberty @@ -57088,7 +60750,6 @@ all-stagefeedback-gcc: maybe-all-build-libiberty all-stageautoprofile-gcc: maybe-all-build-libiberty all-stageautofeedback-gcc: maybe-all-build-libiberty all-gcc: maybe-all-build-fixincludes - all-stage1-gcc: maybe-all-build-fixincludes all-stage2-gcc: maybe-all-build-fixincludes all-stage3-gcc: maybe-all-build-fixincludes @@ -57099,7 +60760,6 @@ all-stagefeedback-gcc: maybe-all-build-fixincludes all-stageautoprofile-gcc: maybe-all-build-fixincludes all-stageautofeedback-gcc: maybe-all-build-fixincludes all-gcc: maybe-all-build-libcpp - all-stage1-gcc: maybe-all-build-libcpp all-stage2-gcc: maybe-all-build-libcpp all-stage3-gcc: maybe-all-build-libcpp @@ -57110,7 +60770,6 @@ all-stagefeedback-gcc: maybe-all-build-libcpp all-stageautoprofile-gcc: maybe-all-build-libcpp all-stageautofeedback-gcc: maybe-all-build-libcpp all-gcc: maybe-all-zlib - all-stage1-gcc: maybe-all-stage1-zlib all-stage2-gcc: maybe-all-stage2-zlib 
all-stage3-gcc: maybe-all-stage3-zlib @@ -57121,7 +60780,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-zlib all-stageautoprofile-gcc: maybe-all-stageautoprofile-zlib all-stageautofeedback-gcc: maybe-all-stageautofeedback-zlib all-gcc: all-libbacktrace - all-stage1-gcc: all-stage1-libbacktrace all-stage2-gcc: all-stage2-libbacktrace all-stage3-gcc: all-stage3-libbacktrace @@ -57132,7 +60790,6 @@ all-stagefeedback-gcc: all-stagefeedback-libbacktrace all-stageautoprofile-gcc: all-stageautoprofile-libbacktrace all-stageautofeedback-gcc: all-stageautofeedback-libbacktrace all-gcc: all-libcpp - all-stage1-gcc: all-stage1-libcpp all-stage2-gcc: all-stage2-libcpp all-stage3-gcc: all-stage3-libcpp @@ -57142,8 +60799,17 @@ all-stagetrain-gcc: all-stagetrain-libcpp all-stagefeedback-gcc: all-stagefeedback-libcpp all-stageautoprofile-gcc: all-stageautoprofile-libcpp all-stageautofeedback-gcc: all-stageautofeedback-libcpp +all-gcc: all-libcody +all-stage1-gcc: all-stage1-libcody +all-stage2-gcc: all-stage2-libcody +all-stage3-gcc: all-stage3-libcody +all-stage4-gcc: all-stage4-libcody +all-stageprofile-gcc: all-stageprofile-libcody +all-stagetrain-gcc: all-stagetrain-libcody +all-stagefeedback-gcc: all-stagefeedback-libcody +all-stageautoprofile-gcc: all-stageautoprofile-libcody +all-stageautofeedback-gcc: all-stageautofeedback-libcody all-gcc: all-libdecnumber - all-stage1-gcc: all-stage1-libdecnumber all-stage2-gcc: all-stage2-libdecnumber all-stage3-gcc: all-stage3-libdecnumber @@ -57154,7 +60820,6 @@ all-stagefeedback-gcc: all-stagefeedback-libdecnumber all-stageautoprofile-gcc: all-stageautoprofile-libdecnumber all-stageautofeedback-gcc: all-stageautofeedback-libdecnumber all-gcc: maybe-all-libiberty - all-stage1-gcc: maybe-all-stage1-libiberty all-stage2-gcc: maybe-all-stage2-libiberty all-stage3-gcc: maybe-all-stage3-libiberty @@ -57165,7 +60830,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-libiberty all-stageautoprofile-gcc: maybe-all-stageautoprofile-libiberty all-stageautofeedback-gcc: maybe-all-stageautofeedback-libiberty all-gcc: maybe-all-fixincludes - all-stage1-gcc: maybe-all-stage1-fixincludes all-stage2-gcc: maybe-all-stage2-fixincludes all-stage3-gcc: maybe-all-stage3-fixincludes @@ -57176,7 +60840,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-fixincludes all-stageautoprofile-gcc: maybe-all-stageautoprofile-fixincludes all-stageautofeedback-gcc: maybe-all-stageautofeedback-fixincludes all-gcc: maybe-all-lto-plugin - all-stage1-gcc: maybe-all-stage1-lto-plugin all-stage2-gcc: maybe-all-stage2-lto-plugin all-stage3-gcc: maybe-all-stage3-lto-plugin @@ -57187,7 +60850,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-lto-plugin all-stageautoprofile-gcc: maybe-all-stageautoprofile-lto-plugin all-stageautofeedback-gcc: maybe-all-stageautofeedback-lto-plugin all-gcc: maybe-all-libiconv - all-stage1-gcc: maybe-all-stage1-libiconv all-stage2-gcc: maybe-all-stage2-libiconv all-stage3-gcc: maybe-all-stage3-libiconv @@ -57198,7 +60860,6 @@ all-stagefeedback-gcc: maybe-all-stagefeedback-libiconv all-stageautoprofile-gcc: maybe-all-stageautoprofile-libiconv all-stageautofeedback-gcc: maybe-all-stageautofeedback-libiconv info-gcc: maybe-all-build-libiberty - info-stage1-gcc: maybe-all-build-libiberty info-stage2-gcc: maybe-all-build-libiberty info-stage3-gcc: maybe-all-build-libiberty @@ -57209,7 +60870,6 @@ info-stagefeedback-gcc: maybe-all-build-libiberty info-stageautoprofile-gcc: maybe-all-build-libiberty info-stageautofeedback-gcc: maybe-all-build-libiberty dvi-gcc: 
maybe-all-build-libiberty - dvi-stage1-gcc: maybe-all-build-libiberty dvi-stage2-gcc: maybe-all-build-libiberty dvi-stage3-gcc: maybe-all-build-libiberty @@ -57220,7 +60880,6 @@ dvi-stagefeedback-gcc: maybe-all-build-libiberty dvi-stageautoprofile-gcc: maybe-all-build-libiberty dvi-stageautofeedback-gcc: maybe-all-build-libiberty pdf-gcc: maybe-all-build-libiberty - pdf-stage1-gcc: maybe-all-build-libiberty pdf-stage2-gcc: maybe-all-build-libiberty pdf-stage3-gcc: maybe-all-build-libiberty @@ -57231,7 +60890,6 @@ pdf-stagefeedback-gcc: maybe-all-build-libiberty pdf-stageautoprofile-gcc: maybe-all-build-libiberty pdf-stageautofeedback-gcc: maybe-all-build-libiberty html-gcc: maybe-all-build-libiberty - html-stage1-gcc: maybe-all-build-libiberty html-stage2-gcc: maybe-all-build-libiberty html-stage3-gcc: maybe-all-build-libiberty @@ -57246,7 +60904,6 @@ install-gcc: maybe-install-lto-plugin install-strip-gcc: maybe-install-strip-fixincludes install-strip-gcc: maybe-install-strip-lto-plugin configure-libcpp: configure-libiberty - configure-stage1-libcpp: configure-stage1-libiberty configure-stage2-libcpp: configure-stage2-libiberty configure-stage3-libcpp: configure-stage3-libiberty @@ -57257,7 +60914,6 @@ configure-stagefeedback-libcpp: configure-stagefeedback-libiberty configure-stageautoprofile-libcpp: configure-stageautoprofile-libiberty configure-stageautofeedback-libcpp: configure-stageautofeedback-libiberty configure-libcpp: maybe-configure-intl - configure-stage1-libcpp: maybe-configure-stage1-intl configure-stage2-libcpp: maybe-configure-stage2-intl configure-stage3-libcpp: maybe-configure-stage3-intl @@ -57268,7 +60924,6 @@ configure-stagefeedback-libcpp: maybe-configure-stagefeedback-intl configure-stageautoprofile-libcpp: maybe-configure-stageautoprofile-intl configure-stageautofeedback-libcpp: maybe-configure-stageautofeedback-intl configure-libcpp: maybe-all-libiconv - configure-stage1-libcpp: maybe-all-stage1-libiconv configure-stage2-libcpp: maybe-all-stage2-libiconv configure-stage3-libcpp: maybe-all-stage3-libiconv @@ -57279,7 +60934,6 @@ configure-stagefeedback-libcpp: maybe-all-stagefeedback-libiconv configure-stageautoprofile-libcpp: maybe-all-stageautoprofile-libiconv configure-stageautofeedback-libcpp: maybe-all-stageautofeedback-libiconv all-libcpp: all-libiberty - all-stage1-libcpp: all-stage1-libiberty all-stage2-libcpp: all-stage2-libiberty all-stage3-libcpp: all-stage3-libiberty @@ -57290,7 +60944,6 @@ all-stagefeedback-libcpp: all-stagefeedback-libiberty all-stageautoprofile-libcpp: all-stageautoprofile-libiberty all-stageautofeedback-libcpp: all-stageautofeedback-libiberty all-libcpp: maybe-all-intl - all-stage1-libcpp: maybe-all-stage1-intl all-stage2-libcpp: maybe-all-stage2-intl all-stage3-libcpp: maybe-all-stage3-intl @@ -57301,7 +60954,6 @@ all-stagefeedback-libcpp: maybe-all-stagefeedback-intl all-stageautoprofile-libcpp: maybe-all-stageautoprofile-intl all-stageautofeedback-libcpp: maybe-all-stageautofeedback-intl all-libcpp: maybe-all-libiconv - all-stage1-libcpp: maybe-all-stage1-libiconv all-stage2-libcpp: maybe-all-stage2-libiconv all-stage3-libcpp: maybe-all-stage3-libiconv @@ -57312,7 +60964,6 @@ all-stagefeedback-libcpp: maybe-all-stagefeedback-libiconv all-stageautoprofile-libcpp: maybe-all-stageautoprofile-libiconv all-stageautofeedback-libcpp: maybe-all-stageautofeedback-libiconv all-fixincludes: maybe-all-libiberty - all-stage1-fixincludes: maybe-all-stage1-libiberty all-stage2-fixincludes: maybe-all-stage2-libiberty all-stage3-fixincludes: 
maybe-all-stage3-libiberty @@ -57323,9 +60974,7 @@ all-stagefeedback-fixincludes: maybe-all-stagefeedback-libiberty all-stageautoprofile-fixincludes: maybe-all-stageautoprofile-libiberty all-stageautofeedback-fixincludes: maybe-all-stageautofeedback-libiberty all-gnattools: maybe-all-target-libada -all-gnattools: maybe-all-target-libstdc++-v3 all-lto-plugin: maybe-all-libiberty - all-stage1-lto-plugin: maybe-all-stage1-libiberty all-stage2-lto-plugin: maybe-all-stage2-libiberty all-stage3-lto-plugin: maybe-all-stage3-libiberty @@ -57336,7 +60985,6 @@ all-stagefeedback-lto-plugin: maybe-all-stagefeedback-libiberty all-stageautoprofile-lto-plugin: maybe-all-stageautoprofile-libiberty all-stageautofeedback-lto-plugin: maybe-all-stageautofeedback-libiberty all-lto-plugin: maybe-all-libiberty-linker-plugin - all-stage1-lto-plugin: maybe-all-stage1-libiberty-linker-plugin all-stage2-lto-plugin: maybe-all-stage2-libiberty-linker-plugin all-stage3-lto-plugin: maybe-all-stage3-libiberty-linker-plugin @@ -57346,12 +60994,8 @@ all-stagetrain-lto-plugin: maybe-all-stagetrain-libiberty-linker-plugin all-stagefeedback-lto-plugin: maybe-all-stagefeedback-libiberty-linker-plugin all-stageautoprofile-lto-plugin: maybe-all-stageautoprofile-libiberty-linker-plugin all-stageautofeedback-lto-plugin: maybe-all-stageautofeedback-libiberty-linker-plugin -configure-libcc1: maybe-configure-gcc -all-libcc1: maybe-all-gcc all-gotools: maybe-all-target-libgo -all-utils: maybe-all-libiberty configure-intl: maybe-all-libiconv - configure-stage1-intl: maybe-all-stage1-libiconv configure-stage2-intl: maybe-all-stage2-libiconv configure-stage3-intl: maybe-all-stage3-libiconv @@ -57362,7 +61006,6 @@ configure-stagefeedback-intl: maybe-all-stagefeedback-libiconv configure-stageautoprofile-intl: maybe-all-stageautoprofile-libiconv configure-stageautofeedback-intl: maybe-all-stageautofeedback-libiconv configure-mpfr: maybe-all-gmp - configure-stage1-mpfr: maybe-all-stage1-gmp configure-stage2-mpfr: maybe-all-stage2-gmp configure-stage3-mpfr: maybe-all-stage3-gmp @@ -57373,7 +61016,6 @@ configure-stagefeedback-mpfr: maybe-all-stagefeedback-gmp configure-stageautoprofile-mpfr: maybe-all-stageautoprofile-gmp configure-stageautofeedback-mpfr: maybe-all-stageautofeedback-gmp configure-mpc: maybe-all-mpfr - configure-stage1-mpc: maybe-all-stage1-mpfr configure-stage2-mpc: maybe-all-stage2-mpfr configure-stage3-mpc: maybe-all-stage3-mpfr @@ -57384,7 +61026,6 @@ configure-stagefeedback-mpc: maybe-all-stagefeedback-mpfr configure-stageautoprofile-mpc: maybe-all-stageautoprofile-mpfr configure-stageautofeedback-mpc: maybe-all-stageautofeedback-mpfr configure-isl: maybe-all-gmp - configure-stage1-isl: maybe-all-stage1-gmp configure-stage2-isl: maybe-all-stage2-gmp configure-stage3-isl: maybe-all-stage3-gmp @@ -57395,7 +61036,6 @@ configure-stagefeedback-isl: maybe-all-stagefeedback-gmp configure-stageautoprofile-isl: maybe-all-stageautoprofile-gmp configure-stageautofeedback-isl: maybe-all-stageautofeedback-gmp all-intl: maybe-all-libiconv - all-stage1-intl: maybe-all-stage1-libiconv all-stage2-intl: maybe-all-stage2-libiconv all-stage3-intl: maybe-all-stage3-libiconv @@ -57405,25 +61045,26 @@ all-stagetrain-intl: maybe-all-stagetrain-libiconv all-stagefeedback-intl: maybe-all-stagefeedback-libiconv all-stageautoprofile-intl: maybe-all-stageautoprofile-libiconv all-stageautofeedback-intl: maybe-all-stageautofeedback-libiconv -configure-gdb: maybe-all-intl configure-gdb: maybe-configure-sim -configure-gdb: maybe-all-bfd -configure-gdb: 
maybe-all-libiconv -all-gdb: maybe-all-libiberty -all-gdb: maybe-all-libiconv -all-gdb: maybe-all-opcodes +configure-gdb: maybe-all-gnulib +configure-gdb: maybe-all-gdbsupport +all-gdb: maybe-all-gnulib +all-gdb: maybe-all-gdbsupport all-gdb: maybe-all-readline all-gdb: maybe-all-build-bison all-gdb: maybe-all-sim -all-gdb: maybe-all-libdecnumber all-gdb: maybe-all-libtermcap +configure-gdbserver: maybe-all-gnulib +all-gdbserver: maybe-all-gdbsupport +all-gdbserver: maybe-all-gnulib configure-libgui: maybe-configure-tcl configure-libgui: maybe-configure-tk all-libgui: maybe-all-tcl all-libgui: maybe-all-tk all-libgui: maybe-all-itcl +configure-gdbsupport: maybe-configure-gnulib +all-gdbsupport: maybe-all-gnulib configure-bfd: configure-libiberty - configure-stage1-bfd: configure-stage1-libiberty configure-stage2-bfd: configure-stage2-libiberty configure-stage3-bfd: configure-stage3-libiberty @@ -57434,7 +61075,6 @@ configure-stagefeedback-bfd: configure-stagefeedback-libiberty configure-stageautoprofile-bfd: configure-stageautoprofile-libiberty configure-stageautofeedback-bfd: configure-stageautofeedback-libiberty configure-bfd: maybe-configure-intl - configure-stage1-bfd: maybe-configure-stage1-intl configure-stage2-bfd: maybe-configure-stage2-intl configure-stage3-bfd: maybe-configure-stage3-intl @@ -57445,7 +61085,6 @@ configure-stagefeedback-bfd: maybe-configure-stagefeedback-intl configure-stageautoprofile-bfd: maybe-configure-stageautoprofile-intl configure-stageautofeedback-bfd: maybe-configure-stageautofeedback-intl all-bfd: maybe-all-libiberty - all-stage1-bfd: maybe-all-stage1-libiberty all-stage2-bfd: maybe-all-stage2-libiberty all-stage3-bfd: maybe-all-stage3-libiberty @@ -57456,7 +61095,6 @@ all-stagefeedback-bfd: maybe-all-stagefeedback-libiberty all-stageautoprofile-bfd: maybe-all-stageautoprofile-libiberty all-stageautofeedback-bfd: maybe-all-stageautofeedback-libiberty all-bfd: maybe-all-intl - all-stage1-bfd: maybe-all-stage1-intl all-stage2-bfd: maybe-all-stage2-intl all-stage3-bfd: maybe-all-stage3-intl @@ -57467,7 +61105,6 @@ all-stagefeedback-bfd: maybe-all-stagefeedback-intl all-stageautoprofile-bfd: maybe-all-stageautoprofile-intl all-stageautofeedback-bfd: maybe-all-stageautofeedback-intl all-bfd: maybe-all-zlib - all-stage1-bfd: maybe-all-stage1-zlib all-stage2-bfd: maybe-all-stage2-zlib all-stage3-bfd: maybe-all-stage3-zlib @@ -57478,7 +61115,6 @@ all-stagefeedback-bfd: maybe-all-stagefeedback-zlib all-stageautoprofile-bfd: maybe-all-stageautoprofile-zlib all-stageautofeedback-bfd: maybe-all-stageautofeedback-zlib configure-opcodes: configure-libiberty - configure-stage1-opcodes: configure-stage1-libiberty configure-stage2-opcodes: configure-stage2-libiberty configure-stage3-opcodes: configure-stage3-libiberty @@ -57489,7 +61125,6 @@ configure-stagefeedback-opcodes: configure-stagefeedback-libiberty configure-stageautoprofile-opcodes: configure-stageautoprofile-libiberty configure-stageautofeedback-opcodes: configure-stageautofeedback-libiberty all-opcodes: maybe-all-libiberty - all-stage1-opcodes: maybe-all-stage1-libiberty all-stage2-opcodes: maybe-all-stage2-libiberty all-stage3-opcodes: maybe-all-stage3-libiberty @@ -57500,7 +61135,6 @@ all-stagefeedback-opcodes: maybe-all-stagefeedback-libiberty all-stageautoprofile-opcodes: maybe-all-stageautoprofile-libiberty all-stageautofeedback-opcodes: maybe-all-stageautofeedback-libiberty configure-binutils: maybe-configure-intl - configure-stage1-binutils: maybe-configure-stage1-intl configure-stage2-binutils: 
maybe-configure-stage2-intl configure-stage3-binutils: maybe-configure-stage3-intl @@ -57511,7 +61145,6 @@ configure-stagefeedback-binutils: maybe-configure-stagefeedback-intl configure-stageautoprofile-binutils: maybe-configure-stageautoprofile-intl configure-stageautofeedback-binutils: maybe-configure-stageautofeedback-intl all-binutils: maybe-all-libiberty - all-stage1-binutils: maybe-all-stage1-libiberty all-stage2-binutils: maybe-all-stage2-libiberty all-stage3-binutils: maybe-all-stage3-libiberty @@ -57522,7 +61155,6 @@ all-stagefeedback-binutils: maybe-all-stagefeedback-libiberty all-stageautoprofile-binutils: maybe-all-stageautoprofile-libiberty all-stageautofeedback-binutils: maybe-all-stageautofeedback-libiberty all-binutils: maybe-all-opcodes - all-stage1-binutils: maybe-all-stage1-opcodes all-stage2-binutils: maybe-all-stage2-opcodes all-stage3-binutils: maybe-all-stage3-opcodes @@ -57533,7 +61165,6 @@ all-stagefeedback-binutils: maybe-all-stagefeedback-opcodes all-stageautoprofile-binutils: maybe-all-stageautoprofile-opcodes all-stageautofeedback-binutils: maybe-all-stageautofeedback-opcodes all-binutils: maybe-all-bfd - all-stage1-binutils: maybe-all-stage1-bfd all-stage2-binutils: maybe-all-stage2-bfd all-stage3-binutils: maybe-all-stage3-bfd @@ -57544,7 +61175,6 @@ all-stagefeedback-binutils: maybe-all-stagefeedback-bfd all-stageautoprofile-binutils: maybe-all-stageautoprofile-bfd all-stageautofeedback-binutils: maybe-all-stageautofeedback-bfd all-binutils: maybe-all-build-flex - all-stage1-binutils: maybe-all-build-flex all-stage2-binutils: maybe-all-build-flex all-stage3-binutils: maybe-all-build-flex @@ -57555,7 +61185,6 @@ all-stagefeedback-binutils: maybe-all-build-flex all-stageautoprofile-binutils: maybe-all-build-flex all-stageautofeedback-binutils: maybe-all-build-flex all-binutils: maybe-all-build-bison - all-stage1-binutils: maybe-all-build-bison all-stage2-binutils: maybe-all-build-bison all-stage3-binutils: maybe-all-build-bison @@ -57566,7 +61195,6 @@ all-stagefeedback-binutils: maybe-all-build-bison all-stageautoprofile-binutils: maybe-all-build-bison all-stageautofeedback-binutils: maybe-all-build-bison all-binutils: maybe-all-intl - all-stage1-binutils: maybe-all-stage1-intl all-stage2-binutils: maybe-all-stage2-intl all-stage3-binutils: maybe-all-stage3-intl @@ -57577,7 +61205,6 @@ all-stagefeedback-binutils: maybe-all-stagefeedback-intl all-stageautoprofile-binutils: maybe-all-stageautoprofile-intl all-stageautofeedback-binutils: maybe-all-stageautofeedback-intl all-binutils: maybe-all-gas - all-stage1-binutils: maybe-all-stage1-gas all-stage2-binutils: maybe-all-stage2-gas all-stage3-binutils: maybe-all-stage3-gas @@ -57587,12 +61214,31 @@ all-stagetrain-binutils: maybe-all-stagetrain-gas all-stagefeedback-binutils: maybe-all-stagefeedback-gas all-stageautoprofile-binutils: maybe-all-stageautoprofile-gas all-stageautofeedback-binutils: maybe-all-stageautofeedback-gas +all-binutils: maybe-all-libctf +all-stage1-binutils: maybe-all-stage1-libctf +all-stage2-binutils: maybe-all-stage2-libctf +all-stage3-binutils: maybe-all-stage3-libctf +all-stage4-binutils: maybe-all-stage4-libctf +all-stageprofile-binutils: maybe-all-stageprofile-libctf +all-stagetrain-binutils: maybe-all-stagetrain-libctf +all-stagefeedback-binutils: maybe-all-stagefeedback-libctf +all-stageautoprofile-binutils: maybe-all-stageautoprofile-libctf +all-stageautofeedback-binutils: maybe-all-stageautofeedback-libctf +all-ld: maybe-all-libctf +all-stage1-ld: maybe-all-stage1-libctf 
+all-stage2-ld: maybe-all-stage2-libctf +all-stage3-ld: maybe-all-stage3-libctf +all-stage4-ld: maybe-all-stage4-libctf +all-stageprofile-ld: maybe-all-stageprofile-libctf +all-stagetrain-ld: maybe-all-stagetrain-libctf +all-stagefeedback-ld: maybe-all-stagefeedback-libctf +all-stageautoprofile-ld: maybe-all-stageautoprofile-libctf +all-stageautofeedback-ld: maybe-all-stageautofeedback-libctf install-binutils: maybe-install-opcodes install-strip-binutils: maybe-install-strip-opcodes install-opcodes: maybe-install-bfd install-strip-opcodes: maybe-install-strip-bfd configure-gas: maybe-configure-intl - configure-stage1-gas: maybe-configure-stage1-intl configure-stage2-gas: maybe-configure-stage2-intl configure-stage3-gas: maybe-configure-stage3-intl @@ -57603,7 +61249,6 @@ configure-stagefeedback-gas: maybe-configure-stagefeedback-intl configure-stageautoprofile-gas: maybe-configure-stageautoprofile-intl configure-stageautofeedback-gas: maybe-configure-stageautofeedback-intl all-gas: maybe-all-libiberty - all-stage1-gas: maybe-all-stage1-libiberty all-stage2-gas: maybe-all-stage2-libiberty all-stage3-gas: maybe-all-stage3-libiberty @@ -57614,7 +61259,6 @@ all-stagefeedback-gas: maybe-all-stagefeedback-libiberty all-stageautoprofile-gas: maybe-all-stageautoprofile-libiberty all-stageautofeedback-gas: maybe-all-stageautofeedback-libiberty all-gas: maybe-all-opcodes - all-stage1-gas: maybe-all-stage1-opcodes all-stage2-gas: maybe-all-stage2-opcodes all-stage3-gas: maybe-all-stage3-opcodes @@ -57625,7 +61269,6 @@ all-stagefeedback-gas: maybe-all-stagefeedback-opcodes all-stageautoprofile-gas: maybe-all-stageautoprofile-opcodes all-stageautofeedback-gas: maybe-all-stageautofeedback-opcodes all-gas: maybe-all-bfd - all-stage1-gas: maybe-all-stage1-bfd all-stage2-gas: maybe-all-stage2-bfd all-stage3-gas: maybe-all-stage3-bfd @@ -57636,7 +61279,6 @@ all-stagefeedback-gas: maybe-all-stagefeedback-bfd all-stageautoprofile-gas: maybe-all-stageautoprofile-bfd all-stageautofeedback-gas: maybe-all-stageautofeedback-bfd all-gas: maybe-all-intl - all-stage1-gas: maybe-all-stage1-intl all-stage2-gas: maybe-all-stage2-intl all-stage3-gas: maybe-all-stage3-intl @@ -57646,14 +61288,7 @@ all-stagetrain-gas: maybe-all-stagetrain-intl all-stagefeedback-gas: maybe-all-stagefeedback-intl all-stageautoprofile-gas: maybe-all-stageautoprofile-intl all-stageautofeedback-gas: maybe-all-stageautofeedback-intl -configure-gprof: maybe-configure-intl -all-gprof: maybe-all-libiberty -all-gprof: maybe-all-bfd -all-gprof: maybe-all-opcodes -all-gprof: maybe-all-intl -all-gprof: maybe-all-gas configure-ld: maybe-configure-intl - configure-stage1-ld: maybe-configure-stage1-intl configure-stage2-ld: maybe-configure-stage2-intl configure-stage3-ld: maybe-configure-stage3-intl @@ -57664,7 +61299,6 @@ configure-stagefeedback-ld: maybe-configure-stagefeedback-intl configure-stageautoprofile-ld: maybe-configure-stageautoprofile-intl configure-stageautofeedback-ld: maybe-configure-stageautofeedback-intl all-ld: maybe-all-libiberty - all-stage1-ld: maybe-all-stage1-libiberty all-stage2-ld: maybe-all-stage2-libiberty all-stage3-ld: maybe-all-stage3-libiberty @@ -57675,7 +61309,6 @@ all-stagefeedback-ld: maybe-all-stagefeedback-libiberty all-stageautoprofile-ld: maybe-all-stageautoprofile-libiberty all-stageautofeedback-ld: maybe-all-stageautofeedback-libiberty all-ld: maybe-all-bfd - all-stage1-ld: maybe-all-stage1-bfd all-stage2-ld: maybe-all-stage2-bfd all-stage3-ld: maybe-all-stage3-bfd @@ -57686,7 +61319,6 @@ all-stagefeedback-ld: 
maybe-all-stagefeedback-bfd all-stageautoprofile-ld: maybe-all-stageautoprofile-bfd all-stageautofeedback-ld: maybe-all-stageautofeedback-bfd all-ld: maybe-all-opcodes - all-stage1-ld: maybe-all-stage1-opcodes all-stage2-ld: maybe-all-stage2-opcodes all-stage3-ld: maybe-all-stage3-opcodes @@ -57697,7 +61329,6 @@ all-stagefeedback-ld: maybe-all-stagefeedback-opcodes all-stageautoprofile-ld: maybe-all-stageautoprofile-opcodes all-stageautofeedback-ld: maybe-all-stageautofeedback-opcodes all-ld: maybe-all-build-bison - all-stage1-ld: maybe-all-build-bison all-stage2-ld: maybe-all-build-bison all-stage3-ld: maybe-all-build-bison @@ -57708,7 +61339,6 @@ all-stagefeedback-ld: maybe-all-build-bison all-stageautoprofile-ld: maybe-all-build-bison all-stageautofeedback-ld: maybe-all-build-bison all-ld: maybe-all-build-flex - all-stage1-ld: maybe-all-build-flex all-stage2-ld: maybe-all-build-flex all-stage3-ld: maybe-all-build-flex @@ -57719,7 +61349,6 @@ all-stagefeedback-ld: maybe-all-build-flex all-stageautoprofile-ld: maybe-all-build-flex all-stageautofeedback-ld: maybe-all-build-flex all-ld: maybe-all-intl - all-stage1-ld: maybe-all-stage1-intl all-stage2-ld: maybe-all-stage2-intl all-stage3-ld: maybe-all-stage3-intl @@ -57730,7 +61359,6 @@ all-stagefeedback-ld: maybe-all-stagefeedback-intl all-stageautoprofile-ld: maybe-all-stageautoprofile-intl all-stageautofeedback-ld: maybe-all-stageautofeedback-intl all-ld: maybe-all-gas - all-stage1-ld: maybe-all-stage1-gas all-stage2-ld: maybe-all-stage2-gas all-stage3-ld: maybe-all-stage3-gas @@ -57741,7 +61369,6 @@ all-stagefeedback-ld: maybe-all-stagefeedback-gas all-stageautoprofile-ld: maybe-all-stageautoprofile-gas all-stageautofeedback-ld: maybe-all-stageautofeedback-gas all-ld: maybe-all-binutils - all-stage1-ld: maybe-all-stage1-binutils all-stage2-ld: maybe-all-stage2-binutils all-stage3-ld: maybe-all-stage3-binutils @@ -57754,7 +61381,6 @@ all-stageautofeedback-ld: maybe-all-stageautofeedback-binutils install-ld: maybe-install-gold install-strip-ld: maybe-install-strip-gold configure-gold: maybe-configure-intl - configure-stage1-gold: maybe-configure-stage1-intl configure-stage2-gold: maybe-configure-stage2-intl configure-stage3-gold: maybe-configure-stage3-intl @@ -57765,7 +61391,6 @@ configure-stagefeedback-gold: maybe-configure-stagefeedback-intl configure-stageautoprofile-gold: maybe-configure-stageautoprofile-intl configure-stageautofeedback-gold: maybe-configure-stageautofeedback-intl all-gold: maybe-all-libiberty - all-stage1-gold: maybe-all-stage1-libiberty all-stage2-gold: maybe-all-stage2-libiberty all-stage3-gold: maybe-all-stage3-libiberty @@ -57776,7 +61401,6 @@ all-stagefeedback-gold: maybe-all-stagefeedback-libiberty all-stageautoprofile-gold: maybe-all-stageautoprofile-libiberty all-stageautofeedback-gold: maybe-all-stageautofeedback-libiberty all-gold: maybe-all-intl - all-stage1-gold: maybe-all-stage1-intl all-stage2-gold: maybe-all-stage2-intl all-stage3-gold: maybe-all-stage3-intl @@ -57787,7 +61411,6 @@ all-stagefeedback-gold: maybe-all-stagefeedback-intl all-stageautoprofile-gold: maybe-all-stageautoprofile-intl all-stageautofeedback-gold: maybe-all-stageautofeedback-intl all-gold: maybe-all-bfd - all-stage1-gold: maybe-all-stage1-bfd all-stage2-gold: maybe-all-stage2-bfd all-stage3-gold: maybe-all-stage3-bfd @@ -57798,7 +61421,6 @@ all-stagefeedback-gold: maybe-all-stagefeedback-bfd all-stageautoprofile-gold: maybe-all-stageautoprofile-bfd all-stageautofeedback-gold: maybe-all-stageautofeedback-bfd all-gold: 
maybe-all-build-bison - all-stage1-gold: maybe-all-build-bison all-stage2-gold: maybe-all-build-bison all-stage3-gold: maybe-all-build-bison @@ -57809,7 +61431,6 @@ all-stagefeedback-gold: maybe-all-build-bison all-stageautoprofile-gold: maybe-all-build-bison all-stageautofeedback-gold: maybe-all-build-bison all-gold: maybe-all-gas - all-stage1-gold: maybe-all-stage1-gas all-stage2-gold: maybe-all-stage2-gas all-stage3-gold: maybe-all-stage3-gas @@ -57820,7 +61441,6 @@ all-stagefeedback-gold: maybe-all-stagefeedback-gas all-stageautoprofile-gold: maybe-all-stageautoprofile-gas all-stageautofeedback-gold: maybe-all-stageautofeedback-gas check-gold: maybe-all-binutils - check-stage1-gold: maybe-all-stage1-binutils check-stage2-gold: maybe-all-stage2-binutils check-stage3-gold: maybe-all-stage3-binutils @@ -57831,7 +61451,6 @@ check-stagefeedback-gold: maybe-all-stagefeedback-binutils check-stageautoprofile-gold: maybe-all-stageautoprofile-binutils check-stageautofeedback-gold: maybe-all-stageautofeedback-binutils check-gold: maybe-all-gas - check-stage1-gold: maybe-all-stage1-gas check-stage2-gold: maybe-all-stage2-gas check-stage3-gold: maybe-all-stage3-gas @@ -57842,7 +61461,6 @@ check-stagefeedback-gold: maybe-all-stagefeedback-gas check-stageautoprofile-gold: maybe-all-stageautoprofile-gas check-stageautofeedback-gold: maybe-all-stageautofeedback-gas configure-opcodes: maybe-configure-intl - configure-stage1-opcodes: maybe-configure-stage1-intl configure-stage2-opcodes: maybe-configure-stage2-intl configure-stage3-opcodes: maybe-configure-stage3-intl @@ -57853,7 +61471,6 @@ configure-stagefeedback-opcodes: maybe-configure-stagefeedback-intl configure-stageautoprofile-opcodes: maybe-configure-stageautoprofile-intl configure-stageautofeedback-opcodes: maybe-configure-stageautofeedback-intl all-opcodes: maybe-all-bfd - all-stage1-opcodes: maybe-all-stage1-bfd all-stage2-opcodes: maybe-all-stage2-bfd all-stage3-opcodes: maybe-all-stage3-bfd @@ -57864,7 +61481,6 @@ all-stagefeedback-opcodes: maybe-all-stagefeedback-bfd all-stageautoprofile-opcodes: maybe-all-stageautoprofile-bfd all-stageautofeedback-opcodes: maybe-all-stageautofeedback-bfd all-opcodes: maybe-all-libiberty - all-stage1-opcodes: maybe-all-stage1-libiberty all-stage2-opcodes: maybe-all-stage2-libiberty all-stage3-opcodes: maybe-all-stage3-libiberty @@ -57875,7 +61491,6 @@ all-stagefeedback-opcodes: maybe-all-stagefeedback-libiberty all-stageautoprofile-opcodes: maybe-all-stageautoprofile-libiberty all-stageautofeedback-opcodes: maybe-all-stageautofeedback-libiberty all-opcodes: maybe-all-intl - all-stage1-opcodes: maybe-all-stage1-intl all-stage2-opcodes: maybe-all-stage2-intl all-stage3-opcodes: maybe-all-stage3-intl @@ -57900,42 +61515,111 @@ install-itcl: maybe-install-tcl install-strip-itcl: maybe-install-strip-tcl configure-tk: maybe-configure-tcl all-tk: maybe-all-tcl -all-sid: maybe-all-libiberty -all-sid: maybe-all-bfd -all-sid: maybe-all-opcodes all-sid: maybe-all-tcl all-sid: maybe-all-tk install-sid: maybe-install-tcl install-strip-sid: maybe-install-strip-tcl install-sid: maybe-install-tk install-strip-sid: maybe-install-strip-tk -configure-sim: maybe-configure-intl -all-sim: maybe-all-intl -all-sim: maybe-all-libiberty -all-sim: maybe-all-bfd -all-sim: maybe-all-opcodes all-sim: maybe-all-readline -all-sim: maybe-configure-gdb -all-fastjar: maybe-all-zlib all-fastjar: maybe-all-build-texinfo -all-fastjar: maybe-all-libiberty -all-bison: maybe-all-intl +all-libctf: all-libiberty +all-stage1-libctf: 
all-stage1-libiberty +all-stage2-libctf: all-stage2-libiberty +all-stage3-libctf: all-stage3-libiberty +all-stage4-libctf: all-stage4-libiberty +all-stageprofile-libctf: all-stageprofile-libiberty +all-stagetrain-libctf: all-stagetrain-libiberty +all-stagefeedback-libctf: all-stagefeedback-libiberty +all-stageautoprofile-libctf: all-stageautoprofile-libiberty +all-stageautofeedback-libctf: all-stageautofeedback-libiberty +all-libctf: maybe-all-bfd +all-stage1-libctf: maybe-all-stage1-bfd +all-stage2-libctf: maybe-all-stage2-bfd +all-stage3-libctf: maybe-all-stage3-bfd +all-stage4-libctf: maybe-all-stage4-bfd +all-stageprofile-libctf: maybe-all-stageprofile-bfd +all-stagetrain-libctf: maybe-all-stagetrain-bfd +all-stagefeedback-libctf: maybe-all-stagefeedback-bfd +all-stageautoprofile-libctf: maybe-all-stageautoprofile-bfd +all-stageautofeedback-libctf: maybe-all-stageautofeedback-bfd +all-libctf: maybe-all-zlib +all-stage1-libctf: maybe-all-stage1-zlib +all-stage2-libctf: maybe-all-stage2-zlib +all-stage3-libctf: maybe-all-stage3-zlib +all-stage4-libctf: maybe-all-stage4-zlib +all-stageprofile-libctf: maybe-all-stageprofile-zlib +all-stagetrain-libctf: maybe-all-stagetrain-zlib +all-stagefeedback-libctf: maybe-all-stagefeedback-zlib +all-stageautoprofile-libctf: maybe-all-stageautoprofile-zlib +all-stageautofeedback-libctf: maybe-all-stageautofeedback-zlib +configure-libctf: maybe-all-bfd +configure-stage1-libctf: maybe-all-stage1-bfd +configure-stage2-libctf: maybe-all-stage2-bfd +configure-stage3-libctf: maybe-all-stage3-bfd +configure-stage4-libctf: maybe-all-stage4-bfd +configure-stageprofile-libctf: maybe-all-stageprofile-bfd +configure-stagetrain-libctf: maybe-all-stagetrain-bfd +configure-stagefeedback-libctf: maybe-all-stagefeedback-bfd +configure-stageautoprofile-libctf: maybe-all-stageautoprofile-bfd +configure-stageautofeedback-libctf: maybe-all-stageautofeedback-bfd +configure-libctf: maybe-all-intl +configure-stage1-libctf: maybe-all-stage1-intl +configure-stage2-libctf: maybe-all-stage2-intl +configure-stage3-libctf: maybe-all-stage3-intl +configure-stage4-libctf: maybe-all-stage4-intl +configure-stageprofile-libctf: maybe-all-stageprofile-intl +configure-stagetrain-libctf: maybe-all-stagetrain-intl +configure-stagefeedback-libctf: maybe-all-stagefeedback-intl +configure-stageautoprofile-libctf: maybe-all-stageautoprofile-intl +configure-stageautofeedback-libctf: maybe-all-stageautofeedback-intl +configure-libctf: maybe-all-zlib +configure-stage1-libctf: maybe-all-stage1-zlib +configure-stage2-libctf: maybe-all-stage2-zlib +configure-stage3-libctf: maybe-all-stage3-zlib +configure-stage4-libctf: maybe-all-stage4-zlib +configure-stageprofile-libctf: maybe-all-stageprofile-zlib +configure-stagetrain-libctf: maybe-all-stagetrain-zlib +configure-stagefeedback-libctf: maybe-all-stagefeedback-zlib +configure-stageautoprofile-libctf: maybe-all-stageautoprofile-zlib +configure-stageautofeedback-libctf: maybe-all-stageautofeedback-zlib +configure-libctf: maybe-all-libiconv +configure-stage1-libctf: maybe-all-stage1-libiconv +configure-stage2-libctf: maybe-all-stage2-libiconv +configure-stage3-libctf: maybe-all-stage3-libiconv +configure-stage4-libctf: maybe-all-stage4-libiconv +configure-stageprofile-libctf: maybe-all-stageprofile-libiconv +configure-stagetrain-libctf: maybe-all-stagetrain-libiconv +configure-stagefeedback-libctf: maybe-all-stagefeedback-libiconv +configure-stageautoprofile-libctf: maybe-all-stageautoprofile-libiconv +configure-stageautofeedback-libctf: 
maybe-all-stageautofeedback-libiconv +check-libctf: maybe-all-ld +check-stage1-libctf: maybe-all-stage1-ld +check-stage2-libctf: maybe-all-stage2-ld +check-stage3-libctf: maybe-all-stage3-ld +check-stage4-libctf: maybe-all-stage4-ld +check-stageprofile-libctf: maybe-all-stageprofile-ld +check-stagetrain-libctf: maybe-all-stagetrain-ld +check-stagefeedback-libctf: maybe-all-stagefeedback-ld +check-stageautoprofile-libctf: maybe-all-stageautoprofile-ld +check-stageautofeedback-libctf: maybe-all-stageautofeedback-ld all-bison: maybe-all-build-texinfo all-flex: maybe-all-build-bison -all-flex: maybe-all-intl all-flex: maybe-all-m4 all-flex: maybe-all-build-texinfo -all-m4: maybe-all-intl all-m4: maybe-all-build-texinfo configure-target-fastjar: maybe-configure-target-zlib all-target-fastjar: maybe-all-target-zlib configure-target-libgo: maybe-configure-target-libffi -configure-target-libgo: maybe-all-target-libstdc++-v3 all-target-libgo: maybe-all-target-libbacktrace all-target-libgo: maybe-all-target-libffi all-target-libgo: maybe-all-target-libatomic +configure-target-libphobos: maybe-configure-target-libbacktrace +configure-target-libphobos: maybe-configure-target-zlib +all-target-libphobos: maybe-all-target-libbacktrace +all-target-libphobos: maybe-all-target-zlib +all-target-libphobos: maybe-all-target-libatomic configure-target-libstdc++-v3: maybe-configure-target-libgomp - configure-stage1-target-libstdc++-v3: maybe-configure-stage1-target-libgomp configure-stage2-target-libstdc++-v3: maybe-configure-stage2-target-libgomp configure-stage3-target-libstdc++-v3: maybe-configure-stage3-target-libgomp @@ -57945,9 +61629,7 @@ configure-stagetrain-target-libstdc++-v3: maybe-configure-stagetrain-target-libg configure-stagefeedback-target-libstdc++-v3: maybe-configure-stagefeedback-target-libgomp configure-stageautoprofile-target-libstdc++-v3: maybe-configure-stageautoprofile-target-libgomp configure-stageautofeedback-target-libstdc++-v3: maybe-configure-stageautofeedback-target-libgomp -configure-target-liboffloadmic: maybe-configure-target-libgomp configure-target-libsanitizer: maybe-all-target-libstdc++-v3 - configure-stage1-target-libsanitizer: maybe-all-stage1-target-libstdc++-v3 configure-stage2-target-libsanitizer: maybe-all-stage2-target-libstdc++-v3 configure-stage3-target-libsanitizer: maybe-all-stage3-target-libstdc++-v3 @@ -57958,7 +61640,6 @@ configure-stagefeedback-target-libsanitizer: maybe-all-stagefeedback-target-libs configure-stageautoprofile-target-libsanitizer: maybe-all-stageautoprofile-target-libstdc++-v3 configure-stageautofeedback-target-libsanitizer: maybe-all-stageautofeedback-target-libstdc++-v3 configure-target-libvtv: maybe-all-target-libstdc++-v3 - configure-stage1-target-libvtv: maybe-all-stage1-target-libstdc++-v3 configure-stage2-target-libvtv: maybe-all-stage2-target-libstdc++-v3 configure-stage3-target-libvtv: maybe-all-stage3-target-libstdc++-v3 @@ -57969,7 +61650,6 @@ configure-stagefeedback-target-libvtv: maybe-all-stagefeedback-target-libstdc++- configure-stageautoprofile-target-libvtv: maybe-all-stageautoprofile-target-libstdc++-v3 configure-stageautofeedback-target-libvtv: maybe-all-stageautofeedback-target-libstdc++-v3 all-target-libstdc++-v3: maybe-configure-target-libgomp - all-stage1-target-libstdc++-v3: maybe-configure-stage1-target-libgomp all-stage2-target-libstdc++-v3: maybe-configure-stage2-target-libgomp all-stage3-target-libstdc++-v3: maybe-configure-stage3-target-libgomp @@ -57979,10 +61659,10 @@ all-stagetrain-target-libstdc++-v3: 
maybe-configure-stagetrain-target-libgomp all-stagefeedback-target-libstdc++-v3: maybe-configure-stagefeedback-target-libgomp all-stageautoprofile-target-libstdc++-v3: maybe-configure-stageautoprofile-target-libgomp all-stageautofeedback-target-libstdc++-v3: maybe-configure-stageautofeedback-target-libgomp -all-target-liboffloadmic: maybe-all-target-libgomp install-target-libgo: maybe-install-target-libatomic install-target-libgfortran: maybe-install-target-libquadmath install-target-libgfortran: maybe-install-target-libgcc +install-target-libphobos: maybe-install-target-libatomic install-target-libsanitizer: maybe-install-target-libstdc++-v3 install-target-libsanitizer: maybe-install-target-libgcc install-target-libvtv: maybe-install-target-libstdc++-v3 @@ -57994,12 +61674,70 @@ install-target-libobjc: maybe-install-target-libgcc install-target-libstdc++-v3: maybe-install-target-libgcc all-target-libgloss: maybe-all-target-newlib all-target-winsup: maybe-all-target-libtermcap -configure-target-newlib: maybe-all-binutils -configure-target-newlib: maybe-all-ld configure-target-libgfortran: maybe-all-target-libquadmath configure-target-libgfortran: maybe-all-target-libbacktrace +@if gcc-bootstrap +configure-gnattools: stage_last +configure-libcc1: stage_last +configure-c++tools: stage_last +configure-utils: stage_last +configure-gdb: stage_last +configure-gdbserver: stage_last +configure-gdbsupport: stage_last +configure-gprof: stage_last +configure-sid: stage_last +configure-sim: stage_last +configure-fastjar: stage_last +configure-bison: stage_last +configure-flex: stage_last +configure-m4: stage_last +@endif gcc-bootstrap + +@unless gcc-bootstrap +all-gnattools: maybe-all-target-libstdc++-v3 +configure-libcc1: maybe-configure-gcc +all-libcc1: maybe-all-gcc +all-c++tools: maybe-all-gcc +all-utils: maybe-all-libiberty +configure-gdb: maybe-all-intl +configure-gdb: maybe-all-bfd +configure-gdb: maybe-all-libiconv +all-gdb: maybe-all-libiberty +all-gdb: maybe-all-libiconv +all-gdb: maybe-all-opcodes +all-gdb: maybe-all-libdecnumber +all-gdb: maybe-all-libctf +all-gdbserver: maybe-all-libiberty +configure-gdbsupport: maybe-configure-intl +all-gdbsupport: maybe-all-intl +configure-gprof: maybe-configure-intl +all-gprof: maybe-all-libiberty +all-gprof: maybe-all-bfd +all-gprof: maybe-all-opcodes +all-gprof: maybe-all-intl +all-gprof: maybe-all-gas +all-sid: maybe-all-libiberty +all-sid: maybe-all-bfd +all-sid: maybe-all-opcodes +configure-sim: maybe-configure-intl +all-sim: maybe-all-intl +all-sim: maybe-all-libiberty +all-sim: maybe-all-bfd +all-sim: maybe-all-opcodes +all-fastjar: maybe-all-zlib +all-fastjar: maybe-all-libiberty +all-bison: maybe-all-intl +all-flex: maybe-all-intl +all-m4: maybe-all-intl +configure-target-libgo: maybe-all-target-libstdc++-v3 +configure-target-liboffloadmic: maybe-configure-target-libgomp +all-target-liboffloadmic: maybe-all-target-libgomp +configure-target-newlib: maybe-all-binutils +configure-target-newlib: maybe-all-ld +@endunless gcc-bootstrap + # Dependencies for target modules on other target modules are # described by lang_env_dependencies; the defaults apply to anything # not mentioned there. 
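The @if gcc-bootstrap / @unless gcc-bootstrap pair above is what the new "postbootstrap" dependency kind expands to: when the compiler is being bootstrapped, the non-bootstrapped host tools are simply ordered after the whole bootstrap, and only in a non-bootstrap build do the fine-grained module edges remain. Condensed for a single module, with the prerequisites folded onto one line each (an informal restatement of the rules above; stage_last is the target the bootstrap machinery uses to mean the final stage has been built):

@if gcc-bootstrap
# bootstrapping: wait until the staged compiler and libraries are final
configure-gdb: stage_last
@endif gcc-bootstrap

@unless gcc-bootstrap
# no bootstrap: keep the explicit host-module edges
configure-gdb: maybe-all-intl maybe-all-bfd maybe-all-libiconv
all-gdb: maybe-all-libiberty maybe-all-libiconv maybe-all-opcodes maybe-all-libdecnumber maybe-all-libctf
@endunless gcc-bootstrap

Serializing these tools on stage_last keeps them from racing with the per-stage rebuilds of libiberty, bfd and friends, and they are built once, against the settled final copies of those libraries, instead of once per stage.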
@@ -58024,15 +61762,6 @@ configure-stagetrain-target-libsanitizer: maybe-all-stagetrain-target-libgcc configure-stagefeedback-target-libsanitizer: maybe-all-stagefeedback-target-libgcc configure-stageautoprofile-target-libsanitizer: maybe-all-stageautoprofile-target-libgcc configure-stageautofeedback-target-libsanitizer: maybe-all-stageautofeedback-target-libgcc -configure-stage1-target-libmpx: maybe-all-stage1-target-libgcc -configure-stage2-target-libmpx: maybe-all-stage2-target-libgcc -configure-stage3-target-libmpx: maybe-all-stage3-target-libgcc -configure-stage4-target-libmpx: maybe-all-stage4-target-libgcc -configure-stageprofile-target-libmpx: maybe-all-stageprofile-target-libgcc -configure-stagetrain-target-libmpx: maybe-all-stagetrain-target-libgcc -configure-stagefeedback-target-libmpx: maybe-all-stagefeedback-target-libgcc -configure-stageautoprofile-target-libmpx: maybe-all-stageautoprofile-target-libgcc -configure-stageautofeedback-target-libmpx: maybe-all-stageautofeedback-target-libgcc configure-stage1-target-libvtv: maybe-all-stage1-target-libgcc configure-stage2-target-libvtv: maybe-all-stage2-target-libgcc configure-stage3-target-libvtv: maybe-all-stage3-target-libgcc @@ -58056,7 +61785,6 @@ configure-stageautofeedback-target-libgomp: maybe-all-stageautofeedback-target-l @if gcc-no-bootstrap configure-target-libstdc++-v3: maybe-all-target-libgcc configure-target-libsanitizer: maybe-all-target-libgcc -configure-target-libmpx: maybe-all-target-libgcc configure-target-libvtv: maybe-all-target-libgcc configure-target-liboffloadmic: maybe-all-target-libgcc configure-target-libssp: maybe-all-target-libgcc @@ -58067,6 +61795,7 @@ configure-target-libgfortran: maybe-all-target-libgcc configure-target-libobjc: maybe-all-target-libgcc configure-target-libgo: maybe-all-target-libgcc configure-target-libhsail-rt: maybe-all-target-libgcc +configure-target-libphobos: maybe-all-target-libgcc configure-target-libtermcap: maybe-all-target-libgcc configure-target-winsup: maybe-all-target-libgcc configure-target-libgloss: maybe-all-target-libgcc @@ -58084,8 +61813,6 @@ configure-target-libstdc++-v3: maybe-all-target-newlib maybe-all-target-libgloss configure-target-libsanitizer: maybe-all-target-newlib maybe-all-target-libgloss -configure-target-libmpx: maybe-all-target-newlib maybe-all-target-libgloss - configure-target-libvtv: maybe-all-target-newlib maybe-all-target-libgloss configure-target-liboffloadmic: maybe-all-target-newlib maybe-all-target-libgloss @@ -58107,6 +61834,8 @@ configure-target-libgo: maybe-all-target-newlib maybe-all-target-libgloss configure-target-libhsail-rt: maybe-all-target-newlib maybe-all-target-libgloss +configure-target-libphobos: maybe-all-target-newlib maybe-all-target-libgloss + configure-target-libtermcap: maybe-all-target-newlib maybe-all-target-libgloss configure-target-winsup: maybe-all-target-newlib maybe-all-target-libgloss diff --git a/Makefile.tpl b/Makefile.tpl index 1f23b79b4b2ab..6e0337fb48fdb 100644 --- a/Makefile.tpl +++ b/Makefile.tpl @@ -159,6 +159,8 @@ BUILD_EXPORTS = \ GFORTRAN="$(GFORTRAN_FOR_BUILD)"; export GFORTRAN; \ GOC="$(GOC_FOR_BUILD)"; export GOC; \ GOCFLAGS="$(GOCFLAGS_FOR_BUILD)"; export GOCFLAGS; \ + GDC="$(GDC_FOR_BUILD)"; export GDC; \ + GDCFLAGS="$(GDCFLAGS_FOR_BUILD)"; export GDCFLAGS; \ DLLTOOL="$(DLLTOOL_FOR_BUILD)"; export DLLTOOL; \ LD="$(LD_FOR_BUILD)"; export LD; \ LDFLAGS="$(LDFLAGS_FOR_BUILD)"; export LDFLAGS; \ @@ -195,9 +197,11 @@ HOST_EXPORTS = \ CXXFLAGS="$(CXXFLAGS)"; export CXXFLAGS; \ GFORTRAN="$(GFORTRAN)"; 
export GFORTRAN; \ GOC="$(GOC)"; export GOC; \ + GDC="$(GDC)"; export GDC; \ AR="$(AR)"; export AR; \ AS="$(AS)"; export AS; \ CC_FOR_BUILD="$(CC_FOR_BUILD)"; export CC_FOR_BUILD; \ + CXX_FOR_BUILD="$(CXX_FOR_BUILD)"; export CXX_FOR_BUILD; \ DLLTOOL="$(DLLTOOL)"; export DLLTOOL; \ LD="$(LD)"; export LD; \ LDFLAGS="$(STAGE1_LDFLAGS) $(LDFLAGS)"; export LDFLAGS; \ @@ -207,6 +211,7 @@ HOST_EXPORTS = \ WINDMC="$(WINDMC)"; export WINDMC; \ OBJCOPY="$(OBJCOPY)"; export OBJCOPY; \ OBJDUMP="$(OBJDUMP)"; export OBJDUMP; \ + OTOOL="$(OTOOL)"; export OTOOL; \ READELF="$(READELF)"; export READELF; \ AR_FOR_TARGET="$(AR_FOR_TARGET)"; export AR_FOR_TARGET; \ AS_FOR_TARGET="$(AS_FOR_TARGET)"; export AS_FOR_TARGET; \ @@ -215,6 +220,7 @@ HOST_EXPORTS = \ NM_FOR_TARGET="$(NM_FOR_TARGET)"; export NM_FOR_TARGET; \ OBJDUMP_FOR_TARGET="$(OBJDUMP_FOR_TARGET)"; export OBJDUMP_FOR_TARGET; \ OBJCOPY_FOR_TARGET="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY_FOR_TARGET; \ + OTOOL_FOR_TARGET="$(OTOOL_FOR_TARGET)"; export OTOOL_FOR_TARGET; \ RANLIB_FOR_TARGET="$(RANLIB_FOR_TARGET)"; export RANLIB_FOR_TARGET; \ READELF_FOR_TARGET="$(READELF_FOR_TARGET)"; export READELF_FOR_TARGET; \ TOPLEVEL_CONFIGURE_ARGUMENTS="$(TOPLEVEL_CONFIGURE_ARGUMENTS)"; export TOPLEVEL_CONFIGURE_ARGUMENTS; \ @@ -259,6 +265,14 @@ POSTSTAGE1_HOST_EXPORTS = \ CC_FOR_BUILD="$$CC"; export CC_FOR_BUILD; \ $(POSTSTAGE1_CXX_EXPORT) \ $(LTO_EXPORTS) \ + GDC="$$r/$(HOST_SUBDIR)/prev-gcc/gdc$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/ \ + -B$(build_tooldir)/bin/ $(GDC_FLAGS_FOR_TARGET) \ + -B$$r/prev-$(TARGET_SUBDIR)/libphobos/src \ + -I$$r/prev-$(TARGET_SUBDIR)/libphobos/libdruntime -I$$s/libphobos/libdruntime \ + -L$$r/prev-$(TARGET_SUBDIR)/libphobos/src/.libs \ + -L$$r/prev-$(TARGET_SUBDIR)/libphobos/libdruntime/.libs"; \ + export GDC; \ + GDC_FOR_BUILD="$$GDC"; export GDC_FOR_BUILD; \ GNATBIND="$$r/$(HOST_SUBDIR)/prev-gcc/gnatbind"; export GNATBIND; \ LDFLAGS="$(POSTSTAGE1_LDFLAGS) $(BOOT_LDFLAGS)"; export LDFLAGS; \ HOST_LIBS="$(POSTSTAGE1_LIBS)"; export HOST_LIBS; @@ -281,6 +295,7 @@ BASE_TARGET_EXPORTS = \ CXXFLAGS="$(CXXFLAGS_FOR_TARGET)"; export CXXFLAGS; \ GFORTRAN="$(GFORTRAN_FOR_TARGET) $(XGCC_FLAGS_FOR_TARGET) $$TFLAGS"; export GFORTRAN; \ GOC="$(GOC_FOR_TARGET) $(XGCC_FLAGS_FOR_TARGET) $$TFLAGS"; export GOC; \ + GDC="$(GDC_FOR_TARGET) $(XGCC_FLAGS_FOR_TARGET) $$TFLAGS"; export GDC; \ DLLTOOL="$(DLLTOOL_FOR_TARGET)"; export DLLTOOL; \ LD="$(COMPILER_LD_FOR_TARGET)"; export LD; \ LDFLAGS="$(LDFLAGS_FOR_TARGET)"; export LDFLAGS; \ @@ -288,6 +303,7 @@ BASE_TARGET_EXPORTS = \ NM="$(COMPILER_NM_FOR_TARGET)"; export NM; \ OBJDUMP="$(OBJDUMP_FOR_TARGET)"; export OBJDUMP; \ OBJCOPY="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY; \ + OTOOL="$(OTOOL_FOR_TARGET)"; export OTOOL; \ RANLIB="$(RANLIB_FOR_TARGET)"; export RANLIB; \ READELF="$(READELF_FOR_TARGET)"; export READELF; \ STRIP="$(STRIP_FOR_TARGET)"; export STRIP; \ @@ -345,6 +361,7 @@ CXX_FOR_BUILD = @CXX_FOR_BUILD@ DLLTOOL_FOR_BUILD = @DLLTOOL_FOR_BUILD@ GFORTRAN_FOR_BUILD = @GFORTRAN_FOR_BUILD@ GOC_FOR_BUILD = @GOC_FOR_BUILD@ +GDC_FOR_BUILD = @GDC_FOR_BUILD@ LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ LD_FOR_BUILD = @LD_FOR_BUILD@ NM_FOR_BUILD = @NM_FOR_BUILD@ @@ -374,7 +391,7 @@ MAKEINFO = @MAKEINFO@ EXPECT = @EXPECT@ RUNTEST = @RUNTEST@ -AUTO_PROFILE = gcc-auto-profile -c 1000000 +AUTO_PROFILE = gcc-auto-profile -c 10000000 # This just becomes part of the MAKEINFO definition passed down to # sub-makes. 
It lets flags be given on the command line while still @@ -396,12 +413,14 @@ LD = @LD@ LIPO = @LIPO@ NM = @NM@ OBJDUMP = @OBJDUMP@ +OTOOL = @OTOOL@ RANLIB = @RANLIB@ READELF = @READELF@ STRIP = @STRIP@ WINDRES = @WINDRES@ WINDMC = @WINDMC@ +GDC = @GDC@ GNATBIND = @GNATBIND@ GNATMAKE = @GNATMAKE@ @@ -411,6 +430,7 @@ LIBCFLAGS = $(CFLAGS) CXXFLAGS = @CXXFLAGS@ LIBCXXFLAGS = $(CXXFLAGS) -fno-implicit-templates GOCFLAGS = $(CFLAGS) +GDCFLAGS = $(CFLAGS) CREATE_GCOV = create_gcov @@ -452,13 +472,23 @@ STAGE1_CONFIGURE_FLAGS = --disable-intermodule $(STAGE1_CHECKING) \ --disable-coverage --enable-languages="$(STAGE1_LANGUAGES)" \ --disable-build-format-warnings +# When using the slow stage1 compiler, disable IL verification and forcefully +# enable it when using the stage2 compiler instead. As we later compare +# stage2 and stage3 we are merely avoiding redundant work, plus we apply +# checking when building all target libraries for release builds. +STAGE1_TFLAGS += -fno-checking +STAGE2_CFLAGS += -fno-checking +STAGE2_TFLAGS += -fno-checking +STAGE3_CFLAGS += -fchecking=1 +STAGE3_TFLAGS += -fchecking=1 + STAGEprofile_CFLAGS = $(STAGE2_CFLAGS) -fprofile-generate STAGEprofile_TFLAGS = $(STAGE2_TFLAGS) -STAGEtrain_CFLAGS = $(STAGE3_CFLAGS) -STAGEtrain_TFLAGS = $(STAGE3_TFLAGS) +STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS)) +STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS)) -STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use +STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use -fprofile-reproducible=parallel-runs STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS) STAGEautoprofile_CFLAGS = $(STAGE2_CFLAGS) -g @@ -487,6 +517,7 @@ CXX_FOR_TARGET=$(STAGE_CC_WRAPPER) @CXX_FOR_TARGET@ RAW_CXX_FOR_TARGET=$(STAGE_CC_WRAPPER) @RAW_CXX_FOR_TARGET@ GFORTRAN_FOR_TARGET=$(STAGE_CC_WRAPPER) @GFORTRAN_FOR_TARGET@ GOC_FOR_TARGET=$(STAGE_CC_WRAPPER) @GOC_FOR_TARGET@ +GDC_FOR_TARGET=$(STAGE_CC_WRAPPER) @GDC_FOR_TARGET@ DLLTOOL_FOR_TARGET=@DLLTOOL_FOR_TARGET@ LD_FOR_TARGET=@LD_FOR_TARGET@ @@ -494,6 +525,7 @@ LIPO_FOR_TARGET=@LIPO_FOR_TARGET@ NM_FOR_TARGET=@NM_FOR_TARGET@ OBJDUMP_FOR_TARGET=@OBJDUMP_FOR_TARGET@ OBJCOPY_FOR_TARGET=@OBJCOPY_FOR_TARGET@ +OTOOL_FOR_TARGET=@OTOOL_FOR_TARGET@ RANLIB_FOR_TARGET=@RANLIB_FOR_TARGET@ READELF_FOR_TARGET=@READELF_FOR_TARGET@ STRIP_FOR_TARGET=@STRIP_FOR_TARGET@ @@ -511,6 +543,7 @@ LIBCFLAGS_FOR_TARGET = $(CFLAGS_FOR_TARGET) LIBCXXFLAGS_FOR_TARGET = $(CXXFLAGS_FOR_TARGET) -fno-implicit-templates LDFLAGS_FOR_TARGET = @LDFLAGS_FOR_TARGET@ GOCFLAGS_FOR_TARGET = -O2 -g +GDCFLAGS_FOR_TARGET = -O2 -g FLAGS_FOR_TARGET = @FLAGS_FOR_TARGET@ SYSROOT_CFLAGS_FOR_TARGET = @SYSROOT_CFLAGS_FOR_TARGET@ @@ -592,6 +625,7 @@ BASE_FLAGS_TO_PASS =[+ FOR flags_to_pass +][+ IF optional +] \ "[+flag+]=$([+flag+])"[+ ENDIF optional+][+ ENDFOR flags_to_pass +][+ FOR bootstrap-stage +] \ "STAGE[+id+]_CFLAGS=$(STAGE[+id+]_CFLAGS)" \ "STAGE[+id+]_CXXFLAGS=$(STAGE[+id+]_CXXFLAGS)" \ + "STAGE[+id+]_GENERATOR_CFLAGS=$(STAGE[+id+]_GENERATOR_CFLAGS)" \ "STAGE[+id+]_TFLAGS=$(STAGE[+id+]_TFLAGS)"[+ ENDFOR bootstrap-stage +] \ $(CXX_FOR_TARGET_FLAG_TO_PASS) \ "TFLAGS=$(TFLAGS)" \ @@ -612,10 +646,12 @@ EXTRA_HOST_FLAGS = \ 'DLLTOOL=$(DLLTOOL)' \ 'GFORTRAN=$(GFORTRAN)' \ 'GOC=$(GOC)' \ + 'GDC=$(GDC)' \ 'LD=$(LD)' \ 'LIPO=$(LIPO)' \ 'NM=$(NM)' \ 'OBJDUMP=$(OBJDUMP)' \ + 'OTOOL=$(OTOOL)' \ 'RANLIB=$(RANLIB)' \ 'READELF=$(READELF)' \ 'STRIP=$(STRIP)' \ @@ -636,6 +672,7 @@ STAGE1_FLAGS_TO_PASS = \ POSTSTAGE1_FLAGS_TO_PASS = \ CC="$${CC}" CC_FOR_BUILD="$${CC_FOR_BUILD}" \ CXX="$${CXX}"
CXX_FOR_BUILD="$${CXX_FOR_BUILD}" \ + GDC="$${GDC}" GDC_FOR_BUILD="$${GDC_FOR_BUILD}" \ GNATBIND="$${GNATBIND}" \ LDFLAGS="$${LDFLAGS}" \ HOST_LIBS="$${HOST_LIBS}" \ @@ -668,6 +705,8 @@ EXTRA_TARGET_FLAGS = \ 'GFORTRAN=$$(GFORTRAN_FOR_TARGET) $$(XGCC_FLAGS_FOR_TARGET) $$(TFLAGS)' \ 'GOC=$$(GOC_FOR_TARGET) $$(XGCC_FLAGS_FOR_TARGET) $$(TFLAGS)' \ 'GOCFLAGS=$$(GOCFLAGS_FOR_TARGET)' \ + 'GDC=$$(GDC_FOR_TARGET) $$(XGCC_FLAGS_FOR_TARGET) $$(TFLAGS)' \ + 'GDCFLAGS=$$(GDCFLAGS_FOR_TARGET)' \ 'LD=$(COMPILER_LD_FOR_TARGET)' \ 'LDFLAGS=$$(LDFLAGS_FOR_TARGET)' \ 'LIBCFLAGS=$$(LIBCFLAGS_FOR_TARGET)' \ @@ -825,8 +864,8 @@ local-distclean: -rm -f texinfo/doc/Makefile texinfo/po/POTFILES -rmdir texinfo/doc texinfo/info texinfo/intl texinfo/lib 2>/dev/null -rmdir texinfo/makeinfo texinfo/po texinfo/util 2>/dev/null - -rmdir fastjar gcc gnattools gotools libcc1 libiberty 2>/dev/null - -rmdir texinfo zlib 2>/dev/null + -rmdir c++tools fastjar gcc gnattools gotools 2>/dev/null + -rmdir libcc1 libiberty texinfo zlib 2>/dev/null -find . -name config.cache -exec rm -f {} \; \; 2>/dev/null local-maintainer-clean: @@ -1156,6 +1195,7 @@ all-stage[+id+]-[+prefix+][+module+]: configure-stage[+id+]-[+prefix+][+module+] CXXFLAGS="$(CXXFLAGS_FOR_TARGET)" \ LIBCFLAGS="$(LIBCFLAGS_FOR_TARGET)"[+ ELSE prefix +] \ CFLAGS="$(STAGE[+id+]_CFLAGS)" \ + GENERATOR_CFLAGS="$(STAGE[+id+]_GENERATOR_CFLAGS)" \ CXXFLAGS="$(STAGE[+id+]_CXXFLAGS)"[+ IF prev +] \ LIBCFLAGS="$(STAGE[+id+]_CFLAGS)"[+ ELSE prev +] \ LIBCFLAGS="$(LIBCFLAGS)"[+ ENDIF prev +][+ ENDIF prefix +] \ @@ -1460,6 +1500,10 @@ ENDIF raw_cxx +] check-target-libgomp-c++: $(MAKE) RUNTESTFLAGS="$(RUNTESTFLAGS) c++.exp" check-target-libgomp +.PHONY: check-target-libgomp-fortran +check-target-libgomp-fortran: + $(MAKE) RUNTESTFLAGS="$(RUNTESTFLAGS) fortran.exp" check-target-libgomp + @endif target-libgomp @if target-libitm @@ -1634,7 +1678,7 @@ do-clean: clean-stage[+id+] sed=`echo stage[+id+] | sed 's,^stage,,;s,.,.,g'`; \ files=`find stage[+id+]-* -name "*$(objext)" -print | \ sed -n s,^stage$$sed-,,p`; \ - for file in $${files}; do \ + for file in $${files} ${extra-compare}; do \ f1=$$r/stage[+prev+]-$$file; f2=$$r/stage[+id+]-$$file; \ if test ! -f $$f1; then continue; fi; \ $(do-[+compare-target+]) > /dev/null 2>&1; \ @@ -1718,8 +1762,8 @@ stageprofile-end:: stagefeedback-start:: @r=`${PWD_COMMAND}`; export r; \ s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - for i in stageprofile-*; do \ - j=`echo $$i | sed s/^stageprofile-//`; \ + for i in prev-*; do \ + j=`echo $$i | sed s/^prev-//`; \ cd $$r/$$i && \ { find . -type d | sort | sed 's,.*,$(SHELL) '"$$s"'/mkinstalldirs "../'$$j'/&",' | $(SHELL); } && \ { find . -name '*.*da' | sed 's,.*,$(LN) -f "&" "../'$$j'/&",' | $(SHELL); }; \ @@ -1814,25 +1858,46 @@ configure-target-[+module+]: maybe-all-gcc[+ (define dep-maybe (lambda () (if (exist? "hard") "" "maybe-"))) - ;; dep-kind returns "normal" if the dependency is on an "install" target, - ;; or if either module is not bootstrapped. It returns "bootstrap" for - ;; configure or build dependencies between bootstrapped modules; it returns - ;; "prebootstrap" for configure or build dependencies of bootstrapped - ;; modules on a build module (e.g. all-gcc on all-build-bison). All this - ;; is only necessary for host modules. + ;; dep-kind returns returns "prebootstrap" for configure or build + ;; dependencies of bootstrapped modules on a build module + ;; (e.g. 
all-gcc on all-build-bison); "normal" if the dependency is + ;; on an "install" target, or if the dependence module is not + ;; bootstrapped; otherwise, it returns "bootstrap" or + ;; "postbootstrap" depending on whether the dependent module is + ;; bootstrapped. All this is only necessary for host and target + ;; modules. It might seem like, in order to avoid build races, we + ;; might need more elaborate detection between prebootstrap and + ;; postbootstrap modules, but there are no host prebootstrap + ;; modules. If there were any non-bootstrap host modules that + ;; bootstrap modules depended on, we'd get unsatisfied per-stage + ;; dependencies on them, which would be immediately noticed. (define dep-kind (lambda () - (if (and (hash-ref boot-modules (dep-module "module")) - (=* (dep-module "on") "build-")) - "prebootstrap" + (cond + ((and (hash-ref boot-modules (dep-module "module")) + (=* (dep-module "on") "build-")) + "prebootstrap") + + ((or (= (dep-subtarget "on") "install-") + (not (hash-ref boot-modules (dep-module "on")))) + "normal") + + ((hash-ref boot-modules (dep-module "module")) + "bootstrap") - (if (or (= (dep-subtarget "on") "install-") - (not (hash-ref boot-modules (dep-module "module"))) - (not (hash-ref boot-modules (dep-module "on")))) - "normal" - "bootstrap")))) + (1 "postbootstrap")))) + + (define make-postboot-dep (lambda () + (let ((target (dep-module "module")) (dep "stage_last")) + (unless (= (hash-ref postboot-targets target) dep) + (hash-create-handle! postboot-targets target dep) + ;; All non-bootstrap modules' configure target already + ;; depend on dep. + (unless (=* target "target-") + (string-append "configure-" target ": " dep "\n")))))) ;; We now build the hash table that is used by dep-kind. (define boot-modules (make-hash-table 113)) + (define postboot-targets (make-hash-table 113)) +] [+ FOR host_modules +][+ @@ -1849,18 +1914,23 @@ configure-target-[+module+]: maybe-all-gcc[+ # to check for bootstrap/prebootstrap dependencies. To resolve # prebootstrap dependencies, prebootstrap modules are gathered in # a hash table. -[+ FOR dependencies +][+ (make-dep "" "") +] -[+ CASE (dep-kind) +] -[+ == "prebootstrap" - +][+ FOR bootstrap_stage +] -[+ (make-dep (dep-stage) "") +][+ - ENDFOR bootstrap_stage +] -[+ == "bootstrap" - +][+ FOR bootstrap_stage +] -[+ (make-dep (dep-stage) (dep-stage)) +][+ - ENDFOR bootstrap_stage +] -[+ ESAC +][+ -ENDFOR dependencies +] +[+ FOR dependencies +][+ CASE (dep-kind) +] +[+ == "prebootstrap" +][+ (make-dep "" "") +][+ FOR bootstrap_stage +] +[+ (make-dep (dep-stage) "") +][+ ENDFOR bootstrap_stage +] +[+ == "bootstrap" +][+ (make-dep "" "") +][+ FOR bootstrap_stage +] +[+ (make-dep (dep-stage) (dep-stage)) +][+ ENDFOR bootstrap_stage +] +[+ == "normal" +][+ (make-dep "" "") +] +[+ ESAC +][+ ENDFOR dependencies +] + +@if gcc-bootstrap +[+ FOR dependencies +][+ CASE (dep-kind) +] +[+ == "postbootstrap" +][+ (make-postboot-dep) +][+ ESAC +][+ +ENDFOR dependencies +]@endif gcc-bootstrap + +@unless gcc-bootstrap +[+ FOR dependencies +][+ CASE (dep-kind) +] +[+ == "postbootstrap" +][+ (make-dep "" "") +] +[+ ESAC +][+ ENDFOR dependencies +]@endunless gcc-bootstrap # Dependencies for target modules on other target modules are # described by lang_env_dependencies; the defaults apply to anything diff --git a/configure b/configure index 0601395512fdc..504f6410274ac 100755 --- a/configure +++ b/configure @@ -1,10 +1,10 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. 
-# Generated by GNU Autoconf 2.64. +# Generated by GNU Autoconf 2.69. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, -# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software -# Foundation, Inc. # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. @@ -87,6 +87,7 @@ fi IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. +as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -131,6 +132,31 @@ export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : emulate sh @@ -164,7 +190,8 @@ if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : else exitcode=1; echo positional parameters were not saved. fi -test x\$exitcode = x0 || exit 1" +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && @@ -209,14 +236,25 @@ IFS=$as_save_IFS if test "x$CONFIG_SHELL" != x; then : - # We cannot yet assume a decent shell, so we have to provide a - # neutralization value for shells without unset; and this also - # works around shells that cannot unset nonexistent variables. - BASH_ENV=/dev/null - ENV=/dev/null - (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV - export CONFIG_SHELL - exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. 
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 fi if test x$as_have_required = xno; then : @@ -314,10 +352,18 @@ $as_echo X"$as_dir" | test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take @@ -354,19 +400,19 @@ else fi # as_fn_arith -# as_fn_error ERROR [LINENO LOG_FD] -# --------------------------------- +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with status $?, using 1 if that was 0. +# script with STATUS, using 1 if that was 0. as_fn_error () { - as_status=$?; test $as_status -eq 0 && as_status=1 - if test "$3"; then - as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3 + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $1" >&2 + $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error @@ -439,6 +485,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). @@ -473,16 +523,16 @@ if (echo >conf$$.file) 2>/dev/null; then # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. + # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || - as_ln_s='cp -p' + as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else - as_ln_s='cp -p' + as_ln_s='cp -pR' fi else - as_ln_s='cp -p' + as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null @@ -494,28 +544,8 @@ else as_mkdir_p=false fi -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in #( - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x +as_test_x='test -x' +as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" @@ -524,10 +554,11 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" -exec 7<&0 &1 +test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. -# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` @@ -572,6 +603,7 @@ WINDRES_FOR_TARGET STRIP_FOR_TARGET READELF_FOR_TARGET RANLIB_FOR_TARGET +OTOOL_FOR_TARGET OBJDUMP_FOR_TARGET OBJCOPY_FOR_TARGET NM_FOR_TARGET @@ -580,12 +612,14 @@ LD_FOR_TARGET DLLTOOL_FOR_TARGET AS_FOR_TARGET AR_FOR_TARGET +GDC_FOR_TARGET GOC_FOR_TARGET GFORTRAN_FOR_TARGET GCC_FOR_TARGET CXX_FOR_TARGET CC_FOR_TARGET READELF +OTOOL OBJDUMP OBJCOPY WINDMC @@ -612,6 +646,7 @@ RANLIB_FOR_BUILD NM_FOR_BUILD LD_FOR_BUILD LDFLAGS_FOR_BUILD +GDC_FOR_BUILD GOC_FOR_BUILD GFORTRAN_FOR_BUILD DLLTOOL_FOR_BUILD @@ -659,6 +694,8 @@ extra_mpc_gmp_configure_flags extra_mpfr_configure_flags gmpinc gmplibs +HAVE_CXX11_FOR_BUILD +HAVE_CXX11 do_compare GNATMAKE GNATBIND @@ -771,6 +808,7 @@ with_gmp with_gmp_include with_gmp_lib with_stage1_libs +with_static_standard_libraries with_stage1_ldflags with_boot_libs with_boot_ldflags @@ -824,12 +862,14 @@ WINDRES WINDMC OBJCOPY OBJDUMP +OTOOL READELF CC_FOR_TARGET CXX_FOR_TARGET GCC_FOR_TARGET GFORTRAN_FOR_TARGET GOC_FOR_TARGET +GDC_FOR_TARGET AR_FOR_TARGET AS_FOR_TARGET DLLTOOL_FOR_TARGET @@ -838,6 +878,7 @@ LIPO_FOR_TARGET NM_FOR_TARGET OBJCOPY_FOR_TARGET OBJDUMP_FOR_TARGET +OTOOL_FOR_TARGET RANLIB_FOR_TARGET READELF_FOR_TARGET STRIP_FOR_TARGET @@ -905,8 +946,9 @@ do fi case $ac_option in - *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; - *) ac_optarg=yes ;; + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. @@ -951,7 +993,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -977,7 +1019,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. 
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1181,7 +1223,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1197,7 +1239,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1227,8 +1269,8 @@ do | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; - -*) as_fn_error "unrecognized option: \`$ac_option' -Try \`$0 --help' for more information." + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" ;; *=*) @@ -1236,7 +1278,7 @@ Try \`$0 --help' for more information." # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) - as_fn_error "invalid variable name: \`$ac_envvar'" ;; + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; @@ -1246,7 +1288,7 @@ Try \`$0 --help' for more information." $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 - : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac @@ -1254,13 +1296,13 @@ done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` - as_fn_error "missing argument to $ac_option" + as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; - fatal) as_fn_error "unrecognized options: $ac_unrecognized_opts" ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi @@ -1283,7 +1325,7 @@ do [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac - as_fn_error "expected an absolute directory name for --$ac_var: $ac_val" + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' @@ -1297,8 +1339,6 @@ target=$target_alias if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe - $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. - If a cross compiler is detected then cross compile mode will be used." 
>&2 elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi @@ -1313,9 +1353,9 @@ test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || - as_fn_error "working directory cannot be determined" + as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || - as_fn_error "pwd does not report name of working directory" + as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. @@ -1354,11 +1394,11 @@ else fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." - as_fn_error "cannot find sources ($ac_unique_file) in $srcdir" + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( - cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error "$ac_msg" + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then @@ -1372,7 +1412,7 @@ case $srcdir in esac case $srcdir in *" "*) - as_fn_error "path to source, $srcdir, contains spaces" + as_fn_error $? "path to source, $srcdir, contains spaces" ;; esac ac_subdirs_all=`cd $srcdir && echo */configure | sed 's,/configure,,g'` @@ -1405,7 +1445,7 @@ Configuration: --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit - -q, --quiet, --silent do not print \`checking...' messages + -q, --quiet, --silent do not print \`checking ...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files @@ -1542,6 +1582,9 @@ Optional Packages: --with-gmp-include=PATH specify directory for installed GMP include files --with-gmp-lib=PATH specify directory for the installed GMP library --with-stage1-libs=LIBS libraries for stage1 + --with-static-standard-libraries + use -static-libstdc++ and -static-libgcc + (default=auto) --with-stage1-ldflags=FLAGS linker flags for stage1 --with-boot-libs=LIBS libraries for stage2 and later @@ -1580,7 +1623,7 @@ Some influential environment variables: LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l - CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. 
-I if you have headers in a nonstandard directory CXX C++ compiler command CXXFLAGS C++ compiler flags @@ -1602,6 +1645,7 @@ Some influential environment variables: WINDMC WINDMC for the host OBJCOPY OBJCOPY for the host OBJDUMP OBJDUMP for the host + OTOOL OTOOL for the host READELF READELF for the host CC_FOR_TARGET CC for the target @@ -1613,6 +1657,8 @@ Some influential environment variables: GFORTRAN for the target GOC_FOR_TARGET GOC for the target + GDC_FOR_TARGET + GDC for the target AR_FOR_TARGET AR for the target AS_FOR_TARGET @@ -1629,6 +1675,8 @@ Some influential environment variables: OBJCOPY for the target OBJDUMP_FOR_TARGET OBJDUMP for the target + OTOOL_FOR_TARGET + OTOOL for the target RANLIB_FOR_TARGET RANLIB for the target READELF_FOR_TARGET @@ -1707,9 +1755,9 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF configure -generated by GNU Autoconf 2.64 +generated by GNU Autoconf 2.69 -Copyright (C) 2009 Free Software Foundation, Inc. +Copyright (C) 2012 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF @@ -1753,8 +1801,8 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} - return $ac_retval + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval } # ac_fn_c_try_compile @@ -1791,8 +1839,8 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} - return $ac_retval + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval } # ac_fn_cxx_try_compile @@ -1823,7 +1871,7 @@ $as_echo "$ac_try_echo"; } >&5 test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || - $as_test_x conftest$ac_exeext + test -x conftest$ac_exeext }; then : ac_retval=0 else @@ -1837,8 +1885,8 @@ fi # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} - return $ac_retval + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval } # ac_fn_cxx_try_link @@ -1869,7 +1917,7 @@ $as_echo "$ac_try_echo"; } >&5 test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || - $as_test_x conftest$ac_exeext + test -x conftest$ac_exeext }; then : ac_retval=0 else @@ -1883,8 +1931,8 @@ fi # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} - return $ac_retval + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval } # ac_fn_c_try_link cat >config.log <<_ACEOF @@ -1892,7 +1940,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by $as_me, which was -generated by GNU Autoconf 2.64. Invocation command line was +generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2002,11 +2050,9 @@ trap 'exit_status=$? 
{ echo - cat <<\_ASBOX -## ---------------- ## + $as_echo "## ---------------- ## ## Cache variables. ## -## ---------------- ## -_ASBOX +## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( @@ -2040,11 +2086,9 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; ) echo - cat <<\_ASBOX -## ----------------- ## + $as_echo "## ----------------- ## ## Output variables. ## -## ----------------- ## -_ASBOX +## ----------------- ##" echo for ac_var in $ac_subst_vars do @@ -2057,11 +2101,9 @@ _ASBOX echo if test -n "$ac_subst_files"; then - cat <<\_ASBOX -## ------------------- ## + $as_echo "## ------------------- ## ## File substitutions. ## -## ------------------- ## -_ASBOX +## ------------------- ##" echo for ac_var in $ac_subst_files do @@ -2075,11 +2117,9 @@ _ASBOX fi if test -s confdefs.h; then - cat <<\_ASBOX -## ----------- ## + $as_echo "## ----------- ## ## confdefs.h. ## -## ----------- ## -_ASBOX +## ----------- ##" echo cat confdefs.h echo @@ -2134,7 +2174,12 @@ _ACEOF ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then - ac_site_file1=$CONFIG_SITE + # We do not want a PATH search for config.site. + case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site @@ -2145,18 +2190,22 @@ fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue - if test -r "$ac_site_file"; then + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 - . "$ac_site_file" + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then - # Some versions of bash will fail to source /dev/null (special - # files actually), so we avoid doing that. - if test -f "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in @@ -2225,7 +2274,7 @@ if $ac_cache_corrupted; then $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. ## @@ -2244,7 +2293,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu - progname=$0 # if PWD already has a value, it is probably wrong. if test -n "$PWD" ; then PWD=`${PWDCMD-pwd}`; fi @@ -2276,16 +2324,22 @@ TOPLEVEL_CONFIGURE_ARGUMENTS=`echo "x$TOPLEVEL_CONFIGURE_ARGUMENTS" | sed -e 's/ # Find the build, host, and target systems. 
ac_aux_dir= for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do - for ac_t in install-sh install.sh shtool; do - if test -f "$ac_dir/$ac_t"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/$ac_t -c" - break 2 - fi - done + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi done if test -z "$ac_aux_dir"; then - as_fn_error "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 fi # These three variables are undocumented and unsupported, @@ -2299,27 +2353,27 @@ ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. # Make sure we can run config.sub. $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || - as_fn_error "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 $as_echo_n "checking build system type... " >&6; } -if test "${ac_cv_build+set}" = set; then : +if ${ac_cv_build+:} false; then : $as_echo_n "(cached) " >&6 else ac_build_alias=$build_alias test "x$ac_build_alias" = x && ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` test "x$ac_build_alias" = x && - as_fn_error "cannot guess build type; you must specify one" "$LINENO" 5 + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || - as_fn_error "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 $as_echo "$ac_cv_build" >&6; } case $ac_cv_build in *-*-*) ;; -*) as_fn_error "invalid value of canonical build" "$LINENO" 5;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; esac build=$ac_cv_build ac_save_IFS=$IFS; IFS='-' @@ -2364,14 +2418,14 @@ test "$host_noncanonical" = "$target_noncanonical" && { $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 $as_echo_n "checking host system type... " >&6; } -if test "${ac_cv_host+set}" = set; then : +if ${ac_cv_host+:} false; then : $as_echo_n "(cached) " >&6 else if test "x$host_alias" = x; then ac_cv_host=$ac_cv_build else ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || - as_fn_error "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 fi fi @@ -2379,7 +2433,7 @@ fi $as_echo "$ac_cv_host" >&6; } case $ac_cv_host in *-*-*) ;; -*) as_fn_error "invalid value of canonical host" "$LINENO" 5;; +*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; esac host=$ac_cv_host ac_save_IFS=$IFS; IFS='-' @@ -2397,14 +2451,14 @@ case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5 $as_echo_n "checking target system type... 
" >&6; } -if test "${ac_cv_target+set}" = set; then : +if ${ac_cv_target+:} false; then : $as_echo_n "(cached) " >&6 else if test "x$target_alias" = x; then ac_cv_target=$ac_cv_host else ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || - as_fn_error "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5 + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5 fi fi @@ -2412,7 +2466,7 @@ fi $as_echo "$ac_cv_target" >&6; } case $ac_cv_target in *-*-*) ;; -*) as_fn_error "invalid value of canonical target" "$LINENO" 5;; +*) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;; esac target=$ac_cv_target ac_save_IFS=$IFS; IFS='-' @@ -2465,7 +2519,7 @@ program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 $as_echo_n "checking for a BSD-compatible install... " >&6; } if test -z "$INSTALL"; then -if test "${ac_cv_path_install+set}" = set; then : +if ${ac_cv_path_install+:} false; then : $as_echo_n "(cached) " >&6 else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -2485,7 +2539,7 @@ case $as_dir/ in #(( # by default. for ac_prog in ginstall scoinst install; do for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then if test $ac_prog = install && grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. @@ -2543,7 +2597,7 @@ test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln works" >&5 $as_echo_n "checking whether ln works... " >&6; } -if test "${acx_cv_prog_LN+set}" = set; then : +if ${acx_cv_prog_LN+:} false; then : $as_echo_n "(cached) " >&6 else rm -f conftestdata_t @@ -2580,7 +2634,7 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 $as_echo_n "checking for a sed that does not truncate output... " >&6; } -if test "${ac_cv_path_SED+set}" = set; then : +if ${ac_cv_path_SED+:} false; then : $as_echo_n "(cached) " >&6 else ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ @@ -2600,7 +2654,7 @@ do for ac_prog in sed gsed; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" - { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue + as_fn_executable_p "$ac_path_SED" || continue # Check for GNU ac_path_SED and select it if it is found. # Check for GNU $ac_path_SED case `"$ac_path_SED" --version 2>&1` in @@ -2635,7 +2689,7 @@ esac done IFS=$as_save_IFS if test -z "$ac_cv_path_SED"; then - as_fn_error "no acceptable sed could be found in \$PATH" "$LINENO" 5 + as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 fi else ac_cv_path_SED=$SED @@ -2653,7 +2707,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AWK+set}" = set; then : +if ${ac_cv_prog_AWK+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AWK"; then @@ -2665,7 +2719,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AWK="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -2733,7 +2787,7 @@ build_tools="build-texinfo build-flex build-bison build-m4 build-fixincludes" # these libraries are used by various programs built for the host environment #f -host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace libcpp libdecnumber gmp mpfr mpc isl libelf libiconv" +host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace libcpp libcody libdecnumber gmp mpfr mpc isl libelf libiconv libctf" # these tools are built for the host environment # Note, the powerpc-eabi build depends on sim occurring before gdb in order to @@ -2741,7 +2795,7 @@ host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktr # binutils, gas and ld appear in that order because it makes sense to run # "make check" in that particular order. # If --enable-gold is used, "gold" may replace "ld". -host_tools="texinfo flex bison binutils gas ld fixincludes gcc cgen sid sim gdb gprof etc expect dejagnu m4 utils guile fastjar gnattools libcc1 gotools" +host_tools="texinfo flex bison binutils gas ld fixincludes gcc cgen sid sim gdb gdbserver gprof etc expect dejagnu m4 utils guile fastjar gnattools libcc1 gotools c++tools" # these libraries are built for the target environment, and are built after # the host libraries and the host tools (which may be a cross compiler) @@ -2758,14 +2812,15 @@ target_libraries="target-libgcc \ target-libstdc++-v3 \ target-libsanitizer \ target-libvtv \ - target-libmpx \ target-libssp \ target-libquadmath \ target-libgfortran \ target-libffi \ target-libobjc \ target-libada \ - target-libgo" + target-libgo \ + target-libphobos \ + target-zlib" # these tools are built using the target libraries, and are intended to # run only in the target environment @@ -2836,7 +2891,7 @@ target_subdir=${target_noncanonical} # Be sure to cover against remnants of an in-tree build. if test $srcdir != . && test -d $srcdir/host-${host_noncanonical}; then - as_fn_error "building out of tree but $srcdir contains host-${host_noncanonical}. + as_fn_error $? "building out of tree but $srcdir contains host-${host_noncanonical}. Use a pristine source tree when building in a separate tree" "$LINENO" 5 fi @@ -2918,7 +2973,7 @@ fi if test "${enable_offload_targets+set}" = set; then : enableval=$enable_offload_targets; if test x"$enable_offload_targets" = x; then - as_fn_error "no offload targets specified" "$LINENO" 5 + as_fn_error $? "no offload targets specified" "$LINENO" 5 fi else @@ -2987,7 +3042,7 @@ case "${ENABLE_GOLD}" in no) ;; *) - as_fn_error "invalid --enable-gold argument" "$LINENO" 5 + as_fn_error $? "invalid --enable-gold argument" "$LINENO" 5 ;; esac @@ -3002,7 +3057,7 @@ fi case "${ENABLE_LD}" in default) if test x${default_ld} != x; then - as_fn_error "either gold or ld can be the default ld" "$LINENO" 5 + as_fn_error $? "either gold or ld can be the default ld" "$LINENO" 5 fi ;; yes) @@ -3015,7 +3070,7 @@ $as_echo "$as_me: WARNING: neither ld nor gold are enabled" >&2;} configdirs=`echo " ${configdirs} " | sed -e 's/ ld / /'` ;; *) - as_fn_error "invalid --enable-ld argument" "$LINENO" 5 + as_fn_error $? 
"invalid --enable-ld argument" "$LINENO" 5 ;; esac @@ -3026,7 +3081,7 @@ esac if test "${enable_compressed_debug_sections+set}" = set; then : enableval=$enable_compressed_debug_sections; if test x"$enable_compressed_debug_sections" = xyes; then - as_fn_error "no program with compressed debug sections specified" "$LINENO" 5 + as_fn_error $? "no program with compressed debug sections specified" "$LINENO" 5 fi else @@ -3121,7 +3176,7 @@ if test "${enable_liboffloadmic+set}" = set; then : no | host | target) enable_liboffloadmic=$enableval ;; *) - as_fn_error "--enable-liboffloadmic=no/host/target" "$LINENO" 5 ;; + as_fn_error $? "--enable-liboffloadmic=no/host/target" "$LINENO" 5 ;; esac else if test x"$enable_as_accelerator_for" != x; then @@ -3160,7 +3215,7 @@ if test x$enable_libgomp = x ; then ;; *-*-darwin* | *-*-aix*) ;; - nvptx*-*-*) + nvptx*-*-* | amdgcn*-*-*) ;; *) noconfigdirs="$noconfigdirs target-libgomp" @@ -3264,25 +3319,6 @@ $as_echo "yes" >&6; } fi -# Enable libmpx on supported systems by request. -if test -d ${srcdir}/libmpx; then - if test x$enable_libmpx = x; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libmpx support" >&5 -$as_echo_n "checking for libmpx support... " >&6; } - if (srcdir=${srcdir}/libmpx; \ - . ${srcdir}/configure.tgt; \ - test "$LIBMPX_SUPPORTED" != "yes") - then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - noconfigdirs="$noconfigdirs target-libmpx" - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - fi - fi -fi - # Disable libhsail-rt on unsupported systems. if test -d ${srcdir}/libhsail-rt; then if test x$enable_libhsail_rt = x; then @@ -3323,9 +3359,16 @@ case "${target}" in # No hosted I/O support. noconfigdirs="$noconfigdirs target-libssp" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libssp" + ;; powerpc-*-aix* | rs6000-*-aix*) noconfigdirs="$noconfigdirs target-libssp" ;; + pru-*-*) + # No hosted I/O support. + noconfigdirs="$noconfigdirs target-libssp" + ;; rl78-*-*) # libssp uses a misaligned load to trigger a fault, but the RL78 # doesn't fault for those - instead, it gives a build-time error @@ -3346,6 +3389,10 @@ if test "${ENABLE_LIBSTDCXX}" = "default" ; then # VxWorks uses the Dinkumware C++ library. noconfigdirs="$noconfigdirs target-libstdc++-v3" ;; + amdgcn*-*-*) + # Not ported/fails to build when using newlib. + noconfigdirs="$noconfigdirs target-libstdc++-v3" + ;; arm*-wince-pe*) # the C++ libraries don't build on top of CE's C libraries noconfigdirs="$noconfigdirs target-libstdc++-v3" @@ -3353,18 +3400,86 @@ if test "${ENABLE_LIBSTDCXX}" = "default" ; then avr-*-*) noconfigdirs="$noconfigdirs target-libstdc++-v3" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libstdc++-v3" + ;; ft32-*-*) noconfigdirs="$noconfigdirs target-libstdc++-v3" ;; esac fi +# Disable C++ on systems where it is known to not work. +# For testing, you can override this with --enable-languages=c++. +case ,${enable_languages}, in + *,c++,*) + ;; + *) + case "${target}" in + bpf-*-*) + unsupported_languages="$unsupported_languages c++" + ;; + esac + ;; +esac + +# Disable Objc on systems where it is known to not work. +# For testing, you can override this with --enable-languages=objc. +case ,${enable_languages}, in + *,objc,*) + ;; + *) + case "${target}" in + bpf-*-*) + unsupported_languages="$unsupported_languages objc" + ;; + esac + ;; +esac + +# Disable D on systems where it is known to not work. +# For testing, you can override this with --enable-languages=d. 
+case ,${enable_languages}, in + *,d,*) + ;; + *) + case "${target}" in + bpf-*-*) + unsupported_languages="$unsupported_languages d" + ;; + esac + ;; +esac + +# Disable libphobos on unsupported systems. +# For testing, you can override this with --enable-libphobos. +if test -d ${srcdir}/libphobos; then + if test x$enable_libphobos = x; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libphobos support" >&5 +$as_echo_n "checking for libphobos support... " >&6; } + if (srcdir=${srcdir}/libphobos; \ + . ${srcdir}/configure.tgt; \ + test "$LIBPHOBOS_SUPPORTED" != "yes") + then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + noconfigdirs="$noconfigdirs target-libphobos" + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + fi + fi +fi + # Disable Fortran for some systems. case "${target}" in mmix-*-*) # See . unsupported_languages="$unsupported_languages fortran" ;; + bpf-*-*) + unsupported_languages="$unsupported_languages fortran" + ;; esac # Disable libffi for some systems. @@ -3411,6 +3526,9 @@ case "${target}" in arm*-*-symbianelf*) noconfigdirs="$noconfigdirs target-libffi" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libffi" + ;; cris-*-* | crisv32-*-*) case "${target}" in *-*-linux*) @@ -3457,11 +3575,30 @@ esac # Disable the go frontend on systems where it is known to not work. Please keep # this in sync with contrib/config-list.mk. case "${target}" in -*-*-darwin* | *-*-cygwin* | *-*-mingw*) +*-*-darwin* | *-*-cygwin* | *-*-mingw* | bpf-* ) unsupported_languages="$unsupported_languages go" ;; esac +# Only allow gdbserver on some systems. +if test -d ${srcdir}/gdbserver; then + if test x$enable_gdbserver = x; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gdbserver support" >&5 +$as_echo_n "checking for gdbserver support... " >&6; } + if (srcdir=${srcdir}/gdbserver; \ + . ${srcdir}/configure.srv; \ + test -n "$UNSUPPORTED") + then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + noconfigdirs="$noconfigdirs gdbserver" + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + fi + fi +fi + # Disable libgo for some systems where it is known to not work. # For testing, you can easily override this with --enable-libgo. if test x$enable_libgo = x; then @@ -3473,6 +3610,9 @@ if test x$enable_libgo = x; then *-*-cygwin* | *-*-mingw*) noconfigdirs="$noconfigdirs target-libgo" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libgo" + ;; esac fi @@ -3510,6 +3650,9 @@ case "${target}" in powerpc*-*-*) libgloss_dir=rs6000 ;; + pru-*-*) + libgloss_dir=pru + ;; sparc*-*-*) libgloss_dir=sparc ;; @@ -3541,6 +3684,9 @@ case "${target}" in sparc-*-sunos4*) noconfigdirs="$noconfigdirs target-newlib target-libgloss" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-newlib target-libgloss" + ;; *-*-aix*) noconfigdirs="$noconfigdirs target-newlib target-libgloss" ;; @@ -3589,6 +3735,8 @@ case "${target}" in noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" ;; + amdgcn*-*-*) + ;; arm-*-darwin*) noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" @@ -3613,6 +3761,7 @@ case "${target}" in ;; *-*-freebsd*) if test "x$with_gmp" = x && test "x$with_gmp_dir" = x \ + && ! 
test -d ${srcdir}/gmp \ && test -f /usr/local/include/gmp.h; then with_gmp=/usr/local fi @@ -3652,6 +3801,9 @@ case "${target}" in # newlib is not 64 bit ready noconfigdirs="$noconfigdirs target-newlib target-libgloss" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libobjc target-libbacktrace" + ;; sh*-*-pe|mips*-*-pe|*arm-wince-pe) noconfigdirs="$noconfigdirs tcl tk itcl libgui sim" ;; @@ -3777,6 +3929,13 @@ case "${target}" in mt-*-*) noconfigdirs="$noconfigdirs sim" ;; + nfp-*-*) + noconfigdirs="$noconfigdirs ld gas gdb gprof sim" + noconfigdirs="$noconfigdirs $target_libraries" + ;; + pdp11-*-*) + noconfigdirs="$noconfigdirs gdb gprof" + ;; powerpc-*-aix*) # copied from rs6000-*-* entry noconfigdirs="$noconfigdirs gprof" @@ -3892,7 +4051,7 @@ else rm cygwin-cat-check { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } - as_fn_error "The cat command does not ignore carriage return characters. + as_fn_error $? "The cat command does not ignore carriage return characters. Please either mount the build directory in binary mode or run the following commands before running any configure script: set -o igncr @@ -3936,6 +4095,7 @@ if test "${build}" != "${host}" ; then CXX_FOR_BUILD=${CXX_FOR_BUILD-g++} GFORTRAN_FOR_BUILD=${GFORTRAN_FOR_BUILD-gfortran} GOC_FOR_BUILD=${GOC_FOR_BUILD-gccgo} + GDC_FOR_BUILD=${GDC_FOR_BUILD-gdc} DLLTOOL_FOR_BUILD=${DLLTOOL_FOR_BUILD-dlltool} LD_FOR_BUILD=${LD_FOR_BUILD-ld} NM_FOR_BUILD=${NM_FOR_BUILD-nm} @@ -3949,6 +4109,7 @@ else CXX_FOR_BUILD="\$(CXX)" GFORTRAN_FOR_BUILD="\$(GFORTRAN)" GOC_FOR_BUILD="\$(GOC)" + GDC_FOR_BUILD="\$(GDC)" DLLTOOL_FOR_BUILD="\$(DLLTOOL)" LD_FOR_BUILD="\$(LD)" NM_FOR_BUILD="\$(NM)" @@ -3967,7 +4128,7 @@ if test -n "$ac_tool_prefix"; then set dummy ${ac_tool_prefix}gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC+set}" = set; then : +if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then @@ -3979,7 +4140,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -4007,7 +4168,7 @@ if test -z "$ac_cv_prog_CC"; then set dummy gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_CC+set}" = set; then : +if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then @@ -4019,7 +4180,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -4060,7 +4221,7 @@ if test -z "$CC"; then set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_CC+set}" = set; then : +if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then @@ -4072,7 +4233,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -4100,7 +4261,7 @@ if test -z "$CC"; then set dummy cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC+set}" = set; then : +if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then @@ -4113,7 +4274,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue @@ -4159,7 +4320,7 @@ if test -z "$CC"; then set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC+set}" = set; then : +if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then @@ -4171,7 +4332,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -4203,7 +4364,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_CC+set}" = set; then : +if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then @@ -4215,7 +4376,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -4257,8 +4418,8 @@ fi test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error "no acceptable C compiler found in \$PATH -See \`config.log' for more details." "$LINENO" 5; } +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 @@ -4279,8 +4440,8 @@ $as_echo "$ac_try_echo"; } >&5 ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 - rm -f conftest.er1 conftest.err fi + rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } done @@ -4297,12 +4458,12 @@ main () } _ACEOF ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out conftest.out" +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: @@ -4364,62 +4525,28 @@ test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } if test -z "$ac_file"; then : - $as_echo "$as_me: failed program was:" >&5 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -{ as_fn_set_status 77 -as_fn_error "C compiler cannot create executables -See \`config.log' for more details." "$LINENO" 5; }; } +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -# If not cross compiling, check that we can run a simple program. -if test "$cross_compiling" != yes; then - if { ac_try='./$ac_file' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - cross_compiling=no - else - if test "$cross_compiling" = maybe; then - cross_compiling=yes - else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error "cannot run C compiled programs. -If you meant to cross compile, use \`--host'. -See \`config.log' for more details." "$LINENO" 5; } - fi - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - -rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out conftest.out +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" @@ -4449,19 +4576,78 @@ done else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error "cannot compute suffix of executables: cannot compile and link -See \`config.log' for more details." "$LINENO" 5; } +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } fi -rm -f conftest$ac_cv_exeext +rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } -if test "${ac_cv_objext+set}" = set; then : +if ${ac_cv_objext+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -4501,8 +4687,8 @@ sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error "cannot compute suffix of object files: cannot compile -See \`config.log' for more details." 
"$LINENO" 5; } +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi @@ -4512,7 +4698,7 @@ OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if test "${ac_cv_c_compiler_gnu+set}" = set; then : +if ${ac_cv_c_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -4549,7 +4735,7 @@ ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } -if test "${ac_cv_prog_cc_g+set}" = set; then : +if ${ac_cv_prog_cc_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag @@ -4627,7 +4813,7 @@ else fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if test "${ac_cv_prog_cc_c89+set}" = set; then : +if ${ac_cv_prog_cc_c89+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no @@ -4636,8 +4822,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include -#include -#include +struct stat; /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); @@ -4738,7 +4923,7 @@ if test -z "$CXX"; then set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CXX+set}" = set; then : +if ${ac_cv_prog_CXX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CXX"; then @@ -4750,7 +4935,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -4782,7 +4967,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then : +if ${ac_cv_prog_ac_ct_CXX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CXX"; then @@ -4794,7 +4979,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CXX="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -4852,15 +5037,15 @@ $as_echo "$ac_try_echo"; } >&5 ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 - rm -f conftest.er1 conftest.err fi + rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 $as_echo_n "checking whether we are using the GNU C++ compiler... 
" >&6; } -if test "${ac_cv_cxx_compiler_gnu+set}" = set; then : +if ${ac_cv_cxx_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -4897,7 +5082,7 @@ ac_test_CXXFLAGS=${CXXFLAGS+set} ac_save_CXXFLAGS=$CXXFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 $as_echo_n "checking whether $CXX accepts -g... " >&6; } -if test "${ac_cv_prog_cxx_g+set}" = set; then : +if ${ac_cv_prog_cxx_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_cxx_werror_flag=$ac_cxx_werror_flag @@ -5049,7 +5234,7 @@ if test -n "$ac_tool_prefix"; then set dummy ${ac_tool_prefix}gnatbind; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GNATBIND+set}" = set; then : +if ${ac_cv_prog_GNATBIND+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GNATBIND"; then @@ -5061,7 +5246,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GNATBIND="${ac_tool_prefix}gnatbind" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -5089,7 +5274,7 @@ if test -z "$ac_cv_prog_GNATBIND"; then set dummy gnatbind; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_GNATBIND+set}" = set; then : +if ${ac_cv_prog_ac_ct_GNATBIND+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_GNATBIND"; then @@ -5101,7 +5286,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_GNATBIND="gnatbind" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -5141,7 +5326,7 @@ if test -n "$ac_tool_prefix"; then set dummy ${ac_tool_prefix}gnatmake; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GNATMAKE+set}" = set; then : +if ${ac_cv_prog_GNATMAKE+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GNATMAKE"; then @@ -5153,7 +5338,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GNATMAKE="${ac_tool_prefix}gnatmake" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -5181,7 +5366,7 @@ if test -z "$ac_cv_prog_GNATMAKE"; then set dummy gnatmake; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_GNATMAKE+set}" = set; then : +if ${ac_cv_prog_ac_ct_GNATMAKE+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_GNATMAKE"; then @@ -5193,7 +5378,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_GNATMAKE="gnatmake" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -5230,7 +5415,7 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler driver understands Ada" >&5 $as_echo_n "checking whether compiler driver understands Ada... " >&6; } -if test "${acx_cv_cc_gcc_supports_ada+set}" = set; then : +if ${acx_cv_cc_gcc_supports_ada+:} false; then : $as_echo_n "(cached) " >&6 else cat >conftest.adb <&5 $as_echo_n "checking how to compare bootstrapped objects... " >&6; } -if test "${gcc_cv_prog_cmp_skip+set}" = set; then : +if ${gcc_cv_prog_cmp_skip+:} false; then : $as_echo_n "(cached) " >&6 else echo abfoo >t1 @@ -5330,73 +5515,2072 @@ $as_echo "$as_me: WARNING: trying to bootstrap a cross compiler" >&2;} # No compiler: if they passed --enable-bootstrap explicitly, fail no:*:*:yes) - as_fn_error "cannot bootstrap without a compiler" "$LINENO" 5 ;; + as_fn_error $? "cannot bootstrap without a compiler" "$LINENO" 5 ;; # Fail if wrong command line *) - as_fn_error "invalid option for --enable-bootstrap" "$LINENO" 5 + as_fn_error $? "invalid option for --enable-bootstrap" "$LINENO" 5 ;; esac -# When bootstrapping with GCC, build stage 1 in C++98 mode to ensure that a -# C++98 compiler can still start the bootstrap. +# When bootstrapping with GCC, build stage 1 in C++11 mode to ensure that a +# C++11 compiler can still start the bootstrap. Otherwise, if building GCC, +# require C++11 (or higher). if test "$enable_bootstrap:$GXX" = "yes:yes"; then - CXX="$CXX -std=gnu++98" -fi + CXX="$CXX -std=c++11" +elif test "$have_compiler" = yes; then + ax_cxx_compile_alternatives="11 0x" ax_cxx_compile_cxx11_required=true + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + ac_success=no -# Used for setting $lt_cv_objdir -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 -$as_echo_n "checking for objdir... " >&6; } -if test "${lt_cv_objdir+set}" = set; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features by default" >&5 +$as_echo_n "checking whether $CXX supports C++11 features by default... " >&6; } +if ${ax_cv_cxx_compile_cxx11+:} false; then : $as_echo_n "(cached) " >&6 else - rm -f .libs 2>/dev/null -mkdir .libs 2>/dev/null -if test -d .libs; then - lt_cv_objdir=.libs -else - # MS-DOS does not allow filenames that begin with a dot. - lt_cv_objdir=_libs -fi -rmdir .libs 2>/dev/null -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 -$as_echo "$lt_cv_objdir" >&6; } -objdir=$lt_cv_objdir + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. 
+#ifndef __cplusplus +#error "This is not a C++ compiler" -cat >>confdefs.h <<_ACEOF -#define LT_OBJDIR "$lt_cv_objdir/" -_ACEOF +#elif __cplusplus < 201103L +#error "This is not a C++11 compiler" +#else -# Check for GMP, MPFR and MPC -gmplibs="-lmpc -lmpfr -lgmp" -gmpinc= -have_gmp=no +namespace cxx11 +{ -# Specify a location for mpc -# check for this first so it ends up on the link line before mpfr. + namespace test_static_assert + { -# Check whether --with-mpc was given. -if test "${with_mpc+set}" = set; then : - withval=$with_mpc; -fi + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + } -# Check whether --with-mpc-include was given. -if test "${with_mpc_include+set}" = set; then : - withval=$with_mpc_include; -fi + namespace test_final_override + { + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; -# Check whether --with-mpc-lib was given. -if test "${with_mpc_lib+set}" = set; then : - withval=$with_mpc_lib; + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? 
strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_cxx_compile_cxx11=yes +else + ax_cv_cxx_compile_cxx11=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx11" >&5 +$as_echo "$ax_cv_cxx_compile_cxx11" >&6; } + if test x$ax_cv_cxx_compile_cxx11 = xyes; then + ac_success=yes + fi + + if test x$ac_success = xno; then + for alternative in ${ax_cxx_compile_alternatives}; do + switch="-std=gnu++${alternative}" + cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 +$as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; } +if eval \${$cachevar+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_CXX="$CXX" + CXX="$CXX $switch" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; + + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? 
strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + eval $cachevar=yes +else + eval $cachevar=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXX="$ac_save_CXX" +fi +eval ac_res=\$$cachevar + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi + + if test x$ac_success = xno; then + for alternative in ${ax_cxx_compile_alternatives}; do + for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do + cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 +$as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; } +if eval \${$cachevar+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_CXX="$CXX" + CXX="$CXX $switch" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; + + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? 
strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + eval $cachevar=yes +else + eval $cachevar=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXX="$ac_save_CXX" +fi +eval ac_res=\$$cachevar + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + if test x$ac_success = xyes; then + break + fi + done + fi + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test x$ax_cxx_compile_cxx11_required = xtrue; then + if test x$ac_success = xno; then + as_fn_error $? "*** A compiler with support for C++11 language features is required." "$LINENO" 5 + fi + fi + if test x$ac_success = xno; then + HAVE_CXX11=0 + { $as_echo "$as_me:${as_lineno-$LINENO}: No compiler with C++11 support was found" >&5 +$as_echo "$as_me: No compiler with C++11 support was found" >&6;} + else + HAVE_CXX11=1 + +$as_echo "#define HAVE_CXX11 1" >>confdefs.h + + fi + + + + if test "${build}" != "${host}"; then + ax_cxx_compile_alternatives="11 0x" ax_cxx_compile_cxx11_required=true + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + ac_success=no + ax_cv_cxx_compile_cxx11_orig_cxx="$CXX" + ax_cv_cxx_compile_cxx11_orig_cxxflags="$CXXFLAGS" + ax_cv_cxx_compile_cxx11_orig_cppflags="$CPPFLAGS" + CXX="$CXX_FOR_BUILD" + CXXFLAGS="$CXXFLAGS_FOR_BUILD" + CPPFLAGS="$CPPFLAGS_FOR_BUILD" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features by default" >&5 +$as_echo_n "checking whether $CXX supports C++11 features by default... " >&6; } +if ${ax_cv_cxx_compile_cxx11_FOR_BUILD+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. 
+ +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; + + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? 
strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_cxx_compile_cxx11_FOR_BUILD=yes +else + ax_cv_cxx_compile_cxx11_FOR_BUILD=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx11_FOR_BUILD" >&5 +$as_echo "$ax_cv_cxx_compile_cxx11_FOR_BUILD" >&6; } + if test x$ax_cv_cxx_compile_cxx11_FOR_BUILD = xyes; then + ac_success=yes + fi + + if test x$ac_success = xno; then + for alternative in ${ax_cxx_compile_alternatives}; do + switch="-std=gnu++${alternative}" + cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_FOR_BUILD_$switch" | $as_tr_sh` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 +$as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; } +if eval \${$cachevar+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_CXX="$CXX" + CXX="$CXX $switch" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; + + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? 
strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + eval $cachevar=yes +else + eval $cachevar=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXX="$ac_save_CXX" +fi +eval ac_res=\$$cachevar + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi + + if test x$ac_success = xno; then + for alternative in ${ax_cxx_compile_alternatives}; do + for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do + cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_FOR_BUILD_$switch" | $as_tr_sh` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 +$as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; } +if eval \${$cachevar+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_CXX="$CXX" + CXX="$CXX $switch" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; + + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? 
strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + + + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + eval $cachevar=yes +else + eval $cachevar=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXX="$ac_save_CXX" +fi +eval ac_res=\$$cachevar + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + if test x$ac_success = xyes; then + break + fi + done + fi + CXX_FOR_BUILD="$CXX" + CXXFLAGS_FOR_BUILD="$CXXFLAGS" + CPPFLAGS_FOR_BUILD="$CPPFLAGS" + CXX="$ax_cv_cxx_compile_cxx11_orig_cxx" + CXXFLAGS="$ax_cv_cxx_compile_cxx11_orig_cxxflags" + CPPFLAGS="$ax_cv_cxx_compile_cxx11_orig_cppflags" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test x$ax_cxx_compile_cxx11_required = xtrue; then + if test x$ac_success = xno; then + as_fn_error $? "*** A compiler with support for C++11 language features is required." "$LINENO" 5 + fi + fi + if test x$ac_success = xno; then + HAVE_CXX11_FOR_BUILD=0 + { $as_echo "$as_me:${as_lineno-$LINENO}: No compiler with C++11 support was found" >&5 +$as_echo "$as_me: No compiler with C++11 support was found" >&6;} + else + HAVE_CXX11_FOR_BUILD=1 + +$as_echo "#define HAVE_CXX11_FOR_BUILD 1" >>confdefs.h + + fi + + + fi +fi + +# Used for setting $lt_cv_objdir +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +$as_echo_n "checking for objdir... " >&6; } +if ${lt_cv_objdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +$as_echo "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +cat >>confdefs.h <<_ACEOF +#define LT_OBJDIR "$lt_cv_objdir/" +_ACEOF + + + +# Check for GMP, MPFR and MPC +gmplibs="-lmpc -lmpfr -lgmp" +gmpinc= +have_gmp=no + +# Specify a location for mpc +# check for this first so it ends up on the link line before mpfr. + +# Check whether --with-mpc was given. +if test "${with_mpc+set}" = set; then : + withval=$with_mpc; +fi + + +# Check whether --with-mpc-include was given. +if test "${with_mpc_include+set}" = set; then : + withval=$with_mpc_include; +fi + + +# Check whether --with-mpc-lib was given. +if test "${with_mpc_lib+set}" = set; then : + withval=$with_mpc_lib; fi @@ -5425,7 +7609,7 @@ fi # Check whether --with-mpfr-dir was given. if test "${with_mpfr_dir+set}" = set; then : - withval=$with_mpfr_dir; as_fn_error "The --with-mpfr-dir=PATH option has been removed. + withval=$with_mpfr_dir; as_fn_error $? "The --with-mpfr-dir=PATH option has been removed. Use --with-mpfr=PATH or --with-mpfr-include=PATH plus --with-mpfr-lib=PATH" "$LINENO" 5 fi @@ -5462,7 +7646,7 @@ fi if test "x$with_mpfr$with_mpfr_include$with_mpfr_lib" = x && test -d ${srcdir}/mpfr; then # MPFR v3.1.0 moved the sources into a src sub-directory. if ! test -d ${srcdir}/mpfr/src; then - as_fn_error "Building GCC with MPFR in the source tree is only handled for MPFR 3.1.0+." 
"$LINENO" 5 + as_fn_error $? "Building GCC with MPFR in the source tree is only handled for MPFR 3.1.0+." "$LINENO" 5 fi gmplibs='-L$$r/$(HOST_SUBDIR)/mpfr/src/'"$lt_cv_objdir $gmplibs" gmpinc='-I$$r/$(HOST_SUBDIR)/mpfr/src -I$$s/mpfr/src '"$gmpinc" @@ -5478,7 +7662,7 @@ fi # Check whether --with-gmp-dir was given. if test "${with_gmp_dir+set}" = set; then : - withval=$with_gmp_dir; as_fn_error "The --with-gmp-dir=PATH option has been removed. + withval=$with_gmp_dir; as_fn_error $? "The --with-gmp-dir=PATH option has been removed. Use --with-gmp=PATH or --with-gmp-include=PATH plus --with-gmp-lib=PATH" "$LINENO" 5 fi @@ -5596,7 +7780,7 @@ int main () { - #if MPFR_VERSION < MPFR_VERSION_NUM(2,4,0) + #if MPFR_VERSION < MPFR_VERSION_NUM(3,1,0) choke me #endif @@ -5613,7 +7797,7 @@ int main () { - #if MPFR_VERSION < MPFR_VERSION_NUM(2,4,2) + #if MPFR_VERSION < MPFR_VERSION_NUM(3,1,6) choke me #endif @@ -5706,9 +7890,9 @@ main () int t; mpfr_init (n); mpfr_init (x); - mpfr_atan2 (n, n, x, GMP_RNDN); - mpfr_erfc (n, x, GMP_RNDN); - mpfr_subnormalize (x, t, GMP_RNDN); + mpfr_atan2 (n, n, x, MPFR_RNDN); + mpfr_erfc (n, x, MPFR_RNDN); + mpfr_subnormalize (x, t, MPFR_RNDN); mpfr_clear(n); mpfr_clear(x); mpc_init2 (c, 53); @@ -5739,11 +7923,11 @@ rm -f core conftest.err conftest.$ac_objext \ # The library versions listed in the error message below should match # the HARD-minimums enforced above. if test x$have_gmp != xyes; then - as_fn_error "Building GCC requires GMP 4.2+, MPFR 2.4.0+ and MPC 0.8.0+. + as_fn_error $? "Building GCC requires GMP 4.2+, MPFR 3.1.0+ and MPC 0.8.0+. Try the --with-gmp, --with-mpfr and/or --with-mpc options to specify their locations. Source code for these libraries can be found at their respective hosting sites as well as at -ftp://gcc.gnu.org/pub/gcc/infrastructure/. See also +https://gcc.gnu.org/pub/gcc/infrastructure/. See also http://gcc.gnu.org/install/prerequisites.html for additional info. If you obtained GMP, MPFR and/or MPC from a vendor distribution package, make sure that you have installed both the libraries and the header @@ -5774,6 +7958,23 @@ fi +# Whether or not to use -static-libstdc++ and -static-libgcc. The +# default is yes if gcc is being built; no otherwise. The reason for +# this default is that gdb is sometimes linked against GNU Source +# Highlight, which is a shared library that uses C++ exceptions. In +# this case, -static-libstdc++ will cause crashes. + +# Check whether --with-static-standard-libraries was given. +if test "${with_static_standard_libraries+set}" = set; then : + withval=$with_static_standard_libraries; +else + with_static_standard_libraries=auto +fi + +if test "$with_static_standard_libraries" = auto; then + with_static_standard_libraries=$have_compiler +fi + # Linker flags to use for stage1 or when not bootstrapping. # Check whether --with-stage1-ldflags was given. @@ -5788,7 +7989,8 @@ else # In stage 1, default to linking libstdc++ and libgcc statically with GCC # if supported. But if the user explicitly specified the libraries to use, # trust that they are doing what they want. - if test "$stage1_libs" = "" -a "$have_static_libs" = yes; then + if test "$with_static_standard_libraries" = yes -a "$stage1_libs" = "" \ + -a "$have_static_libs" = yes; then stage1_ldflags="-static-libstdc++ -static-libgcc" fi fi @@ -5967,7 +8169,7 @@ $as_echo "required isl version is 0.15 or later" >&6; } && test "x${isllibs}" = x \ && test "x${islinc}" = x ; then - as_fn_error "Unable to find a usable isl. See config.log for details." 
"$LINENO" 5 + as_fn_error $? "Unable to find a usable isl. See config.log for details." "$LINENO" 5 fi @@ -6028,7 +8230,7 @@ else case $target in *-cygwin* | *-mingw* | *-apple-darwin* | *djgpp*) ;; *) if test x"$enable_lto" = x"yes"; then - as_fn_error "LTO support is not enabled for this target." "$LINENO" 5 + as_fn_error $? "LTO support is not enabled for this target." "$LINENO" 5 fi ;; esac @@ -6116,7 +8318,7 @@ if test -d ${srcdir}/gcc; then if test -f ${srcdir}/gcc/cp/config-lang.in; then enable_languages="${enable_languages},c++" else - as_fn_error "bootstrapping requires c++ sources" "$LINENO" 5 + as_fn_error $? "bootstrapping requires c++ sources" "$LINENO" 5 fi ;; esac @@ -6254,7 +8456,7 @@ if test -d ${srcdir}/gcc; then case ${add_this_lang} in yes) # Specifically requested language; tell them. - as_fn_error "The gcc/$i directory contains parts of $language but is missing" "$LINENO" 5 + as_fn_error $? "The gcc/$i directory contains parts of $language but is missing" "$LINENO" 5 ;; all) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The gcc/$i directory contains parts of $language but is missing" >&5 @@ -6272,7 +8474,7 @@ $as_echo "$as_me: WARNING: The gcc/$i directory contains parts of $language but case ${add_this_lang}:${language}:${have_gnat} in yes:ada:no) # Specifically requested language; tell them. - as_fn_error "GNAT is required to build $language" "$LINENO" 5 + as_fn_error $? "GNAT is required to build $language" "$LINENO" 5 ;; all:ada:no) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GNAT is required to build $language" >&5 @@ -6286,10 +8488,15 @@ $as_echo "$as_me: WARNING: GNAT is required to build $language" >&2;} esac # Disable jit if -enable-host-shared not specified - case ${add_this_lang}:${language}:${host_shared} in - yes:jit:no) - # PR jit/64780: explicitly specify --enable-host-shared - as_fn_error " + # but not if building for Mingw. All code in Windows + # is position independent code (PIC). + case $target in + *mingw*) ;; + *) + case ${add_this_lang}:${language}:${host_shared} in + yes:jit:no) + # PR jit/64780: explicitly specify --enable-host-shared + as_fn_error $? " Enabling language \"jit\" requires --enable-host-shared. --enable-host-shared typically slows the rest of the compiler down by @@ -6297,19 +8504,21 @@ a few %, so you must explicitly enable it. If you want to build both the jit and the regular compiler, it is often best to do this via two separate configure/builds, in separate -directories, to avoid imposing the performance cost of ---enable-host-shared on the regular compiler." "$LINENO" 5 - ;; - all:jit:no) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --enable-host-shared required to build $language" >&5 -$as_echo "$as_me: WARNING: --enable-host-shared required to build $language" >&2;} - add_this_lang=unsupported - ;; - *:jit:no) - # Silently disable. - add_this_lang=unsupported - ;; - esac +directories, to avoid imposing the performance cost of +--enable-host-shared on the regular compiler." "$LINENO" 5 + ;; + all:jit:no) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --enable-host-shared required to build $language" >&5 +$as_echo "$as_me: WARNING: --enable-host-shared required to build $language" >&2;} + add_this_lang=unsupported + ;; + *:jit:no) + # Silently disable. + add_this_lang=unsupported + ;; + esac + ;; + esac # Disable a language that is unsupported by the target. 
case "${add_this_lang}: $unsupported_languages " in @@ -6407,7 +8616,7 @@ fi new_enable_languages=`echo "$new_enable_languages" | sed -e "s/^,//" -e "s/,$//"` if test "x$missing_languages" != x; then - as_fn_error " + as_fn_error $? " The following requested languages could not be built: ${missing_languages} Supported languages are: ${potential_languages}" "$LINENO" 5 fi @@ -6462,9 +8671,9 @@ $as_echo_n "checking for bdw garbage collector... " >&6; } $as_echo "using bdw-gc in default locations" >&6; } else if test "x$with_target_bdw_gc_include" = x && test "x$with_target_bdw_gc_lib" != x; then - as_fn_error "found --with-target-bdw-gc-lib but --with-target-bdw-gc-include missing" "$LINENO" 5 + as_fn_error $? "found --with-target-bdw-gc-lib but --with-target-bdw-gc-include missing" "$LINENO" 5 elif test "x$with_target_bdw_gc_include" != x && test "x$with_target_bdw_gc_lib" = x; then - as_fn_error "found --with-target-bdw-gc-include but --with-target-bdw-gc-lib missing" "$LINENO" 5 + as_fn_error $? "found --with-target-bdw-gc-include but --with-target-bdw-gc-lib missing" "$LINENO" 5 else { $as_echo "$as_me:${as_lineno-$LINENO}: result: using paths configured with --with-target-bdw-gc options" >&5 $as_echo "using paths configured with --with-target-bdw-gc options" >&6; } @@ -6567,7 +8776,7 @@ then extra_linker_plugin_configure_flags="$extra_linker_plugin_configure_flags \ --with-libiberty=../libiberty-linker-plugin";; *) - as_fn_error "libiberty missing" "$LINENO" 5;; + as_fn_error $? "libiberty missing" "$LINENO" 5;; esac fi @@ -6652,7 +8861,7 @@ fi # Check whether --with-gcc-major-version-only was given. if test "${with_gcc_major_version_only+set}" = set; then : withval=$with_gcc_major_version_only; if test x$with_gcc_major_version_only = xyes ; then - get_gcc_base_ver="sed -e 's/^\([0-9]*\).*\$\$/\1/'" + get_gcc_base_ver="sed -e 's/^\([0-9]*\).*/\1/'" fi fi @@ -7093,7 +9302,7 @@ fi # or bootstrap-ubsan, bootstrap it. if echo " ${target_configdirs} " | grep " libsanitizer " > /dev/null 2>&1; then case "$BUILD_CONFIG" in - *bootstrap-asan* | *bootstrap-ubsan* ) + *bootstrap-hwasan* | *bootstrap-asan* | *bootstrap-ubsan* ) bootstrap_target_libs=${bootstrap_target_libs}target-libsanitizer, bootstrap_fixincludes=yes ;; @@ -7106,16 +9315,6 @@ if echo " ${target_configdirs} " | grep " libvtv " > /dev/null 2>&1 && bootstrap_target_libs=${bootstrap_target_libs}target-libvtv, fi -# If we are building libmpx and $BUILD_CONFIG contains bootstrap-mpx, -# bootstrap it. -if echo " ${target_configdirs} " | grep " libmpx " > /dev/null 2>&1; then - case "$BUILD_CONFIG" in - *bootstrap-mpx* ) - bootstrap_target_libs=${bootstrap_target_libs}target-libmpx, - ;; - esac -fi - # Determine whether gdb needs tk/tcl or not. # Use 'maybe' since enable_gdbtk might be true even if tk isn't available # and in that case we want gdb to be built without tk. Ugh! @@ -7144,6 +9343,18 @@ esac CONFIGURE_GDB_TK=`echo ${GDB_TK} | sed s/-all-/-configure-/g` INSTALL_GDB_TK=`echo ${GDB_TK} | sed s/-all-/-install-/g` +# gdb and gdbserver depend on gnulib and gdbsupport, but as nothing +# else does, only include them if one of these is built. The Makefile +# provides the ordering, so it's enough here to add to the list. +case " ${configdirs} " in + *\ gdb\ *) + configdirs="${configdirs} gnulib gdbsupport" + ;; + *\ gdbserver\ *) + configdirs="${configdirs} gnulib gdbsupport" + ;; +esac + # Strip out unwanted targets. 
# While at that, we remove Makefiles if we were started for recursive @@ -7156,12 +9367,14 @@ INSTALL_GDB_TK=`echo ${GDB_TK} | sed s/-all-/-install-/g` # extrasub-{build,host,target} not because there is any reason to split # the substitutions up that way, but only to remain below the limit of # 99 commands in a script, for HP-UX sed. -# Do not nest @if/@endif pairs, because configure will not warn you at all. + +# Do not nest @if/@endif or @unless/@endunless pairs, because +# configure will not warn you at all. case "$enable_bootstrap:$ENABLE_GOLD: $configdirs :,$stage1_languages," in yes:yes:*\ gold\ *:*,c++,*) ;; yes:yes:*\ gold\ *:*) - as_fn_error "in a combined tree, bootstrapping with --enable-gold requires c++ in stage1_languages" "$LINENO" 5 + as_fn_error $? "in a combined tree, bootstrapping with --enable-gold requires c++ in stage1_languages" "$LINENO" 5 ;; esac @@ -7175,8 +9388,10 @@ for module in ${build_configdirs} ; do extrasub_build="$extrasub_build /^@if build-$module\$/d /^@endif build-$module\$/d +/^@unless build-$module\$/,/^@endunless build-$module\$/d /^@if build-$module-$bootstrap_suffix\$/d -/^@endif build-$module-$bootstrap_suffix\$/d" +/^@endif build-$module-$bootstrap_suffix\$/d +/^@unless build-$module-$bootstrap_suffix\$/,/^@endunless build-$module-$bootstrap_suffix\$/d" done extrasub_host= for module in ${configdirs} ; do @@ -7195,8 +9410,10 @@ for module in ${configdirs} ; do extrasub_host="$extrasub_host /^@if $module\$/d /^@endif $module\$/d +/^@unless $module\$/,/^@endunless $module\$/d /^@if $module-$host_bootstrap_suffix\$/d -/^@endif $module-$host_bootstrap_suffix\$/d" +/^@endif $module-$host_bootstrap_suffix\$/d +/^@unless $module-$host_bootstrap_suffix\$/,/^@endunless $module-$host_bootstrap_suffix\$/d" done extrasub_target= for module in ${target_configdirs} ; do @@ -7215,13 +9432,17 @@ for module in ${target_configdirs} ; do extrasub_target="$extrasub_target /^@if target-$module\$/d /^@endif target-$module\$/d +/^@unless target-$module\$/,/^@endunless target-$module\$/d /^@if target-$module-$target_bootstrap_suffix\$/d -/^@endif target-$module-$target_bootstrap_suffix\$/d" +/^@endif target-$module-$target_bootstrap_suffix\$/d +/^@unless target-$module-$target_bootstrap_suffix\$/,/^@endunless target-$module-$target_bootstrap_suffix\$/d" done # Do the final fixup along with target modules. extrasub_target="$extrasub_target -/^@if /,/^@endif /d" +/^@if /,/^@endif /d +/^@unless /d +/^@endunless /d" # Create the serialization dependencies. This uses a temporary file. @@ -7477,7 +9698,7 @@ case "$target:$have_compiler:$host:$target:$enable_multilib" in fi rm -f conftest* if test x${dev64} != xyes ; then - as_fn_error "I suspect your system does not have 32-bit development libraries (libc and headers). If you have them, rerun configure with --enable-multilib. If you do not have them, and want to build a 64-bit-only compiler, rerun configure with --disable-multilib." "$LINENO" 5 + as_fn_error $? "I suspect your system does not have 32-bit development libraries (libc and headers). If you have them, rerun configure with --enable-multilib. If you do not have them, and want to build a 64-bit-only compiler, rerun configure with --disable-multilib." "$LINENO" 5 fi ;; esac @@ -7666,6 +9887,7 @@ done + # Generate default definitions for YACC, M4, LEX and other programs that run # on the build machine. These are used if the Makefile can't locate these # programs in objdir. 
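The @unless/@endunless markers added in the following hunks invert @if/@endif: for each configured module the sed rules delete the @if/@endif marker lines (keeping the body) and delete the whole @unless ... @endunless block, while the final fixup drops any remaining @if blocks outright and strips leftover @unless/@endunless markers so their bodies survive for unconfigured modules. Nesting the pairs would let these line-range deletions swallow the wrong lines with no diagnostic, which is what the updated comment warns about. A self-contained sketch of the per-module behaviour, using a hypothetical fragment and assuming the module gdb is configured:

    # Sketch: what the per-module sed rules do to a conditional fragment
    # when the module ("gdb" here) is in $configdirs.
    cat > fragment <<'EOF'
    @if gdb
    EXAMPLE_DIRS = gnulib gdbsupport
    @endif gdb
    @unless gdb
    EXAMPLE_DIRS =
    @endunless gdb
    EOF
    # Marker lines go away, the @if body stays, the whole @unless block is cut.
    sed -e '/^@if gdb$/d' \
        -e '/^@endif gdb$/d' \
        -e '/^@unless gdb$/,/^@endunless gdb$/d' \
        fragment
    rm -f fragment

Running the sketch prints only the EXAMPLE_DIRS assignment from the @if body.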
@@ -7677,7 +9899,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_YACC+set}" = set; then : +if ${ac_cv_prog_YACC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$YACC"; then @@ -7689,7 +9911,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_YACC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -7724,7 +9946,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_BISON+set}" = set; then : +if ${ac_cv_prog_BISON+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$BISON"; then @@ -7736,7 +9958,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_BISON="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -7771,7 +9993,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_M4+set}" = set; then : +if ${ac_cv_prog_M4+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$M4"; then @@ -7783,7 +10005,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_M4="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -7818,7 +10040,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LEX+set}" = set; then : +if ${ac_cv_prog_LEX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LEX"; then @@ -7830,7 +10052,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LEX="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -7866,7 +10088,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_FLEX+set}" = set; then : +if ${ac_cv_prog_FLEX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$FLEX"; then @@ -7878,7 +10100,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_FLEX="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -7913,7 +10135,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_MAKEINFO+set}" = set; then : +if ${ac_cv_prog_MAKEINFO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$MAKEINFO"; then @@ -7925,7 +10147,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_MAKEINFO="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -7974,7 +10196,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_EXPECT+set}" = set; then : +if ${ac_cv_prog_EXPECT+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$EXPECT"; then @@ -7986,7 +10208,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_EXPECT="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8023,7 +10245,7 @@ do set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_RUNTEST+set}" = set; then : +if ${ac_cv_prog_RUNTEST+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RUNTEST"; then @@ -8035,7 +10257,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RUNTEST="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8087,7 +10309,7 @@ if test -n "$ac_cv_prog_AR"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AR+set}" = set; then : +if ${ac_cv_prog_AR+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR"; then @@ -8099,7 +10321,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8129,7 +10351,7 @@ for ncn_progname in ar; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_AR+set}" = set; then : +if ${ac_cv_prog_AR+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR"; then @@ -8141,7 +10363,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8168,7 +10390,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AR+set}" = set; then : +if ${ac_cv_prog_AR+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR"; then @@ -8180,7 +10402,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8228,7 +10450,7 @@ if test -n "$ac_cv_prog_AS"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AS+set}" = set; then : +if ${ac_cv_prog_AS+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AS"; then @@ -8240,7 +10462,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AS="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8270,7 +10492,7 @@ for ncn_progname in as; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AS+set}" = set; then : +if ${ac_cv_prog_AS+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AS"; then @@ -8282,7 +10504,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AS="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8309,7 +10531,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AS+set}" = set; then : +if ${ac_cv_prog_AS+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AS"; then @@ -8321,7 +10543,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AS="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8369,7 +10591,7 @@ if test -n "$ac_cv_prog_DLLTOOL"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_DLLTOOL+set}" = set; then : +if ${ac_cv_prog_DLLTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL"; then @@ -8381,7 +10603,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8411,7 +10633,7 @@ for ncn_progname in dlltool; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_DLLTOOL+set}" = set; then : +if ${ac_cv_prog_DLLTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL"; then @@ -8423,7 +10645,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8450,7 +10672,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_DLLTOOL+set}" = set; then : +if ${ac_cv_prog_DLLTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL"; then @@ -8462,7 +10684,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8510,7 +10732,7 @@ if test -n "$ac_cv_prog_LD"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LD+set}" = set; then : +if ${ac_cv_prog_LD+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LD"; then @@ -8522,7 +10744,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LD="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8552,7 +10774,7 @@ for ncn_progname in ld; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LD+set}" = set; then : +if ${ac_cv_prog_LD+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LD"; then @@ -8564,7 +10786,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LD="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8591,7 +10813,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LD+set}" = set; then : +if ${ac_cv_prog_LD+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LD"; then @@ -8603,7 +10825,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LD="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8651,7 +10873,7 @@ if test -n "$ac_cv_prog_LIPO"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LIPO+set}" = set; then : +if ${ac_cv_prog_LIPO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO"; then @@ -8663,7 +10885,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8693,7 +10915,7 @@ for ncn_progname in lipo; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LIPO+set}" = set; then : +if ${ac_cv_prog_LIPO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO"; then @@ -8705,7 +10927,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8732,7 +10954,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LIPO+set}" = set; then : +if ${ac_cv_prog_LIPO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO"; then @@ -8744,7 +10966,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8792,7 +11014,7 @@ if test -n "$ac_cv_prog_NM"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_NM+set}" = set; then : +if ${ac_cv_prog_NM+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM"; then @@ -8804,7 +11026,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NM="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8834,7 +11056,7 @@ for ncn_progname in nm; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_NM+set}" = set; then : +if ${ac_cv_prog_NM+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM"; then @@ -8846,7 +11068,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NM="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8873,7 +11095,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_NM+set}" = set; then : +if ${ac_cv_prog_NM+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM"; then @@ -8885,7 +11107,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NM="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8933,7 +11155,7 @@ if test -n "$ac_cv_prog_RANLIB"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_RANLIB+set}" = set; then : +if ${ac_cv_prog_RANLIB+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB"; then @@ -8945,7 +11167,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -8975,7 +11197,7 @@ for ncn_progname in ranlib; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_RANLIB+set}" = set; then : +if ${ac_cv_prog_RANLIB+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB"; then @@ -8987,7 +11209,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9014,7 +11236,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_RANLIB+set}" = set; then : +if ${ac_cv_prog_RANLIB+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB"; then @@ -9026,7 +11248,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9069,7 +11291,7 @@ if test -n "$ac_cv_prog_STRIP"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_STRIP+set}" = set; then : +if ${ac_cv_prog_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP"; then @@ -9081,7 +11303,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9111,7 +11333,7 @@ for ncn_progname in strip; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_STRIP+set}" = set; then : +if ${ac_cv_prog_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP"; then @@ -9123,7 +11345,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9150,7 +11372,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_STRIP+set}" = set; then : +if ${ac_cv_prog_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP"; then @@ -9162,7 +11384,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9205,7 +11427,7 @@ if test -n "$ac_cv_prog_WINDRES"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDRES+set}" = set; then : +if ${ac_cv_prog_WINDRES+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDRES"; then @@ -9217,7 +11439,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDRES="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9247,7 +11469,7 @@ for ncn_progname in windres; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDRES+set}" = set; then : +if ${ac_cv_prog_WINDRES+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDRES"; then @@ -9259,7 +11481,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDRES="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9286,7 +11508,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDRES+set}" = set; then : +if ${ac_cv_prog_WINDRES+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDRES"; then @@ -9298,7 +11520,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDRES="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9346,7 +11568,7 @@ if test -n "$ac_cv_prog_WINDMC"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDMC+set}" = set; then : +if ${ac_cv_prog_WINDMC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDMC"; then @@ -9358,7 +11580,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDMC="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9388,7 +11610,7 @@ for ncn_progname in windmc; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDMC+set}" = set; then : +if ${ac_cv_prog_WINDMC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDMC"; then @@ -9400,7 +11622,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDMC="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9427,7 +11649,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDMC+set}" = set; then : +if ${ac_cv_prog_WINDMC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDMC"; then @@ -9439,7 +11661,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDMC="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9487,7 +11709,7 @@ if test -n "$ac_cv_prog_OBJCOPY"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJCOPY+set}" = set; then : +if ${ac_cv_prog_OBJCOPY+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJCOPY"; then @@ -9499,7 +11721,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJCOPY="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9529,7 +11751,7 @@ for ncn_progname in objcopy; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJCOPY+set}" = set; then : +if ${ac_cv_prog_OBJCOPY+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJCOPY"; then @@ -9541,7 +11763,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJCOPY="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9568,7 +11790,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJCOPY+set}" = set; then : +if ${ac_cv_prog_OBJCOPY+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJCOPY"; then @@ -9580,7 +11802,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJCOPY="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9628,7 +11850,7 @@ if test -n "$ac_cv_prog_OBJDUMP"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJDUMP+set}" = set; then : +if ${ac_cv_prog_OBJDUMP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP"; then @@ -9640,7 +11862,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9670,7 +11892,7 @@ for ncn_progname in objdump; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJDUMP+set}" = set; then : +if ${ac_cv_prog_OBJDUMP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP"; then @@ -9682,7 +11904,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9709,7 +11931,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJDUMP+set}" = set; then : +if ${ac_cv_prog_OBJDUMP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP"; then @@ -9721,7 +11943,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9757,6 +11979,147 @@ fi +if test -n "$OTOOL"; then + ac_cv_prog_OTOOL=$OTOOL +elif test -n "$ac_cv_prog_OTOOL"; then + OTOOL=$ac_cv_prog_OTOOL +fi + +if test -n "$ac_cv_prog_OTOOL"; then + for ncn_progname in otool; do + # Extract the first word of "${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +$as_echo "$OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + done +fi + +for ncn_progname in otool; do + if test -n "$ncn_tool_prefix"; then + # Extract the first word of "${ncn_tool_prefix}${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ncn_tool_prefix}${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +$as_echo "$OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi + if test -z "$ac_cv_prog_OTOOL" && test $build = $host ; then + # Extract the first word of "${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +$as_echo "$OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi + test -n "$ac_cv_prog_OTOOL" && break +done + +if test -z "$ac_cv_prog_OTOOL" ; then + set dummy otool + if test $build = $host ; then + OTOOL="$2" + else + OTOOL="${ncn_tool_prefix}$2" + fi +fi + + + if test -n "$READELF"; then ac_cv_prog_READELF=$READELF elif test -n "$ac_cv_prog_READELF"; then @@ -9769,7 +12132,7 @@ if test -n "$ac_cv_prog_READELF"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_READELF+set}" = set; then : +if ${ac_cv_prog_READELF+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$READELF"; then @@ -9781,7 +12144,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_READELF="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9811,7 +12174,7 @@ for ncn_progname in readelf; do set dummy ${ncn_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_READELF+set}" = set; then : +if ${ac_cv_prog_READELF+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$READELF"; then @@ -9823,7 +12186,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
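# The added block above brings OTOOL (the Mach-O object inspector used on Darwin
# hosts) into the set of detected host tools, with the same three-stage search as
# its neighbours: an OTOOL value already in the environment or config cache wins,
# then ${ncn_tool_prefix}otool is searched on PATH, and a bare "otool" is accepted
# only when build = host, so a cross configure never silently picks up a native
# tool.  Condensed sketch of that order (illustrative only; the generated script
# expands it inline per tool and also tries $ac_executable_extensions):
find_host_tool ()
{
  tool=
  if test -n "$ncn_tool_prefix" &&
     command -v "${ncn_tool_prefix}$1" >/dev/null 2>&1; then
    tool=${ncn_tool_prefix}$1
  elif test "$build" = "$host" && command -v "$1" >/dev/null 2>&1; then
    tool=$1
  else
    # not found: keep the conventional name (prefixed for cross builds) so a
    # later failure message still names the tool that was expected
    if test "$build" = "$host"; then tool=$1; else tool=${ncn_tool_prefix}$1; fi
  fi
  echo "$tool"
}
OTOOL=`find_host_tool otool`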
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_READELF="${ncn_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9850,7 +12213,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_READELF+set}" = set; then : +if ${ac_cv_prog_READELF+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$READELF"; then @@ -9862,7 +12225,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_READELF="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9932,7 +12295,7 @@ if test -n "$ac_cv_prog_CC_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_CC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC_FOR_TARGET"; then @@ -9944,7 +12307,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -9991,7 +12354,7 @@ if test -z "$ac_cv_prog_CC_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_CC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC_FOR_TARGET"; then @@ -10003,7 +12366,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10030,7 +12393,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_CC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC_FOR_TARGET"; then @@ -10042,7 +12405,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10093,7 +12456,7 @@ if test -n "$ac_cv_prog_CXX_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CXX_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_CXX_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CXX_FOR_TARGET"; then @@ -10105,7 +12468,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CXX_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10152,7 +12515,7 @@ if test -z "$ac_cv_prog_CXX_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CXX_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_CXX_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CXX_FOR_TARGET"; then @@ -10164,7 +12527,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CXX_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10191,7 +12554,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CXX_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_CXX_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CXX_FOR_TARGET"; then @@ -10203,7 +12566,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CXX_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10254,7 +12617,7 @@ if test -n "$ac_cv_prog_GCC_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GCC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GCC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GCC_FOR_TARGET"; then @@ -10266,7 +12629,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GCC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10313,7 +12676,7 @@ if test -z "$ac_cv_prog_GCC_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GCC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GCC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GCC_FOR_TARGET"; then @@ -10325,7 +12688,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GCC_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10352,7 +12715,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GCC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GCC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GCC_FOR_TARGET"; then @@ -10364,7 +12727,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GCC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10410,7 +12773,7 @@ if test -n "$ac_cv_prog_GFORTRAN_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GFORTRAN_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GFORTRAN_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GFORTRAN_FOR_TARGET"; then @@ -10422,7 +12785,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GFORTRAN_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10469,7 +12832,7 @@ if test -z "$ac_cv_prog_GFORTRAN_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GFORTRAN_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GFORTRAN_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GFORTRAN_FOR_TARGET"; then @@ -10481,7 +12844,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GFORTRAN_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10508,7 +12871,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GFORTRAN_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GFORTRAN_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GFORTRAN_FOR_TARGET"; then @@ -10520,7 +12883,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GFORTRAN_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10571,7 +12934,7 @@ if test -n "$ac_cv_prog_GOC_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GOC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GOC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GOC_FOR_TARGET"; then @@ -10583,7 +12946,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_GOC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10630,7 +12993,46 @@ if test -z "$ac_cv_prog_GOC_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GOC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GOC_FOR_TARGET+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$GOC_FOR_TARGET"; then + ac_cv_prog_GOC_FOR_TARGET="$GOC_FOR_TARGET" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GOC_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +GOC_FOR_TARGET=$ac_cv_prog_GOC_FOR_TARGET +if test -n "$GOC_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GOC_FOR_TARGET" >&5 +$as_echo "$GOC_FOR_TARGET" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi + if test -z "$ac_cv_prog_GOC_FOR_TARGET" && test $build = $target ; then + # Extract the first word of "${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_GOC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$GOC_FOR_TARGET"; then @@ -10642,8 +13044,130 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_GOC_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GOC_FOR_TARGET="${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +GOC_FOR_TARGET=$ac_cv_prog_GOC_FOR_TARGET +if test -n "$GOC_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GOC_FOR_TARGET" >&5 +$as_echo "$GOC_FOR_TARGET" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi + test -n "$ac_cv_prog_GOC_FOR_TARGET" && break + done +fi + +if test -z "$ac_cv_prog_GOC_FOR_TARGET" ; then + set dummy gccgo + if test $build = $target ; then + GOC_FOR_TARGET="$2" + else + GOC_FOR_TARGET="${ncn_target_tool_prefix}$2" + fi +else + GOC_FOR_TARGET="$ac_cv_prog_GOC_FOR_TARGET" +fi + + + +if test -n "$GDC_FOR_TARGET"; then + ac_cv_prog_GDC_FOR_TARGET=$GDC_FOR_TARGET +elif test -n "$ac_cv_prog_GDC_FOR_TARGET"; then + GDC_FOR_TARGET=$ac_cv_prog_GDC_FOR_TARGET +fi + +if test -n "$ac_cv_prog_GDC_FOR_TARGET"; then + for ncn_progname in gdc; do + # Extract the first word of "${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_GDC_FOR_TARGET+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$GDC_FOR_TARGET"; then + ac_cv_prog_GDC_FOR_TARGET="$GDC_FOR_TARGET" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GDC_FOR_TARGET="${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +GDC_FOR_TARGET=$ac_cv_prog_GDC_FOR_TARGET +if test -n "$GDC_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GDC_FOR_TARGET" >&5 +$as_echo "$GDC_FOR_TARGET" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + done +fi + +if test -z "$ac_cv_prog_GDC_FOR_TARGET" && test -n "$with_build_time_tools"; then + for ncn_progname in gdc; do + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ncn_progname} in $with_build_time_tools" >&5 +$as_echo_n "checking for ${ncn_progname} in $with_build_time_tools... " >&6; } + if test -x $with_build_time_tools/${ncn_progname}; then + ac_cv_prog_GDC_FOR_TARGET=$with_build_time_tools/${ncn_progname} + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + break + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + done +fi + +if test -z "$ac_cv_prog_GDC_FOR_TARGET"; then + for ncn_progname in gdc; do + if test -n "$ncn_target_tool_prefix"; then + # Extract the first word of "${ncn_target_tool_prefix}${ncn_progname}", so it can be a program name with args. 
+set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_GDC_FOR_TARGET+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$GDC_FOR_TARGET"; then + ac_cv_prog_GDC_FOR_TARGET="$GDC_FOR_TARGET" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GDC_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -10653,10 +13177,10 @@ IFS=$as_save_IFS fi fi -GOC_FOR_TARGET=$ac_cv_prog_GOC_FOR_TARGET -if test -n "$GOC_FOR_TARGET"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GOC_FOR_TARGET" >&5 -$as_echo "$GOC_FOR_TARGET" >&6; } +GDC_FOR_TARGET=$ac_cv_prog_GDC_FOR_TARGET +if test -n "$GDC_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GDC_FOR_TARGET" >&5 +$as_echo "$GDC_FOR_TARGET" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } @@ -10664,16 +13188,16 @@ fi fi - if test -z "$ac_cv_prog_GOC_FOR_TARGET" && test $build = $target ; then + if test -z "$ac_cv_prog_GDC_FOR_TARGET" && test $build = $target ; then # Extract the first word of "${ncn_progname}", so it can be a program name with args. set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_GOC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_GDC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$GOC_FOR_TARGET"; then - ac_cv_prog_GOC_FOR_TARGET="$GOC_FOR_TARGET" # Let the user override the test. + if test -n "$GDC_FOR_TARGET"; then + ac_cv_prog_GDC_FOR_TARGET="$GDC_FOR_TARGET" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -10681,8 +13205,8 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_GOC_FOR_TARGET="${ncn_progname}" + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GDC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -10692,10 +13216,10 @@ IFS=$as_save_IFS fi fi -GOC_FOR_TARGET=$ac_cv_prog_GOC_FOR_TARGET -if test -n "$GOC_FOR_TARGET"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GOC_FOR_TARGET" >&5 -$as_echo "$GOC_FOR_TARGET" >&6; } +GDC_FOR_TARGET=$ac_cv_prog_GDC_FOR_TARGET +if test -n "$GDC_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GDC_FOR_TARGET" >&5 +$as_echo "$GDC_FOR_TARGET" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } @@ -10703,19 +13227,19 @@ fi fi - test -n "$ac_cv_prog_GOC_FOR_TARGET" && break + test -n "$ac_cv_prog_GDC_FOR_TARGET" && break done fi -if test -z "$ac_cv_prog_GOC_FOR_TARGET" ; then - set dummy gccgo +if test -z "$ac_cv_prog_GDC_FOR_TARGET" ; then + set dummy gdc if test $build = $target ; then - GOC_FOR_TARGET="$2" + GDC_FOR_TARGET="$2" else - GOC_FOR_TARGET="${ncn_target_tool_prefix}$2" + GDC_FOR_TARGET="${ncn_target_tool_prefix}$2" fi else - GOC_FOR_TARGET="$ac_cv_prog_GOC_FOR_TARGET" + GDC_FOR_TARGET="$ac_cv_prog_GDC_FOR_TARGET" fi @@ -10760,7 +13284,7 @@ if test -z "$ac_cv_path_AR_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy ar; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_AR_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_AR_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $AR_FOR_TARGET in @@ -10774,7 +13298,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_AR_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10812,7 +13336,7 @@ if test -n "$ac_cv_prog_AR_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AR_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_AR_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR_FOR_TARGET"; then @@ -10824,7 +13348,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10871,7 +13395,7 @@ if test -z "$ac_cv_prog_AR_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
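# The GDC_FOR_TARGET block above wires the D compiler driver into the target-tool
# detection, mirroring GOC_FOR_TARGET: an explicit GDC_FOR_TARGET in the environment
# or config cache wins; next --with-build-time-tools=DIR is checked for an executable
# "gdc"; then ${ncn_target_tool_prefix}gdc is searched on PATH; a bare "gdc" is
# accepted only when build = target, and otherwise the prefixed name is assumed so
# any failure surfaces later with a sensible tool name.  Sketch of the
# build-time-tools stage (illustrative; /opt/cross/bin is a hypothetical example):
with_build_time_tools=/opt/cross/bin
for ncn_progname in gdc; do
  if test -x "$with_build_time_tools/$ncn_progname"; then
    GDC_FOR_TARGET=$with_build_time_tools/$ncn_progname
    break
  fi
done
echo "GDC_FOR_TARGET=${GDC_FOR_TARGET:-not found in $with_build_time_tools}"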
" >&6; } -if test "${ac_cv_prog_AR_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_AR_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR_FOR_TARGET"; then @@ -10883,7 +13407,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10910,7 +13434,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AR_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_AR_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR_FOR_TARGET"; then @@ -10922,7 +13446,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -10990,7 +13514,7 @@ if test -z "$ac_cv_path_AS_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy as; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_AS_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_AS_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $AS_FOR_TARGET in @@ -11004,7 +13528,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_AS_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11042,7 +13566,7 @@ if test -n "$ac_cv_prog_AS_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AS_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_AS_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AS_FOR_TARGET"; then @@ -11054,7 +13578,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AS_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11101,7 +13625,7 @@ if test -z "$ac_cv_prog_AS_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AS_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_AS_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AS_FOR_TARGET"; then @@ -11113,7 +13637,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AS_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11140,7 +13664,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_AS_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_AS_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AS_FOR_TARGET"; then @@ -11152,7 +13676,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AS_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11220,7 +13744,7 @@ if test -z "$ac_cv_path_DLLTOOL_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy dlltool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_DLLTOOL_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_DLLTOOL_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $DLLTOOL_FOR_TARGET in @@ -11234,7 +13758,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_DLLTOOL_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11272,7 +13796,7 @@ if test -n "$ac_cv_prog_DLLTOOL_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_DLLTOOL_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_DLLTOOL_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL_FOR_TARGET"; then @@ -11284,7 +13808,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11331,7 +13855,7 @@ if test -z "$ac_cv_prog_DLLTOOL_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_DLLTOOL_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_DLLTOOL_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL_FOR_TARGET"; then @@ -11343,7 +13867,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11370,7 +13894,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_DLLTOOL_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_DLLTOOL_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL_FOR_TARGET"; then @@ -11382,7 +13906,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11450,7 +13974,7 @@ if test -z "$ac_cv_path_LD_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy ld; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_LD_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_LD_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $LD_FOR_TARGET in @@ -11464,7 +13988,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_LD_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11502,7 +14026,7 @@ if test -n "$ac_cv_prog_LD_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LD_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_LD_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LD_FOR_TARGET"; then @@ -11514,7 +14038,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LD_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11561,7 +14085,7 @@ if test -z "$ac_cv_prog_LD_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LD_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_LD_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LD_FOR_TARGET"; then @@ -11573,7 +14097,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LD_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11600,7 +14124,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LD_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_LD_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LD_FOR_TARGET"; then @@ -11612,7 +14136,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LD_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11680,7 +14204,7 @@ if test -z "$ac_cv_path_LIPO_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy lipo; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_LIPO_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_LIPO_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $LIPO_FOR_TARGET in @@ -11694,7 +14218,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_LIPO_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11732,7 +14256,7 @@ if test -n "$ac_cv_prog_LIPO_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LIPO_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_LIPO_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO_FOR_TARGET"; then @@ -11744,7 +14268,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11791,7 +14315,7 @@ if test -z "$ac_cv_prog_LIPO_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LIPO_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_LIPO_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO_FOR_TARGET"; then @@ -11803,7 +14327,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11830,7 +14354,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_LIPO_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_LIPO_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO_FOR_TARGET"; then @@ -11842,7 +14366,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11910,7 +14434,7 @@ if test -z "$ac_cv_path_NM_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy nm; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_NM_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_NM_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $NM_FOR_TARGET in @@ -11924,7 +14448,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_NM_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -11962,7 +14486,7 @@ if test -n "$ac_cv_prog_NM_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_NM_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_NM_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM_FOR_TARGET"; then @@ -11974,7 +14498,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NM_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12021,7 +14545,7 @@ if test -z "$ac_cv_prog_NM_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_NM_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_NM_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM_FOR_TARGET"; then @@ -12033,7 +14557,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NM_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12060,7 +14584,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_NM_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_NM_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM_FOR_TARGET"; then @@ -12072,7 +14596,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NM_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12140,7 +14664,7 @@ if test -z "$ac_cv_path_OBJCOPY_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy objcopy; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_OBJCOPY_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_OBJCOPY_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $OBJCOPY_FOR_TARGET in @@ -12154,7 +14678,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_OBJCOPY_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12192,7 +14716,7 @@ if test -n "$ac_cv_prog_OBJCOPY_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJCOPY_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_OBJCOPY_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJCOPY_FOR_TARGET"; then @@ -12204,7 +14728,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJCOPY_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12251,7 +14775,7 @@ if test -z "$ac_cv_prog_OBJCOPY_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJCOPY_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_OBJCOPY_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJCOPY_FOR_TARGET"; then @@ -12263,7 +14787,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJCOPY_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12290,7 +14814,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJCOPY_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_OBJCOPY_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJCOPY_FOR_TARGET"; then @@ -12302,7 +14826,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJCOPY_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12370,7 +14894,7 @@ if test -z "$ac_cv_path_OBJDUMP_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy objdump; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_OBJDUMP_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_OBJDUMP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $OBJDUMP_FOR_TARGET in @@ -12384,7 +14908,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_OBJDUMP_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12422,7 +14946,7 @@ if test -n "$ac_cv_prog_OBJDUMP_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJDUMP_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_OBJDUMP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP_FOR_TARGET"; then @@ -12434,7 +14958,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12481,7 +15005,7 @@ if test -z "$ac_cv_prog_OBJDUMP_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJDUMP_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_OBJDUMP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP_FOR_TARGET"; then @@ -12493,7 +15017,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12520,7 +15044,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_OBJDUMP_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_OBJDUMP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP_FOR_TARGET"; then @@ -12532,7 +15056,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12576,6 +15100,236 @@ fi +if test -z "$ac_cv_path_OTOOL_FOR_TARGET" ; then + if test -n "$with_build_time_tools"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for otool in $with_build_time_tools" >&5 +$as_echo_n "checking for otool in $with_build_time_tools... " >&6; } + if test -x $with_build_time_tools/otool; then + OTOOL_FOR_TARGET=`cd $with_build_time_tools && pwd`/otool + ac_cv_path_OTOOL_FOR_TARGET=$OTOOL_FOR_TARGET + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_OTOOL_FOR_TARGET" >&5 +$as_echo "$ac_cv_path_OTOOL_FOR_TARGET" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + elif test $build != $host && test $have_gcc_for_target = yes; then + OTOOL_FOR_TARGET=`$GCC_FOR_TARGET --print-prog-name=otool` + test $OTOOL_FOR_TARGET = otool && OTOOL_FOR_TARGET= + test -n "$OTOOL_FOR_TARGET" && ac_cv_path_OTOOL_FOR_TARGET=$OTOOL_FOR_TARGET + fi +fi +if test -z "$ac_cv_path_OTOOL_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_OTOOL_FOR_TARGET+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $OTOOL_FOR_TARGET in + [\\/]* | ?:[\\/]*) + ac_cv_path_OTOOL_FOR_TARGET="$OTOOL_FOR_TARGET" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $gcc_cv_tool_dirs +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_OTOOL_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +OTOOL_FOR_TARGET=$ac_cv_path_OTOOL_FOR_TARGET +if test -n "$OTOOL_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL_FOR_TARGET" >&5 +$as_echo "$OTOOL_FOR_TARGET" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_OTOOL_FOR_TARGET" ; then + + +if test -n "$OTOOL_FOR_TARGET"; then + ac_cv_prog_OTOOL_FOR_TARGET=$OTOOL_FOR_TARGET +elif test -n "$ac_cv_prog_OTOOL_FOR_TARGET"; then + OTOOL_FOR_TARGET=$ac_cv_prog_OTOOL_FOR_TARGET +fi + +if test -n "$ac_cv_prog_OTOOL_FOR_TARGET"; then + for ncn_progname in otool; do + # Extract the first word of "${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL_FOR_TARGET+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL_FOR_TARGET"; then + ac_cv_prog_OTOOL_FOR_TARGET="$OTOOL_FOR_TARGET" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL_FOR_TARGET="${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL_FOR_TARGET=$ac_cv_prog_OTOOL_FOR_TARGET +if test -n "$OTOOL_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL_FOR_TARGET" >&5 +$as_echo "$OTOOL_FOR_TARGET" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + done +fi + +if test -z "$ac_cv_prog_OTOOL_FOR_TARGET" && test -n "$with_build_time_tools"; then + for ncn_progname in otool; do + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ncn_progname} in $with_build_time_tools" >&5 +$as_echo_n "checking for ${ncn_progname} in $with_build_time_tools... " >&6; } + if test -x $with_build_time_tools/${ncn_progname}; then + ac_cv_prog_OTOOL_FOR_TARGET=$with_build_time_tools/${ncn_progname} + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + break + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + done +fi + +if test -z "$ac_cv_prog_OTOOL_FOR_TARGET"; then + for ncn_progname in otool; do + if test -n "$ncn_target_tool_prefix"; then + # Extract the first word of "${ncn_target_tool_prefix}${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL_FOR_TARGET+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL_FOR_TARGET"; then + ac_cv_prog_OTOOL_FOR_TARGET="$OTOOL_FOR_TARGET" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL_FOR_TARGET=$ac_cv_prog_OTOOL_FOR_TARGET +if test -n "$OTOOL_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL_FOR_TARGET" >&5 +$as_echo "$OTOOL_FOR_TARGET" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi + if test -z "$ac_cv_prog_OTOOL_FOR_TARGET" && test $build = $target ; then + # Extract the first word of "${ncn_progname}", so it can be a program name with args. +set dummy ${ncn_progname}; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL_FOR_TARGET+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL_FOR_TARGET"; then + ac_cv_prog_OTOOL_FOR_TARGET="$OTOOL_FOR_TARGET" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL_FOR_TARGET="${ncn_progname}" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL_FOR_TARGET=$ac_cv_prog_OTOOL_FOR_TARGET +if test -n "$OTOOL_FOR_TARGET"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL_FOR_TARGET" >&5 +$as_echo "$OTOOL_FOR_TARGET" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi + test -n "$ac_cv_prog_OTOOL_FOR_TARGET" && break + done +fi + +if test -z "$ac_cv_prog_OTOOL_FOR_TARGET" ; then + set dummy otool + if test $build = $target ; then + OTOOL_FOR_TARGET="$2" + else + OTOOL_FOR_TARGET="${ncn_target_tool_prefix}$2" + fi +else + OTOOL_FOR_TARGET="$ac_cv_prog_OTOOL_FOR_TARGET" +fi + +else + OTOOL_FOR_TARGET=$ac_cv_path_OTOOL_FOR_TARGET +fi + + + + if test -z "$ac_cv_path_RANLIB_FOR_TARGET" ; then if test -n "$with_build_time_tools"; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ranlib in $with_build_time_tools" >&5 @@ -12600,7 +15354,7 @@ if test -z "$ac_cv_path_RANLIB_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy ranlib; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_RANLIB_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_RANLIB_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $RANLIB_FOR_TARGET in @@ -12614,7 +15368,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_RANLIB_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12652,7 +15406,7 @@ if test -n "$ac_cv_prog_RANLIB_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_RANLIB_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_RANLIB_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB_FOR_TARGET"; then @@ -12664,7 +15418,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12711,7 +15465,7 @@ if test -z "$ac_cv_prog_RANLIB_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_RANLIB_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_RANLIB_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB_FOR_TARGET"; then @@ -12723,7 +15477,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12750,7 +15504,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_RANLIB_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_RANLIB_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB_FOR_TARGET"; then @@ -12762,7 +15516,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12830,7 +15584,7 @@ if test -z "$ac_cv_path_READELF_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy readelf; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_READELF_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_READELF_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $READELF_FOR_TARGET in @@ -12844,7 +15598,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_READELF_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12882,7 +15636,7 @@ if test -n "$ac_cv_prog_READELF_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_READELF_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_READELF_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$READELF_FOR_TARGET"; then @@ -12894,7 +15648,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_READELF_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12941,7 +15695,7 @@ if test -z "$ac_cv_prog_READELF_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_READELF_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_READELF_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$READELF_FOR_TARGET"; then @@ -12953,7 +15707,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_READELF_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -12980,7 +15734,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_READELF_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_READELF_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$READELF_FOR_TARGET"; then @@ -12992,7 +15746,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_READELF_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13060,7 +15814,7 @@ if test -z "$ac_cv_path_STRIP_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_STRIP_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_STRIP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $STRIP_FOR_TARGET in @@ -13074,7 +15828,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_STRIP_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13112,7 +15866,7 @@ if test -n "$ac_cv_prog_STRIP_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_STRIP_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_STRIP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP_FOR_TARGET"; then @@ -13124,7 +15878,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13171,7 +15925,7 @@ if test -z "$ac_cv_prog_STRIP_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_STRIP_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_STRIP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP_FOR_TARGET"; then @@ -13183,7 +15937,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13210,7 +15964,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_STRIP_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_STRIP_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP_FOR_TARGET"; then @@ -13222,7 +15976,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13290,7 +16044,7 @@ if test -z "$ac_cv_path_WINDRES_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy windres; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_WINDRES_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_WINDRES_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $WINDRES_FOR_TARGET in @@ -13304,7 +16058,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_WINDRES_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13342,7 +16096,7 @@ if test -n "$ac_cv_prog_WINDRES_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_WINDRES_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_WINDRES_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDRES_FOR_TARGET"; then @@ -13354,7 +16108,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDRES_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13401,7 +16155,7 @@ if test -z "$ac_cv_prog_WINDRES_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDRES_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_WINDRES_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDRES_FOR_TARGET"; then @@ -13413,7 +16167,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDRES_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13440,7 +16194,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDRES_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_WINDRES_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDRES_FOR_TARGET"; then @@ -13452,7 +16206,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDRES_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13520,7 +16274,7 @@ if test -z "$ac_cv_path_WINDMC_FOR_TARGET" && test -n "$gcc_cv_tool_dirs"; then set dummy windmc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_path_WINDMC_FOR_TARGET+set}" = set; then : +if ${ac_cv_path_WINDMC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else case $WINDMC_FOR_TARGET in @@ -13534,7 +16288,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_WINDMC_FOR_TARGET="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13572,7 +16326,7 @@ if test -n "$ac_cv_prog_WINDMC_FOR_TARGET"; then set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_WINDMC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_WINDMC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDMC_FOR_TARGET"; then @@ -13584,7 +16338,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDMC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13631,7 +16385,7 @@ if test -z "$ac_cv_prog_WINDMC_FOR_TARGET"; then set dummy ${ncn_target_tool_prefix}${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDMC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_WINDMC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDMC_FOR_TARGET"; then @@ -13643,7 +16397,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDMC_FOR_TARGET="${ncn_target_tool_prefix}${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -13670,7 +16424,7 @@ fi set dummy ${ncn_progname}; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_WINDMC_FOR_TARGET+set}" = set; then : +if ${ac_cv_prog_WINDMC_FOR_TARGET+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$WINDMC_FOR_TARGET"; then @@ -13682,7 +16436,7 @@ do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_WINDMC_FOR_TARGET="${ncn_progname}" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 @@ -14116,6 +16870,51 @@ $as_echo "pre-installed" >&6; } fi fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking where to find the target gdc" >&5 +$as_echo_n "checking where to find the target gdc... 
" >&6; } +if test "x${build}" != "x${host}" ; then + if expr "x$GDC_FOR_TARGET" : "x/" > /dev/null; then + # We already found the complete path + ac_dir=`dirname $GDC_FOR_TARGET` + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed in $ac_dir" >&5 +$as_echo "pre-installed in $ac_dir" >&6; } + else + # Canadian cross, just use what we found + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed" >&5 +$as_echo "pre-installed" >&6; } + fi +else + ok=yes + case " ${configdirs} " in + *" gcc "*) ;; + *) ok=no ;; + esac + case ,${enable_languages}, in + *,d,*) ;; + *) ok=no ;; + esac + if test $ok = yes; then + # An in-tree tool is available and we can use it + GDC_FOR_TARGET='$$r/$(HOST_SUBDIR)/gcc/gdc -B$$r/$(HOST_SUBDIR)/gcc/' + { $as_echo "$as_me:${as_lineno-$LINENO}: result: just compiled" >&5 +$as_echo "just compiled" >&6; } + elif expr "x$GDC_FOR_TARGET" : "x/" > /dev/null; then + # We already found the complete path + ac_dir=`dirname $GDC_FOR_TARGET` + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed in $ac_dir" >&5 +$as_echo "pre-installed in $ac_dir" >&6; } + elif test "x$target" = "x$host"; then + # We can use an host tool + GDC_FOR_TARGET='$(GDC)' + { $as_echo "$as_me:${as_lineno-$LINENO}: result: host tool" >&5 +$as_echo "host tool" >&6; } + else + # We need a cross tool + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed" >&5 +$as_echo "pre-installed" >&6; } + fi +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking where to find the target ld" >&5 $as_echo_n "checking where to find the target ld... " >&6; } if test "x${build}" != "x${host}" ; then @@ -14315,6 +17114,37 @@ $as_echo "pre-installed" >&6; } fi fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking where to find the target otool" >&5 +$as_echo_n "checking where to find the target otool... " >&6; } +if test "x${build}" != "x${host}" ; then + if expr "x$OTOOL_FOR_TARGET" : "x/" > /dev/null; then + # We already found the complete path + ac_dir=`dirname $OTOOL_FOR_TARGET` + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed in $ac_dir" >&5 +$as_echo "pre-installed in $ac_dir" >&6; } + else + # Canadian cross, just use what we found + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed" >&5 +$as_echo "pre-installed" >&6; } + fi +else + if expr "x$OTOOL_FOR_TARGET" : "x/" > /dev/null; then + # We already found the complete path + ac_dir=`dirname $OTOOL_FOR_TARGET` + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed in $ac_dir" >&5 +$as_echo "pre-installed in $ac_dir" >&6; } + elif test "x$target" = "x$host"; then + # We can use an host tool + OTOOL_FOR_TARGET='$(OTOOL)' + { $as_echo "$as_me:${as_lineno-$LINENO}: result: host tool" >&5 +$as_echo "host tool" >&6; } + else + # We need a cross tool + { $as_echo "$as_me:${as_lineno-$LINENO}: result: pre-installed" >&5 +$as_echo "pre-installed" >&6; } + fi +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking where to find the target ranlib" >&5 $as_echo_n "checking where to find the target ranlib... 
" >&6; } if test "x${build}" != "x${host}" ; then @@ -14648,8 +17478,8 @@ fi compare_exclusions="gcc/cc*-checksum\$(objext) | gcc/ada/*tools/*" case "$target" in hppa*64*-*-hpux*) ;; - hppa*-*-hpux*) compare_exclusions="gcc/cc*-checksum\$(objext) | */libgcc/lib2funcs* | gcc/ada/*tools/* | gcc/function-tests.o" ;; - powerpc*-ibm-aix*) compare_exclusions="gcc/cc*-checksum\$(objext) | gcc/ada/*tools/* | *libgomp*\$(objext)" ;; + hppa*-*-hpux*) compare_exclusions="$compare_exclusions | */libgcc/lib2funcs* | gcc/function-tests.o" ;; + powerpc*-ibm-aix*) compare_exclusions="$compare_exclusions | *libgomp*\$(objext)" ;; esac @@ -14719,10 +17549,21 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then - test "x$cache_file" != "x/dev/null" && + if test "x$cache_file" != "x/dev/null"; then { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} - cat confcache >$cache_file + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi else { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} @@ -14774,6 +17615,7 @@ DEFS=`sed -n "$ac_script" confdefs.h` ac_libobjs= ac_ltlibobjs= +U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' @@ -14789,7 +17631,7 @@ LTLIBOBJS=$ac_ltlibobjs -: ${CONFIG_STATUS=./config.status} +: "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" @@ -14890,6 +17732,7 @@ fi IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. +as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -14935,19 +17778,19 @@ export LANGUAGE (unset CDPATH) >/dev/null 2>&1 && unset CDPATH -# as_fn_error ERROR [LINENO LOG_FD] -# --------------------------------- +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with status $?, using 1 if that was 0. +# script with STATUS, using 1 if that was 0. as_fn_error () { - as_status=$?; test $as_status -eq 0 && as_status=1 - if test "$3"; then - as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3 + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $1" >&2 + $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error @@ -15085,16 +17928,16 @@ if (echo >conf$$.file) 2>/dev/null; then # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. 
+ # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -p' + as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else - as_ln_s='cp -p' + as_ln_s='cp -pR' fi else - as_ln_s='cp -p' + as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null @@ -15143,7 +17986,7 @@ $as_echo X"$as_dir" | test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p @@ -15154,28 +17997,16 @@ else as_mkdir_p=false fi -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in #( - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" @@ -15197,7 +18028,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # values after options handling. ac_log=" This file was extended by $as_me, which was -generated by GNU Autoconf 2.64. Invocation command line was +generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -15232,6 +18063,7 @@ Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit + --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files @@ -15246,12 +18078,13 @@ Report bugs to the package provider." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ config.status -configured by $0, generated by GNU Autoconf 2.64, - with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" -Copyright (C) 2009 Free Software Foundation, Inc. +Copyright (C) 2012 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." 
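Most of the churn in the hunks above is mechanical fallout from regenerating configure with Autoconf 2.69 instead of 2.64: every `test "${ac_cv_prog_*+set}" = set` cache check becomes the fork-free `${var+:} false` form, the old `$as_test_x` eval construct is replaced by the new as_fn_executable_p helper, and as_fn_error now takes the exit status as an explicit first argument. A minimal, self-contained sketch of those idioms (illustrative only, not part of the patch):

    #!/bin/sh
    # Cache-variable test: `${var+:} false` runs `:` (true) when the
    # variable is set to any value, and `false` when it is unset.
    ac_cv_prog_LD_FOR_TARGET=ld
    if ${ac_cv_prog_LD_FOR_TARGET+:} false; then
      echo "LD_FOR_TARGET result is cached: $ac_cv_prog_LD_FOR_TARGET"
    fi

    # Executable probe: one helper instead of the $as_test_x eval dance.
    as_fn_executable_p () {
      test -f "$1" && test -x "$1"
    }
    as_fn_executable_p /bin/sh && echo "/bin/sh is an executable regular file"

    # Error reporting: 2.69 passes the status explicitly, e.g.
    #   as_fn_error $? "could not create $ac_file" "$LINENO" 5
    # where the 2.64 helper derived it from $? inside the function.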
@@ -15268,11 +18101,16 @@ ac_need_defaults=: while test $# != 0 do case $1 in - --*=*) + --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; *) ac_option=$1 ac_optarg=$2 @@ -15286,12 +18124,15 @@ do ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; @@ -15302,7 +18143,7 @@ do ac_cs_silent=: ;; # This is an error. - -*) as_fn_error "unrecognized option: \`$1' + -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." ;; *) as_fn_append ac_config_targets " $1" @@ -15322,7 +18163,7 @@ fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then - set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' @@ -15360,7 +18201,7 @@ do case $ac_config_target in "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; - *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done @@ -15381,9 +18222,10 @@ fi # after its creation but before its name has been assigned to `$tmp'. $debug || { - tmp= + tmp= ac_tmp= trap 'exit_status=$? - { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } @@ -15391,12 +18233,13 @@ $debug || { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && - test -n "$tmp" && test -d "$tmp" + test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") -} || as_fn_error "cannot create a temporary directory in ." "$LINENO" 5 +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. @@ -15430,24 +18273,24 @@ if test "x$ac_cr" = x; then fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then - ac_cs_awk_cr='\r' + ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi -echo 'BEGIN {' >"$tmp/subs1.awk" && +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF # Create commands to substitute file output variables. { echo "cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1" && - echo 'cat >>"\$tmp/subs1.awk" <<\\_ACAWK &&' && + echo 'cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&' && echo "$ac_subst_files" | sed 's/.*/F["&"]="$&"/' && echo "_ACAWK" && echo "_ACEOF" } >conf$$files.sh && . ./conf$$files.sh || - as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5 + as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 rm -f conf$$files.sh { @@ -15455,18 +18298,18 @@ rm -f conf$$files.sh echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || - as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5 -ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'` + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || - as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5 + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then - as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5 + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi @@ -15474,7 +18317,7 @@ done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -cat >>"\$tmp/subs1.awk" <<\\_ACAWK && +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h @@ -15488,7 +18331,7 @@ s/'"$ac_delim"'$// t delim :nl h -s/\(.\{148\}\).*/\1/ +s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p @@ -15502,7 +18345,7 @@ s/.\{148\}// t nl :delim h -s/\(.\{148\}\).*/\1/ +s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p @@ -15522,7 +18365,7 @@ t delim rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK -cat >>"\$tmp/subs1.awk" <<_ACAWK && +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" \$ac_cs_awk_pipe_init @@ -15560,21 +18403,29 @@ if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat -fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \ - || as_fn_error "could not setup config files machinery" "$LINENO" 5 +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 _ACEOF -# VPATH may cause trouble with some makes, so we remove $(srcdir), -# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=/{ -s/:*\$(srcdir):*/:/ -s/:*\${srcdir}:*/:/ -s/:*@srcdir@:*/:/ -s/^\([^=]*=[ ]*\):*/\1/ + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// s/^[^=]*=[ ]*$// }' fi @@ -15592,7 +18443,7 @@ do esac case $ac_mode$ac_tag in :[FHL]*:*);; - :L* | :C*:*) as_fn_error "invalid tag \`$ac_tag'" "$LINENO" 5;; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac @@ -15611,7 +18462,7 @@ do for ac_f do case $ac_f in - -) ac_f="$tmp/stdin";; + -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. 
@@ -15620,7 +18471,7 @@ do [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || - as_fn_error "cannot find input file: \`$ac_f'" "$LINENO" 5;; + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" @@ -15646,8 +18497,8 @@ $as_echo "$as_me: creating $ac_file" >&6;} esac case $ac_tag in - *:-:* | *:-) cat >"$tmp/stdin" \ - || as_fn_error "could not create $ac_file" "$LINENO" 5 ;; + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac ;; esac @@ -15779,26 +18630,27 @@ $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | if $ac_cs_awk_getline; then - $AWK -f "$tmp/subs.awk" + $AWK -f "$ac_tmp/subs.awk" else - $AWK -f "$tmp/subs.awk" | $SHELL -fi >$tmp/out \ - || as_fn_error "could not create $ac_file" "$LINENO" 5 + $AWK -f "$ac_tmp/subs.awk" | $SHELL +fi \ + >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && - { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && - { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined." >&5 +which seems to be undefined. Please make sure it is defined" >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined." >&2;} +which seems to be undefined. Please make sure it is defined" >&2;} - rm -f "$tmp/stdin" + rm -f "$ac_tmp/stdin" case $ac_file in - -) cat "$tmp/out" && rm -f "$tmp/out";; - *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";; + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ - || as_fn_error "could not create $ac_file" "$LINENO" 5 + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; @@ -15821,7 +18673,7 @@ _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || - as_fn_error "write failure creating $CONFIG_STATUS" "$LINENO" 5 + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. @@ -15842,7 +18694,7 @@ if test "$no_create" != yes; then exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. - $ac_cs_success || as_fn_exit $? + $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 diff --git a/configure.ac b/configure.ac index c3433336523c5..088e735c5dba5 100644 --- a/configure.ac +++ b/configure.ac @@ -1,6 +1,6 @@ # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, # 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, -# 2014, 2015, 2016 Free Software Foundation, Inc. +# 2014, 2015, 2016, 2019 Free Software Foundation, Inc. 
# # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ m4_include(config/acx.m4) m4_include(config/override.m4) m4_include(config/proginstall.m4) m4_include(config/elf.m4) +m4_include(config/ax_cxx_compile_stdcxx.m4) m4_include([libtool.m4]) m4_include([ltoptions.m4]) m4_include([ltsugar.m4]) @@ -31,7 +32,6 @@ m4_include([lt~obsolete.m4]) m4_include([config/isl.m4]) AC_INIT(move-if-change) -AC_PREREQ(2.64) AC_DISABLE_OPTION_CHECKING progname=$0 @@ -132,7 +132,7 @@ build_tools="build-texinfo build-flex build-bison build-m4 build-fixincludes" # these libraries are used by various programs built for the host environment #f -host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace libcpp libdecnumber gmp mpfr mpc isl libelf libiconv" +host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace libcpp libcody libdecnumber gmp mpfr mpc isl libelf libiconv libctf" # these tools are built for the host environment # Note, the powerpc-eabi build depends on sim occurring before gdb in order to @@ -140,7 +140,7 @@ host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktr # binutils, gas and ld appear in that order because it makes sense to run # "make check" in that particular order. # If --enable-gold is used, "gold" may replace "ld". -host_tools="texinfo flex bison binutils gas ld fixincludes gcc cgen sid sim gdb gprof etc expect dejagnu m4 utils guile fastjar gnattools libcc1 gotools" +host_tools="texinfo flex bison binutils gas ld fixincludes gcc cgen sid sim gdb gdbserver gprof etc expect dejagnu m4 utils guile fastjar gnattools libcc1 gotools c++tools" # these libraries are built for the target environment, and are built after # the host libraries and the host tools (which may be a cross compiler) @@ -157,14 +157,15 @@ target_libraries="target-libgcc \ target-libstdc++-v3 \ target-libsanitizer \ target-libvtv \ - target-libmpx \ target-libssp \ target-libquadmath \ target-libgfortran \ target-libffi \ target-libobjc \ target-libada \ - target-libgo" + target-libgo \ + target-libphobos \ + target-zlib" # these tools are built using the target libraries, and are intended to # run only in the target environment @@ -512,7 +513,7 @@ if test x$enable_libgomp = x ; then ;; *-*-darwin* | *-*-aix*) ;; - nvptx*-*-*) + nvptx*-*-* | amdgcn*-*-*) ;; *) noconfigdirs="$noconfigdirs target-libgomp" @@ -601,22 +602,6 @@ if test -d ${srcdir}/libvtv; then fi -# Enable libmpx on supported systems by request. -if test -d ${srcdir}/libmpx; then - if test x$enable_libmpx = x; then - AC_MSG_CHECKING([for libmpx support]) - if (srcdir=${srcdir}/libmpx; \ - . ${srcdir}/configure.tgt; \ - test "$LIBMPX_SUPPORTED" != "yes") - then - AC_MSG_RESULT([no]) - noconfigdirs="$noconfigdirs target-libmpx" - else - AC_MSG_RESULT([yes]) - fi - fi -fi - # Disable libhsail-rt on unsupported systems. if test -d ${srcdir}/libhsail-rt; then if test x$enable_libhsail_rt = x; then @@ -654,9 +639,16 @@ case "${target}" in # No hosted I/O support. noconfigdirs="$noconfigdirs target-libssp" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libssp" + ;; powerpc-*-aix* | rs6000-*-aix*) noconfigdirs="$noconfigdirs target-libssp" ;; + pru-*-*) + # No hosted I/O support. 
+ noconfigdirs="$noconfigdirs target-libssp" + ;; rl78-*-*) # libssp uses a misaligned load to trigger a fault, but the RL78 # doesn't fault for those - instead, it gives a build-time error @@ -677,6 +669,10 @@ if test "${ENABLE_LIBSTDCXX}" = "default" ; then # VxWorks uses the Dinkumware C++ library. noconfigdirs="$noconfigdirs target-libstdc++-v3" ;; + amdgcn*-*-*) + # Not ported/fails to build when using newlib. + noconfigdirs="$noconfigdirs target-libstdc++-v3" + ;; arm*-wince-pe*) # the C++ libraries don't build on top of CE's C libraries noconfigdirs="$noconfigdirs target-libstdc++-v3" @@ -684,18 +680,83 @@ if test "${ENABLE_LIBSTDCXX}" = "default" ; then avr-*-*) noconfigdirs="$noconfigdirs target-libstdc++-v3" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libstdc++-v3" + ;; ft32-*-*) noconfigdirs="$noconfigdirs target-libstdc++-v3" ;; esac fi +# Disable C++ on systems where it is known to not work. +# For testing, you can override this with --enable-languages=c++. +case ,${enable_languages}, in + *,c++,*) + ;; + *) + case "${target}" in + bpf-*-*) + unsupported_languages="$unsupported_languages c++" + ;; + esac + ;; +esac + +# Disable Objc on systems where it is known to not work. +# For testing, you can override this with --enable-languages=objc. +case ,${enable_languages}, in + *,objc,*) + ;; + *) + case "${target}" in + bpf-*-*) + unsupported_languages="$unsupported_languages objc" + ;; + esac + ;; +esac + +# Disable D on systems where it is known to not work. +# For testing, you can override this with --enable-languages=d. +case ,${enable_languages}, in + *,d,*) + ;; + *) + case "${target}" in + bpf-*-*) + unsupported_languages="$unsupported_languages d" + ;; + esac + ;; +esac + +# Disable libphobos on unsupported systems. +# For testing, you can override this with --enable-libphobos. +if test -d ${srcdir}/libphobos; then + if test x$enable_libphobos = x; then + AC_MSG_CHECKING([for libphobos support]) + if (srcdir=${srcdir}/libphobos; \ + . ${srcdir}/configure.tgt; \ + test "$LIBPHOBOS_SUPPORTED" != "yes") + then + AC_MSG_RESULT([no]) + noconfigdirs="$noconfigdirs target-libphobos" + else + AC_MSG_RESULT([yes]) + fi + fi +fi + # Disable Fortran for some systems. case "${target}" in mmix-*-*) # See . unsupported_languages="$unsupported_languages fortran" ;; + bpf-*-*) + unsupported_languages="$unsupported_languages fortran" + ;; esac # Disable libffi for some systems. @@ -742,6 +803,9 @@ case "${target}" in arm*-*-symbianelf*) noconfigdirs="$noconfigdirs target-libffi" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libffi" + ;; cris-*-* | crisv32-*-*) case "${target}" in *-*-linux*) @@ -788,11 +852,27 @@ esac # Disable the go frontend on systems where it is known to not work. Please keep # this in sync with contrib/config-list.mk. case "${target}" in -*-*-darwin* | *-*-cygwin* | *-*-mingw*) +*-*-darwin* | *-*-cygwin* | *-*-mingw* | bpf-* ) unsupported_languages="$unsupported_languages go" ;; esac +# Only allow gdbserver on some systems. +if test -d ${srcdir}/gdbserver; then + if test x$enable_gdbserver = x; then + AC_MSG_CHECKING([for gdbserver support]) + if (srcdir=${srcdir}/gdbserver; \ + . ${srcdir}/configure.srv; \ + test -n "$UNSUPPORTED") + then + AC_MSG_RESULT([no]) + noconfigdirs="$noconfigdirs gdbserver" + else + AC_MSG_RESULT([yes]) + fi + fi +fi + # Disable libgo for some systems where it is known to not work. # For testing, you can easily override this with --enable-libgo. 
if test x$enable_libgo = x; then @@ -804,6 +884,9 @@ if test x$enable_libgo = x; then *-*-cygwin* | *-*-mingw*) noconfigdirs="$noconfigdirs target-libgo" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libgo" + ;; esac fi @@ -841,6 +924,9 @@ case "${target}" in powerpc*-*-*) libgloss_dir=rs6000 ;; + pru-*-*) + libgloss_dir=pru + ;; sparc*-*-*) libgloss_dir=sparc ;; @@ -872,6 +958,9 @@ case "${target}" in sparc-*-sunos4*) noconfigdirs="$noconfigdirs target-newlib target-libgloss" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-newlib target-libgloss" + ;; *-*-aix*) noconfigdirs="$noconfigdirs target-newlib target-libgloss" ;; @@ -920,6 +1009,8 @@ case "${target}" in noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" ;; + amdgcn*-*-*) + ;; arm-*-darwin*) noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" @@ -944,6 +1035,7 @@ case "${target}" in ;; *-*-freebsd*) if test "x$with_gmp" = x && test "x$with_gmp_dir" = x \ + && ! test -d ${srcdir}/gmp \ && test -f /usr/local/include/gmp.h; then with_gmp=/usr/local fi @@ -983,6 +1075,9 @@ case "${target}" in # newlib is not 64 bit ready noconfigdirs="$noconfigdirs target-newlib target-libgloss" ;; + bpf-*-*) + noconfigdirs="$noconfigdirs target-libobjc target-libbacktrace" + ;; sh*-*-pe|mips*-*-pe|*arm-wince-pe) noconfigdirs="$noconfigdirs tcl tk itcl libgui sim" ;; @@ -1108,6 +1203,13 @@ case "${target}" in mt-*-*) noconfigdirs="$noconfigdirs sim" ;; + nfp-*-*) + noconfigdirs="$noconfigdirs ld gas gdb gprof sim" + noconfigdirs="$noconfigdirs $target_libraries" + ;; + pdp11-*-*) + noconfigdirs="$noconfigdirs gdb gprof" + ;; powerpc-*-aix*) # copied from rs6000-*-* entry noconfigdirs="$noconfigdirs gprof" @@ -1248,6 +1350,7 @@ if test "${build}" != "${host}" ; then CXX_FOR_BUILD=${CXX_FOR_BUILD-g++} GFORTRAN_FOR_BUILD=${GFORTRAN_FOR_BUILD-gfortran} GOC_FOR_BUILD=${GOC_FOR_BUILD-gccgo} + GDC_FOR_BUILD=${GDC_FOR_BUILD-gdc} DLLTOOL_FOR_BUILD=${DLLTOOL_FOR_BUILD-dlltool} LD_FOR_BUILD=${LD_FOR_BUILD-ld} NM_FOR_BUILD=${NM_FOR_BUILD-nm} @@ -1261,6 +1364,7 @@ else CXX_FOR_BUILD="\$(CXX)" GFORTRAN_FOR_BUILD="\$(GFORTRAN)" GOC_FOR_BUILD="\$(GOC)" + GDC_FOR_BUILD="\$(GDC)" DLLTOOL_FOR_BUILD="\$(DLLTOOL)" LD_FOR_BUILD="\$(LD)" NM_FOR_BUILD="\$(NM)" @@ -1299,11 +1403,11 @@ if test "$GCC" = yes; then LDFLAGS="$LDFLAGS -static-libstdc++ -static-libgcc" AC_MSG_CHECKING([whether g++ accepts -static-libstdc++ -static-libgcc]) AC_LANG_PUSH(C++) - AC_LINK_IFELSE([ + AC_LINK_IFELSE([AC_LANG_SOURCE([ #if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 5) #error -static-libstdc++ not implemented #endif -int main() {}], +int main() {}])], [AC_MSG_RESULT([yes]); have_static_libs=yes], [AC_MSG_RESULT([no])]) AC_LANG_POP(C++) @@ -1357,10 +1461,17 @@ case "$have_compiler:$host:$target:$enable_bootstrap" in ;; esac -# When bootstrapping with GCC, build stage 1 in C++98 mode to ensure that a -# C++98 compiler can still start the bootstrap. +# When bootstrapping with GCC, build stage 1 in C++11 mode to ensure that a +# C++11 compiler can still start the bootstrap. Otherwise, if building GCC, +# require C++11 (or higher). 
if test "$enable_bootstrap:$GXX" = "yes:yes"; then - CXX="$CXX -std=gnu++98" + CXX="$CXX -std=c++11" +elif test "$have_compiler" = yes; then + AX_CXX_COMPILE_STDCXX(11) + + if test "${build}" != "${host}"; then + AX_CXX_COMPILE_STDCXX(11, [], [], [_FOR_BUILD]) + fi fi # Used for setting $lt_cv_objdir @@ -1519,12 +1630,12 @@ if test -d ${srcdir}/gcc && test "x$have_gmp" = xno; then AC_MSG_CHECKING([for the correct version of mpfr.h]) AC_TRY_COMPILE([#include #include ],[ - #if MPFR_VERSION < MPFR_VERSION_NUM(2,4,0) + #if MPFR_VERSION < MPFR_VERSION_NUM(3,1,0) choke me #endif ], [AC_TRY_COMPILE([#include #include ],[ - #if MPFR_VERSION < MPFR_VERSION_NUM(2,4,2) + #if MPFR_VERSION < MPFR_VERSION_NUM(3,1,6) choke me #endif ], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([buggy but acceptable])])], @@ -1559,9 +1670,9 @@ if test -d ${srcdir}/gcc && test "x$have_gmp" = xno; then int t; mpfr_init (n); mpfr_init (x); - mpfr_atan2 (n, n, x, GMP_RNDN); - mpfr_erfc (n, x, GMP_RNDN); - mpfr_subnormalize (x, t, GMP_RNDN); + mpfr_atan2 (n, n, x, MPFR_RNDN); + mpfr_erfc (n, x, MPFR_RNDN); + mpfr_subnormalize (x, t, MPFR_RNDN); mpfr_clear(n); mpfr_clear(x); mpc_init2 (c, 53); @@ -1579,11 +1690,11 @@ if test -d ${srcdir}/gcc && test "x$have_gmp" = xno; then # The library versions listed in the error message below should match # the HARD-minimums enforced above. if test x$have_gmp != xyes; then - AC_MSG_ERROR([Building GCC requires GMP 4.2+, MPFR 2.4.0+ and MPC 0.8.0+. + AC_MSG_ERROR([Building GCC requires GMP 4.2+, MPFR 3.1.0+ and MPC 0.8.0+. Try the --with-gmp, --with-mpfr and/or --with-mpc options to specify their locations. Source code for these libraries can be found at their respective hosting sites as well as at -ftp://gcc.gnu.org/pub/gcc/infrastructure/. See also +https://gcc.gnu.org/pub/gcc/infrastructure/. See also http://gcc.gnu.org/install/prerequisites.html for additional info. If you obtained GMP, MPFR and/or MPC from a vendor distribution package, make sure that you have installed both the libraries and the header @@ -1610,6 +1721,19 @@ AC_ARG_WITH(stage1-libs, [stage1_libs=]) AC_SUBST(stage1_libs) +# Whether or not to use -static-libstdc++ and -static-libgcc. The +# default is yes if gcc is being built; no otherwise. The reason for +# this default is that gdb is sometimes linked against GNU Source +# Highlight, which is a shared library that uses C++ exceptions. In +# this case, -static-libstdc++ will cause crashes. +AC_ARG_WITH(static-standard-libraries, +[AS_HELP_STRING([--with-static-standard-libraries], + [use -static-libstdc++ and -static-libgcc (default=auto)])], +[], [with_static_standard_libraries=auto]) +if test "$with_static_standard_libraries" = auto; then + with_static_standard_libraries=$have_compiler +fi + # Linker flags to use for stage1 or when not bootstrapping. AC_ARG_WITH(stage1-ldflags, [AS_HELP_STRING([--with-stage1-ldflags=FLAGS], [linker flags for stage1])], @@ -1622,7 +1746,8 @@ AC_ARG_WITH(stage1-ldflags, # In stage 1, default to linking libstdc++ and libgcc statically with GCC # if supported. But if the user explicitly specified the libraries to use, # trust that they are doing what they want. 
- if test "$stage1_libs" = "" -a "$have_static_libs" = yes; then + if test "$with_static_standard_libraries" = yes -a "$stage1_libs" = "" \ + -a "$have_static_libs" = yes; then stage1_ldflags="-static-libstdc++ -static-libgcc" fi]) AC_SUBST(stage1_ldflags) @@ -1960,9 +2085,14 @@ if test -d ${srcdir}/gcc; then esac # Disable jit if -enable-host-shared not specified - case ${add_this_lang}:${language}:${host_shared} in - yes:jit:no) - # PR jit/64780: explicitly specify --enable-host-shared + # but not if building for Mingw. All code in Windows + # is position independent code (PIC). + case $target in + *mingw*) ;; + *) + case ${add_this_lang}:${language}:${host_shared} in + yes:jit:no) + # PR jit/64780: explicitly specify --enable-host-shared AC_MSG_ERROR([ Enabling language "jit" requires --enable-host-shared. @@ -1973,16 +2103,18 @@ If you want to build both the jit and the regular compiler, it is often best to do this via two separate configure/builds, in separate directories, to avoid imposing the performance cost of --enable-host-shared on the regular compiler.]) - ;; - all:jit:no) - AC_MSG_WARN([--enable-host-shared required to build $language]) - add_this_lang=unsupported - ;; - *:jit:no) - # Silently disable. - add_this_lang=unsupported - ;; - esac + ;; + all:jit:no) + AC_MSG_WARN([--enable-host-shared required to build $language]) + add_this_lang=unsupported + ;; + *:jit:no) + # Silently disable. + add_this_lang=unsupported + ;; + esac + ;; + esac # Disable a language that is unsupported by the target. case "${add_this_lang}: $unsupported_languages " in @@ -2679,7 +2811,7 @@ fi # or bootstrap-ubsan, bootstrap it. if echo " ${target_configdirs} " | grep " libsanitizer " > /dev/null 2>&1; then case "$BUILD_CONFIG" in - *bootstrap-asan* | *bootstrap-ubsan* ) + *bootstrap-hwasan* | *bootstrap-asan* | *bootstrap-ubsan* ) bootstrap_target_libs=${bootstrap_target_libs}target-libsanitizer, bootstrap_fixincludes=yes ;; @@ -2692,16 +2824,6 @@ if echo " ${target_configdirs} " | grep " libvtv " > /dev/null 2>&1 && bootstrap_target_libs=${bootstrap_target_libs}target-libvtv, fi -# If we are building libmpx and $BUILD_CONFIG contains bootstrap-mpx, -# bootstrap it. -if echo " ${target_configdirs} " | grep " libmpx " > /dev/null 2>&1; then - case "$BUILD_CONFIG" in - *bootstrap-mpx* ) - bootstrap_target_libs=${bootstrap_target_libs}target-libmpx, - ;; - esac -fi - # Determine whether gdb needs tk/tcl or not. # Use 'maybe' since enable_gdbtk might be true even if tk isn't available # and in that case we want gdb to be built without tk. Ugh! @@ -2730,6 +2852,18 @@ esac CONFIGURE_GDB_TK=`echo ${GDB_TK} | sed s/-all-/-configure-/g` INSTALL_GDB_TK=`echo ${GDB_TK} | sed s/-all-/-install-/g` +# gdb and gdbserver depend on gnulib and gdbsupport, but as nothing +# else does, only include them if one of these is built. The Makefile +# provides the ordering, so it's enough here to add to the list. +case " ${configdirs} " in + *\ gdb\ *) + configdirs="${configdirs} gnulib gdbsupport" + ;; + *\ gdbserver\ *) + configdirs="${configdirs} gnulib gdbsupport" + ;; +esac + # Strip out unwanted targets. # While at that, we remove Makefiles if we were started for recursive @@ -2742,7 +2876,9 @@ INSTALL_GDB_TK=`echo ${GDB_TK} | sed s/-all-/-install-/g` # extrasub-{build,host,target} not because there is any reason to split # the substitutions up that way, but only to remain below the limit of # 99 commands in a script, for HP-UX sed. -# Do not nest @if/@endif pairs, because configure will not warn you at all. 
+ +# Do not nest @if/@endif or @unless/@endunless pairs, because +# configure will not warn you at all. case "$enable_bootstrap:$ENABLE_GOLD: $configdirs :,$stage1_languages," in yes:yes:*\ gold\ *:*,c++,*) ;; @@ -2761,8 +2897,10 @@ for module in ${build_configdirs} ; do extrasub_build="$extrasub_build /^@if build-$module\$/d /^@endif build-$module\$/d +/^@unless build-$module\$/,/^@endunless build-$module\$/d /^@if build-$module-$bootstrap_suffix\$/d -/^@endif build-$module-$bootstrap_suffix\$/d" +/^@endif build-$module-$bootstrap_suffix\$/d +/^@unless build-$module-$bootstrap_suffix\$/,/^@endunless build-$module-$bootstrap_suffix\$/d" done extrasub_host= for module in ${configdirs} ; do @@ -2781,8 +2919,10 @@ for module in ${configdirs} ; do extrasub_host="$extrasub_host /^@if $module\$/d /^@endif $module\$/d +/^@unless $module\$/,/^@endunless $module\$/d /^@if $module-$host_bootstrap_suffix\$/d -/^@endif $module-$host_bootstrap_suffix\$/d" +/^@endif $module-$host_bootstrap_suffix\$/d +/^@unless $module-$host_bootstrap_suffix\$/,/^@endunless $module-$host_bootstrap_suffix\$/d" done extrasub_target= for module in ${target_configdirs} ; do @@ -2801,13 +2941,17 @@ for module in ${target_configdirs} ; do extrasub_target="$extrasub_target /^@if target-$module\$/d /^@endif target-$module\$/d +/^@unless target-$module\$/,/^@endunless target-$module\$/d /^@if target-$module-$target_bootstrap_suffix\$/d -/^@endif target-$module-$target_bootstrap_suffix\$/d" +/^@endif target-$module-$target_bootstrap_suffix\$/d +/^@unless target-$module-$target_bootstrap_suffix\$/,/^@endunless target-$module-$target_bootstrap_suffix\$/d" done # Do the final fixup along with target modules. extrasub_target="$extrasub_target -/^@if /,/^@endif /d" +/^@if /,/^@endif /d +/^@unless /d +/^@endunless /d" # Create the serialization dependencies. This uses a temporary file. 
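For reference, the extrasub_* fragments assembled above are sed scripts that are run over Makefile.in when it is turned into Makefile; a minimal sketch of their effect, assuming a configured host module named "gdb" (not text from the patch):

    # Marker lines guarding an enabled module are deleted, so the guarded
    # text itself survives; the complementary @unless region is removed
    # wholesale.
    sed -e '/^@if gdb$/d' \
        -e '/^@endif gdb$/d' \
        -e '/^@unless gdb$/,/^@endunless gdb$/d' \
        Makefile.in > Makefile

The trailing fixup expressions ('/^@if /,/^@endif /d', '/^@unless /d', '/^@endunless /d') then delete whole @if regions belonging to modules that were never configured, while keeping the body of their @unless regions; that is what makes @unless behave as the negation of @if.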
@@ -3248,6 +3392,7 @@ AC_SUBST(CXX_FOR_BUILD) AC_SUBST(DLLTOOL_FOR_BUILD) AC_SUBST(GFORTRAN_FOR_BUILD) AC_SUBST(GOC_FOR_BUILD) +AC_SUBST(GDC_FOR_BUILD) AC_SUBST(LDFLAGS_FOR_BUILD) AC_SUBST(LD_FOR_BUILD) AC_SUBST(NM_FOR_BUILD) @@ -3333,6 +3478,7 @@ NCN_STRICT_CHECK_TOOLS(WINDRES, windres) NCN_STRICT_CHECK_TOOLS(WINDMC, windmc) NCN_STRICT_CHECK_TOOLS(OBJCOPY, objcopy) NCN_STRICT_CHECK_TOOLS(OBJDUMP, objdump) +NCN_STRICT_CHECK_TOOLS(OTOOL, otool) NCN_STRICT_CHECK_TOOLS(READELF, readelf) AC_SUBST(CC) AC_SUBST(CXX) @@ -3357,6 +3503,7 @@ NCN_STRICT_CHECK_TARGET_TOOLS(CXX_FOR_TARGET, c++ g++ cxx gxx) NCN_STRICT_CHECK_TARGET_TOOLS(GCC_FOR_TARGET, gcc, ${CC_FOR_TARGET}) NCN_STRICT_CHECK_TARGET_TOOLS(GFORTRAN_FOR_TARGET, gfortran) NCN_STRICT_CHECK_TARGET_TOOLS(GOC_FOR_TARGET, gccgo) +NCN_STRICT_CHECK_TARGET_TOOLS(GDC_FOR_TARGET, gdc) ACX_CHECK_INSTALLED_TARGET_TOOL(AR_FOR_TARGET, ar) ACX_CHECK_INSTALLED_TARGET_TOOL(AS_FOR_TARGET, as) @@ -3366,6 +3513,7 @@ ACX_CHECK_INSTALLED_TARGET_TOOL(LIPO_FOR_TARGET, lipo) ACX_CHECK_INSTALLED_TARGET_TOOL(NM_FOR_TARGET, nm) ACX_CHECK_INSTALLED_TARGET_TOOL(OBJCOPY_FOR_TARGET, objcopy) ACX_CHECK_INSTALLED_TARGET_TOOL(OBJDUMP_FOR_TARGET, objdump) +ACX_CHECK_INSTALLED_TARGET_TOOL(OTOOL_FOR_TARGET, otool) ACX_CHECK_INSTALLED_TARGET_TOOL(RANLIB_FOR_TARGET, ranlib) ACX_CHECK_INSTALLED_TARGET_TOOL(READELF_FOR_TARGET, readelf) ACX_CHECK_INSTALLED_TARGET_TOOL(STRIP_FOR_TARGET, strip) @@ -3390,11 +3538,14 @@ GCC_TARGET_TOOL(gfortran, GFORTRAN_FOR_TARGET, GFORTRAN, [gcc/gfortran -B$$r/$(HOST_SUBDIR)/gcc/], fortran) GCC_TARGET_TOOL(gccgo, GOC_FOR_TARGET, GOC, [gcc/gccgo -B$$r/$(HOST_SUBDIR)/gcc/], go) +GCC_TARGET_TOOL(gdc, GDC_FOR_TARGET, GDC, + [gcc/gdc -B$$r/$(HOST_SUBDIR)/gcc/], d) GCC_TARGET_TOOL(ld, LD_FOR_TARGET, LD, [ld/ld-new]) GCC_TARGET_TOOL(lipo, LIPO_FOR_TARGET, LIPO) GCC_TARGET_TOOL(nm, NM_FOR_TARGET, NM, [binutils/nm-new]) GCC_TARGET_TOOL(objcopy, OBJCOPY_FOR_TARGET, OBJCOPY, [binutils/objcopy]) GCC_TARGET_TOOL(objdump, OBJDUMP_FOR_TARGET, OBJDUMP, [binutils/objdump]) +GCC_TARGET_TOOL(otool, OTOOL_FOR_TARGET, OTOOL) GCC_TARGET_TOOL(ranlib, RANLIB_FOR_TARGET, RANLIB, [binutils/ranlib]) GCC_TARGET_TOOL(readelf, READELF_FOR_TARGET, READELF, [binutils/readelf]) GCC_TARGET_TOOL(strip, STRIP_FOR_TARGET, STRIP, [binutils/strip-new]) @@ -3517,8 +3668,8 @@ AC_SUBST(stage2_werror_flag) compare_exclusions="gcc/cc*-checksum\$(objext) | gcc/ada/*tools/*" case "$target" in hppa*64*-*-hpux*) ;; - hppa*-*-hpux*) compare_exclusions="gcc/cc*-checksum\$(objext) | */libgcc/lib2funcs* | gcc/ada/*tools/* | gcc/function-tests.o" ;; - powerpc*-ibm-aix*) compare_exclusions="gcc/cc*-checksum\$(objext) | gcc/ada/*tools/* | *libgomp*\$(objext)" ;; + hppa*-*-hpux*) compare_exclusions="$compare_exclusions | */libgcc/lib2funcs* | gcc/function-tests.o" ;; + powerpc*-ibm-aix*) compare_exclusions="$compare_exclusions | *libgomp*\$(objext)" ;; esac AC_SUBST(compare_exclusions) diff --git a/gcc/config.gcc b/gcc/config.gcc index a8f9a63fcc779..617d2fe053405 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -1,5 +1,5 @@ # GCC target-specific configuration file. -# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# Copyright (C) 1997-2021 Free Software Foundation, Inc. #This file is part of GCC. @@ -86,6 +86,9 @@ # tm_p_file Location of file with declarations for functions # in $out_file. # +# tm_d_file A list of headers with definitions of target hook +# macros for the D compiler. +# # out_file The name of the machine description C support # file, if different from "$cpu_type/$cpu_type.c". 
# @@ -139,6 +142,9 @@ # cxx_target_objs List of extra target-dependent objects that be # linked into the C++ compiler only. # +# d_target_objs List of extra target-dependent objects that be +# linked into the D compiler only. +# # fortran_target_objs List of extra target-dependent objects that be # linked into the fortran compiler only. # @@ -191,6 +197,9 @@ # # target_has_targetm_common Set to yes or no depending on whether the # target has its own definition of targetm_common. +# +# target_has_targetdm Set to yes or no depending on whether the target +# has its own definition of targetdm. out_file= common_out_file= @@ -206,9 +215,11 @@ extra_gcc_objs= extra_options= c_target_objs= cxx_target_objs= +d_target_objs= fortran_target_objs= target_has_targetcm=no target_has_targetm_common=yes +target_has_targetdm=no tm_defines= xm_defines= # Set this to force installation and use of collect2. @@ -236,8 +247,7 @@ md_file= # Obsolete configurations. case ${target} in - powerpc*-*-*spe* \ - | tile*-*-* \ + tile*-*-* \ ) if test "x$enable_obsolete" != xyes; then echo "*** Configuration ${target} is obsolete." >&2 @@ -261,14 +271,19 @@ case ${target} in | arm*-*-elf \ | arm*-*-linux* \ | arm*-*-uclinux* \ + | cris-*-linux* \ + | crisv32-*-* \ | i[34567]86-go32-* \ | i[34567]86-*-go32* \ | m68k-*-uclinuxoldabi* \ | mips64orion*-*-rtems* \ | pdp11-*-bsd \ + | powerpc*-*-linux*paired* \ + | powerpc*-*-*spe* \ | sparc-hal-solaris2* \ + | spu*-*-* \ | thumb-*-* \ - | *-*-freebsd[12] | *-*-freebsd[12].* \ + | *-*-freebsd[12] | *-*-freebsd[1234].* \ | *-*-freebsd*aout* \ | *-*-linux*aout* \ | *-*-linux*coff* \ @@ -279,6 +294,7 @@ case ${target} in | *-*-solaris2 \ | *-*-solaris2.[0-9] \ | *-*-solaris2.[0-9].* \ + | *-*-solaris2.10* \ | *-*-sysv* \ | vax-*-vms* \ ) @@ -301,17 +317,22 @@ m32c*-*-*) ;; aarch64*-*-*) cpu_type=aarch64 - extra_headers="arm_fp16.h arm_neon.h arm_acle.h" + extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h" c_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o" - extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o" - target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" + d_target_objs="aarch64-d.o" + extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o" + target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" target_has_targetm_common=yes ;; alpha*-*-*) cpu_type=alpha extra_options="${extra_options} g.opt" ;; +amdgcn*) + cpu_type=gcn + use_gcc_stdint=wrap + ;; am33_2.0-*-linux*) cpu_type=mn10300 ;; @@ -325,10 +346,11 @@ arc*-*-*) arm*-*-*) cpu_type=arm extra_objs="arm-builtins.o aarch-common.o" - extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h" + extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h arm_bf16.h arm_mve_types.h arm_mve.h arm_cde.h" target_type_format_char='%' c_target_objs="arm-c.o" cxx_target_objs="arm-c.o" + d_target_objs="arm-d.o" extra_options="${extra_options} arm/arm-tables.opt" target_gtfiles="\$(srcdir)/config/arm/arm-builtins.c" ;; @@ -340,8 +362,8 @@ avr-*-*) bfin*-*) cpu_type=bfin ;; -crisv32-*) - cpu_type=cris +bpf-*-*) + cpu_type=bpf ;; frv*) cpu_type=frv extra_options="${extra_options} g.opt" @@ -361,7 +383,9 @@ i[34567]86-*-*) cpu_type=i386 
c_target_objs="i386-c.o" cxx_target_objs="i386-c.o" - extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o" + d_target_objs="i386-d.o" + extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" + target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" extra_options="${extra_options} fused-madd.opt" extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h @@ -384,14 +408,22 @@ i[34567]86-*-*) avx512vbmi2vlintrin.h avx512vnniintrin.h avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h avx512vpopcntdqvlintrin.h avx512bitalgintrin.h - pconfigintrin.h wbnoinvdintrin.h movdirintrin.h" + pconfigintrin.h wbnoinvdintrin.h movdirintrin.h + waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h + avx512bf16intrin.h enqcmdintrin.h serializeintrin.h + avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h + tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h + amxbf16intrin.h x86gprintrin.h uintrintrin.h + hresetintrin.h keylockerintrin.h avxvnniintrin.h" ;; x86_64-*-*) cpu_type=i386 c_target_objs="i386-c.o" cxx_target_objs="i386-c.o" + d_target_objs="i386-d.o" extra_options="${extra_options} fused-madd.opt" - extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o" + extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" + target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h @@ -413,7 +445,13 @@ x86_64-*-*) avx512vbmi2vlintrin.h avx512vnniintrin.h avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h avx512vpopcntdqvlintrin.h avx512bitalgintrin.h - pconfigintrin.h wbnoinvdintrin.h movdirintrin.h" + pconfigintrin.h wbnoinvdintrin.h movdirintrin.h + waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h + avx512bf16intrin.h enqcmdintrin.h serializeintrin.h + avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h + tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h + amxbf16intrin.h x86gprintrin.h uintrintrin.h + hresetintrin.h keylockerintrin.h avxvnniintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h @@ -439,13 +477,24 @@ microblaze*-*-*) ;; mips*-*-*) cpu_type=mips - extra_headers="loongson.h msa.h" + d_target_objs="mips-d.o" + extra_headers="loongson.h loongson-mmiintrin.h msa.h" extra_objs="frame-header-opt.o" extra_options="${extra_options} g.opt fused-madd.opt mips/mips-tables.opt" ;; nds32*) cpu_type=nds32 - extra_headers="nds32_intrinsic.h" + extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc" + case ${target} in + nds32*-*-linux*) + extra_options="${extra_options} nds32/nds32-linux.opt" + ;; + nds32*-*-elf*) + extra_options="${extra_options} nds32/nds32-elf.opt" + ;; + *) + ;; + esac extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o nds32-relax-opt.o nds32-utils.o" ;; nios2-*-*) @@ -455,52 +504,54 @@ nios2-*-*) nvptx-*-*) cpu_type=nvptx ;; -powerpc*-*-*spe*) - cpu_type=powerpcspe - extra_headers="ppc-asm.h altivec.h spe.h 
ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h htmintrin.h htmxlintrin.h" - case x$with_cpu in - xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500) - cpu_is_64bit=yes - ;; - esac - extra_options="${extra_options} g.opt fused-madd.opt powerpcspe/powerpcspe-tables.opt" +or1k*-*-*) + cpu_type=or1k ;; powerpc*-*-*) cpu_type=rs6000 - extra_objs="rs6000-string.o rs6000-p8swap.o" + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o" + extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o" extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h" extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h" extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h" extra_headers="${extra_headers} mmintrin.h x86intrin.h" + extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h" extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h" - extra_headers="${extra_headers} paired.h" extra_headers="${extra_headers} amo.h" case x$with_cpu in - xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500) + xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower10|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500) cpu_is_64bit=yes ;; esac extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt" + target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c" + target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.c" + ;; +pru-*-*) + cpu_type=pru ;; riscv*) cpu_type=riscv extra_objs="riscv-builtins.o riscv-c.o" + d_target_objs="riscv-d.o" ;; rs6000*-*-*) extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt" - extra_objs="rs6000-string.o rs6000-p8swap.o" + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o" + extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o" + target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c" + target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.c" ;; sparc*-*-*) cpu_type=sparc c_target_objs="sparc-c.o" cxx_target_objs="sparc-c.o" + d_target_objs="sparc-d.o" extra_headers="visintrin.h" ;; -spu*-*-*) - cpu_type=spu - ;; s390*-*-*) cpu_type=s390 + d_target_objs="s390-d.o" extra_options="${extra_options} fused-madd.opt" extra_headers="s390intrin.h htmintrin.h htmxlintrin.h vecintrin.h" ;; @@ -530,10 +581,13 @@ tilepro*-*-*) esac tm_file=${cpu_type}/${cpu_type}.h +tm_d_file=${cpu_type}/${cpu_type}.h if test -f ${srcdir}/config/${cpu_type}/${cpu_type}-protos.h then tm_p_file=${cpu_type}/${cpu_type}-protos.h + tm_d_file="${tm_d_file} ${cpu_type}/${cpu_type}-protos.h" fi + extra_modes= if test -f ${srcdir}/config/${cpu_type}/${cpu_type}-modes.def then @@ -571,13 +625,6 @@ i[34567]86-*-*) echo "This target does not support --with-abi." 
exit 1 fi - if test "x$enable_cld" = xyes; then - tm_defines="${tm_defines} USE_IX86_CLD=1" - fi - if test "x$enable_frame_pointer" = xyes; then - tm_defines="${tm_defines} USE_IX86_FRAME_POINTER=1" - fi - tm_file="vxworks-dummy.h ${tm_file}" ;; x86_64-*-*) case ${with_abi} in @@ -598,20 +645,9 @@ x86_64-*-*) echo "Unknown ABI used in --with-abi=$with_abi" exit 1 esac - if test "x$enable_cld" = xyes; then - tm_defines="${tm_defines} USE_IX86_CLD=1" - fi - if test "x$enable_frame_pointer" = xyes; then - tm_defines="${tm_defines} USE_IX86_FRAME_POINTER=1" - fi - tm_file="vxworks-dummy.h ${tm_file}" ;; arm*-*-*) tm_p_file="arm/arm-flags.h ${tm_p_file} arm/aarch-common-protos.h" - tm_file="vxworks-dummy.h ${tm_file}" - ;; -mips*-*-* | sh*-*-* | sparc*-*-*) - tm_file="vxworks-dummy.h ${tm_file}" ;; esac @@ -630,15 +666,19 @@ tm_defines="$tm_defines LIBC_GLIBC=1 LIBC_UCLIBC=2 LIBC_BIONIC=3 LIBC_MUSL=4" x86_archs="athlon athlon-4 athlon-fx athlon-mp athlon-tbird \ athlon-xp k6 k6-2 k6-3 geode c3 c3-2 winchip-c6 winchip2 i386 i486 \ i586 i686 pentium pentium-m pentium-mmx pentium2 pentium3 pentium3m \ -pentium4 pentium4m pentiumpro prescott lakemont" +pentium4 pentium4m pentiumpro prescott lakemont samuel-2 nehemiah \ +c7 esther" # 64-bit x86 processors supported by --with-arch=. Each processor # MUST be separated by exactly one space. x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ -bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \ -core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \ -sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \ -skylake-avx512 cannonlake icelake-client icelake-server skylake x86-64 native" +bdver3 bdver4 znver1 znver2 znver3 btver1 btver2 k8 k8-sse3 opteron \ +opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \ +slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ +silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ +skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ +sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ +nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native" # Additional x86 processors supported by --with-cpu=. Each processor # MUST be separated by exactly one space. @@ -649,25 +689,57 @@ case ${target} in *-*-darwin*) tmake_file="t-darwin " tm_file="${tm_file} darwin.h" + darwin_os=`echo ${target} | sed 's/.*darwin\([0-9.]*\).*$/\1/'` + darwin_maj=`expr "$darwin_os" : '\([0-9]*\).*'` + macos_min=`expr "$darwin_os" : '[0-9]*\.\([0-9]*\).*'` + macos_maj=10 + if test x"${macos_min}" = x; then + macos_min=0 + fi + def_ld64=85.2 case ${target} in - *-*-darwin9*) - tm_file="${tm_file} darwin9.h" - ;; - *-*-darwin[1][01]*) - tm_file="${tm_file} darwin9.h darwin10.h" - ;; - *-*-darwin[1][2-9]* | *-*-darwin[2][0-9]*) - tm_file="${tm_file} darwin9.h darwin10.h darwin12.h" - ;; + # Darwin 4 to 19 correspond to macOS 10.0 to 10.15 + *-*-darwin[4-9]* | *-*-darwin1[0-9]*) + macos_min=`expr $darwin_maj - 4` + ;; + *-*-darwin20*) + # Darwin 20 corresponds to macOS 11. + macos_maj=11 + def_ld64=609.0 + ;; + *-*-darwin) + case ${cpu_type} in + aarch64) macos_maj=11 ;; + x86_64) macos_min=6 ;; + *) macos_min=5 ;; + esac + case ${host} in + *-*-darwin*) tm_defines="$tm_defines DARWIN_USE_KERNEL_VERS" ;; + *) + # If configuring a cross-compiler then we will have set some + # default above, but it is probably not what was intended. 
+ echo "Warning: Using ${target} is only suitable for Darwin hosts" 1>&2 + echo "configure with an explicit target version" 1>&2 + ;; + esac + ;; + *) + echo "Error: configuring for an unreleased macOS version ${target}" 1>&2 + exit 1 + ;; esac + tm_defines="$tm_defines DEF_MIN_OSX_VERSION=\\\"${macos_maj}.${macos_min}\\\"" + tm_defines="$tm_defines DEF_LD64=\\\"${def_ld64}\\\"" tm_file="${tm_file} ${cpu_type}/darwin.h" tm_p_file="${tm_p_file} darwin-protos.h" - target_gtfiles="\$(srcdir)/config/darwin.c" + target_gtfiles="$target_gtfiles \$(srcdir)/config/darwin.c" extra_options="${extra_options} darwin.opt" c_target_objs="${c_target_objs} darwin-c.o" cxx_target_objs="${cxx_target_objs} darwin-c.o" + d_target_objs="${d_target_objs} darwin-d.o" fortran_target_objs="darwin-f.o" target_has_targetcm=yes + target_has_targetdm=yes extra_objs="${extra_objs} darwin.o" extra_gcc_objs="darwin-driver.o" default_use_cxa_atexit=yes @@ -695,6 +767,9 @@ case ${target} in extra_options="$extra_options rpath.opt dragonfly.opt" default_use_cxa_atexit=yes use_gcc_stdint=wrap + d_target_objs="${d_target_objs} dragonfly-d.o" + tmake_file="${tmake_file} t-dragonfly" + target_has_targetdm=yes ;; *-*-freebsd*) # This is the generic ELF configuration of FreeBSD. Later @@ -743,11 +818,14 @@ case ${target} in default_use_cxa_atexit=yes;; esac use_gcc_stdint=wrap + d_target_objs="${d_target_objs} freebsd-d.o" + tmake_file="${tmake_file} t-freebsd" + target_has_targetdm=yes ;; *-*-fuchsia*) native_system_header_dir=/include ;; -*-*-linux* | frv-*-*linux* | *-*-kfreebsd*-gnu | *-*-gnu* | *-*-kopensolaris*-gnu) +*-*-linux* | frv-*-*linux* | *-*-kfreebsd*-gnu | *-*-gnu* | *-*-kopensolaris*-gnu | *-*-uclinuxfdpiceabi) extra_options="$extra_options gnu-user.opt" gas=yes gnu_ld=yes @@ -776,7 +854,7 @@ case ${target} in *-*-*android*) tm_defines="$tm_defines DEFAULT_LIBC=LIBC_BIONIC" ;; - *-*-*uclibc*) + *-*-*uclibc* | *-*-uclinuxfdpiceabi) tm_defines="$tm_defines DEFAULT_LIBC=LIBC_UCLIBC" ;; *-*-*musl*) @@ -802,13 +880,16 @@ case ${target} in esac c_target_objs="${c_target_objs} glibc-c.o" cxx_target_objs="${cxx_target_objs} glibc-c.o" + d_target_objs="${d_target_objs} glibc-d.o" tmake_file="${tmake_file} t-glibc" target_has_targetcm=yes + target_has_targetdm=yes ;; *-*-netbsd*) tm_p_file="${tm_p_file} netbsd-protos.h" tmake_file="t-netbsd t-slibgcc" extra_objs="${extra_objs} netbsd.o" + d_target_objs="${d_target_objs} netbsd-d.o" gas=yes gnu_ld=yes use_gcc_stdint=wrap @@ -817,6 +898,12 @@ case ${target} in esac nbsd_tm_file="netbsd.h netbsd-stdint.h netbsd-elf.h" default_use_cxa_atexit=yes + target_has_targetdm=yes + case ${target} in + arm*-* | i[34567]86-* | powerpc*-* | sparc*-* | x86_64-*) + default_gnu_indirect_function=yes + ;; + esac ;; *-*-openbsd*) tmake_file="t-openbsd" @@ -830,6 +917,8 @@ case ${target} in default_use_cxa_atexit=yes ;; esac + d_target_objs="${d_target_objs} openbsd-d.o" + target_has_targetdm=yes ;; *-*-phoenix*) gas=yes @@ -885,6 +974,7 @@ case ${target} in tmake_file="${tmake_file} t-sol2 t-slibgcc" c_target_objs="${c_target_objs} sol2-c.o" cxx_target_objs="${cxx_target_objs} sol2-c.o sol2-cxx.o" + d_target_objs="${d_target_objs} sol2-d.o" extra_objs="${extra_objs} sol2.o sol2-stubs.o" extra_options="${extra_options} sol2.opt" case ${enable_threads}:${have_pthread_h}:${have_thread_h} in @@ -892,6 +982,7 @@ case ${target} in thread_file=posix ;; esac + target_has_targetdm=yes ;; *-*-*vms*) extra_options="${extra_options} vms/vms.opt" @@ -915,20 +1006,75 @@ case ${target} in *-*-vxworks*) 
tmake_file=t-vxworks xm_defines=POSIX + extra_options="${extra_options} vxworks.opt" extra_objs="$extra_objs vxworks.o" - use_gcc_stdint=provide + + c_target_objs="${c_target_objs} vxworks-c.o" + cxx_target_objs="${cxx_target_objs} vxworks-c.o" + extra_headers="${extra_headers} ../vxworks/vxworks-predef.h" + target_has_targetcm="yes" + + # This private header exposes a consistent interface for checks on + # the VxWorks version our runtime header files need to perform, based on + # what the system headers advertise: + + extra_headers="${extra_headers} ../vxworks/_vxworks-versions.h" + + # Starting from VxWorks 7, the system comes with a Dinkumware + # environment which requires the inclusion of "yvals.h" before other + # system headers. We provide wrapped versions of a few headers to + # accommodate such constraints: + + extra_headers="${extra_headers} ../vxworks/_yvals.h" + extra_headers="${extra_headers} ../vxworks/_yvals-wrapper.h" + + extra_headers="${extra_headers} ../vxworks/math.h ../vxworks/complex.h" + extra_headers="${extra_headers} ../vxworks/inttypes.h ../vxworks/setjmp.h" + + # We provide stdint.h ... + tm_file="${tm_file} vxworks-stdint.h" + + # .. only through the yvals conditional wrapping mentioned above + # to abide by the VxWorks 7 expectations. The final copy is performed + # explicitly by a t-vxworks Makefile rule. + + use_gcc_stdint=none + extra_headers="${extra_headers} ../../ginclude/stdint-gcc.h" + case ${enable_threads} in no) ;; "" | yes | vxworks) thread_file='vxworks' ;; *) echo 'Unknown thread configuration for VxWorks'; exit 1 ;; esac + + # A few common macro definitions conveying general characteristics + # of the configuration at hand. Note that by VxWorks 7, we mean + # the SR6xx major update or beyond in vendor parlance: + case $target in *-*-vxworks7*) tm_defines="$tm_defines TARGET_VXWORKS7=1" ;; esac + case $target in + *64-*-vxworks*) + tm_defines="$tm_defines TARGET_VXWORKS64=1" + ;; + esac + + # Then a few build configuration controls for VxWorks 7, which + # has specificities on top of which we aim to provide more complete + # C++ support: + + case $target in + *-*-vxworks7*) + # VxWorks 7 always has init/fini_array support and it is simpler to + # just leverage this, sticking to what the system toolchain does: + gcc_cv_initfini_array=yes + ;; + esac ;; *-*-elf|arc*-*-elf*) # Assume that newlib is being used and so __cxa_atexit is provided.
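The VxWorks stanza above relies on the accumulation style used throughout config.gcc: per-target cases append to lists that earlier, more generic cases have already seeded. A minimal, hypothetical entry (target "foo", not taken from the patch) showing that style:

    foo-*-elf*)
        # Extend rather than overwrite, so defaults set by the generic
        # OS and CPU cases earlier in the file are preserved.
        tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h foo/foo-elf.h"
        extra_headers="${extra_headers} foo/foo-intrin.h"
        tmake_file="${tmake_file} foo/t-foo"
        # tm_defines accumulates macros for the generated configuration header.
        tm_defines="${tm_defines} TARGET_FOO_DEFAULT=1"
        use_gcc_stdint=wrap
        ;;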
@@ -940,7 +1086,7 @@ esac case ${target} in aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*) tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h" - tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-elf-raw.h" + tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-elf-raw.h" tmake_file="${tmake_file} aarch64/t-aarch64" case $target in aarch64-*-elf*) @@ -950,7 +1096,7 @@ aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*) tm_file="${tm_file} fuchsia.h" ;; aarch64-*-rtems*) - tm_file="${tm_file} rtems.h aarch64/rtems.h" + tm_file="${tm_file} aarch64/rtems.h rtems.h" ;; esac case $target in @@ -977,22 +1123,25 @@ aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*) ;; aarch64*-*-freebsd*) tm_file="${tm_file} dbxelf.h elfos.h ${fbsd_tm_file}" - tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-freebsd.h" + tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-freebsd.h" tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-freebsd" + tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1" + ;; +aarch64*-*-netbsd*) + tm_file="${tm_file} dbxelf.h elfos.h ${nbsd_tm_file}" + tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-netbsd.h" + tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-netbsd" + extra_options="${extra_options} netbsd.opt netbsd-elf.opt" ;; aarch64*-*-linux*) - tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h" - tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h" - extra_options="${extra_options} linux-android.opt" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h" + tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-linux.h" tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux" tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1" case $target in aarch64_be-*) tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" ;; - aarch64*-*-linux-android*) - tm_file="${tm_file} aarch64/aarch64-linux-android.h" - ;; esac aarch64_multilibs="${with_multilib_list}" if test "$aarch64_multilibs" = "default"; then @@ -1013,16 +1162,16 @@ aarch64*-*-linux*) done TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` ;; +aarch64*-wrs-vxworks*) + tm_file="${tm_file} elfos.h aarch64/aarch64-elf.h" + tm_file="${tm_file} vx-common.h vxworks.h aarch64/aarch64-vxworks.h" + tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-vxworks" + ;; alpha*-*-linux*) tm_file="elfos.h ${tm_file} alpha/elf.h alpha/linux.h alpha/linux-elf.h glibc-stdint.h" tmake_file="${tmake_file} alpha/t-linux alpha/t-alpha" extra_options="${extra_options} alpha/elf.opt" ;; -alpha*-*-freebsd*) - tm_file="elfos.h ${tm_file} ${fbsd_tm_file} alpha/elf.h alpha/freebsd.h" - tmake_file="${tmake_file} alpha/t-alpha" - extra_options="${extra_options} alpha/elf.opt" - ;; alpha*-*-netbsd*) tm_file="elfos.h ${tm_file} ${nbsd_tm_file} alpha/elf.h alpha/netbsd.h" tmake_file="${tmake_file} alpha/t-alpha" @@ -1085,23 +1234,17 @@ arc*-*-linux*) # automatically detect that GAS supports it, yet we require it. gcc_cv_initfini_array=yes ;; -arm-wrs-vxworks|arm-wrs-vxworks7) +arm-wrs-vxworks7*) + # We only support VxWorks 7 now on ARM, post SR600. Pre SR600 + # VxWorks 7 was transitory and major versions prior to 7 were based + # on long deprecated ABI, not supported at all any more regardless + # of VxWorks. 
extra_options="${extra_options} arm/vxworks.opt" - tmake_file="${tmake_file} arm/t-arm arm/t-vxworks" - case $target in - *-vxworks7) - # VxWorks7 on ARM adheres to the ARM Base Platform ABI - tmake_file="${tmake_file} arm/t-bpabi" - tm_file="arm/bpabi.h ${tm_file}" - # The BPABI long long divmod functions return a 128-bit value in - # registers r0-r3. - need_64bit_hwint=yes - default_use_cxa_atexit=yes - ;; - *) ;; - esac - tm_file="elfos.h arm/elf.h arm/aout.h ${tm_file} vx-common.h vxworks.h arm/vxworks.h" - target_cpu_cname="arm8" + tmake_file="${tmake_file} arm/t-arm arm/t-vxworks arm/t-bpabi" + tm_file="elfos.h arm/elf.h arm/bpabi.h arm/aout.h ${tm_file}" + tm_file="${tm_file} vx-common.h vxworks.h arm/vxworks.h" + target_cpu_cname="generic-armv7-a" + need_64bit_hwint=yes ;; arm*-*-freebsd*) # ARM FreeBSD EABI tm_file="dbxelf.h elfos.h ${fbsd_tm_file} arm/elf.h" @@ -1130,12 +1273,37 @@ arm*-*-freebsd*) # ARM FreeBSD EABI with_tls=${with_tls:-gnu} ;; arm*-*-netbsdelf*) - tm_file="dbxelf.h elfos.h ${nbsd_tm_file} arm/elf.h arm/aout.h ${tm_file} arm/netbsd-elf.h" - extra_options="${extra_options} netbsd.opt netbsd-elf.opt" + target_cpu_cname="strongarm" tmake_file="${tmake_file} arm/t-arm" - target_cpu_cname="arm6" + tm_file="dbxelf.h elfos.h ${nbsd_tm_file} arm/elf.h" + extra_options="${extra_options} netbsd.opt netbsd-elf.opt" + case ${target} in + arm*eb-*) tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" ;; + esac + case ${target} in + arm*-*-netbsdelf-*eabi*) + tm_file="$tm_file arm/bpabi.h arm/netbsd-elf.h arm/netbsd-eabi.h" + tmake_file="$tmake_file arm/t-bpabi arm/t-netbsdeabi" + ;; + *) + tm_file="$tm_file arm/netbsd-elf.h" + tmake_file="$tmake_file arm/t-netbsd" + ;; + esac + tm_file="${tm_file} arm/aout.h arm/arm.h" + case ${target} in + arm*-*-netbsdelf-*eabihf*) + # Hard-float requires at least Arm v5te + target_cpu_cname="arm10e" + tm_defines="${tm_defines} TARGET_DEFAULT_FLOAT_ABI=ARM_FLOAT_ABI_HARD" + ;; + esac + case ${target} in + armv6*) target_cpu_cname="arm1176jzf-s";; + armv7*) target_cpu_cname="generic-armv7-a";; + esac ;; -arm*-*-linux-*) # ARM GNU/Linux with ELF +arm*-*-linux-* | arm*-*-uclinuxfdpiceabi) tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h arm/elf.h arm/linux-gas.h arm/linux-elf.h" extra_options="${extra_options} linux-android.opt" case $target in @@ -1144,7 +1312,12 @@ arm*-*-linux-*) # ARM GNU/Linux with ELF ;; esac tmake_file="${tmake_file} arm/t-arm arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi" - tm_file="$tm_file arm/bpabi.h arm/linux-eabi.h arm/aout.h vxworks-dummy.h arm/arm.h" + tm_file="$tm_file arm/bpabi.h arm/linux-eabi.h arm/aout.h arm/arm.h" + case $target in + arm*-*-uclinuxfdpiceabi) + tm_file="$tm_file arm/uclinuxfdpiceabi.h" + ;; + esac # Generation of floating-point instructions requires at least ARMv5te. if [ "$with_float" = "hard" -o "$with_float" = "softfp" ] ; then target_cpu_cname="arm10e" @@ -1164,7 +1337,7 @@ arm*-*-linux-*) # ARM GNU/Linux with ELF arm*-*-uclinux*eabi*) # ARM ucLinux tm_file="dbxelf.h elfos.h arm/unknown-elf.h arm/elf.h arm/linux-gas.h arm/uclinux-elf.h glibc-stdint.h" tmake_file="${tmake_file} arm/t-arm arm/t-arm-elf arm/t-bpabi" - tm_file="$tm_file arm/bpabi.h arm/uclinux-eabi.h arm/aout.h vxworks-dummy.h arm/arm.h" + tm_file="$tm_file arm/bpabi.h arm/uclinux-eabi.h arm/aout.h arm/arm.h" target_cpu_cname="arm7tdmi" # The EABI requires the use of __cxa_atexit. 
default_use_cxa_atexit=yes @@ -1197,7 +1370,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems* | arm*-*-fuchsia*) target_cpu_cname="generic-armv7-a" ;; arm*-*-rtems*) - tm_file="${tm_file} rtems.h arm/rtems.h newlib-stdint.h" + tm_file="${tm_file} arm/rtems.h rtems.h newlib-stdint.h" tmake_file="${tmake_file} arm/t-bpabi arm/t-rtems" ;; arm*-*-symbianelf*) @@ -1208,7 +1381,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems* | arm*-*-fuchsia*) target_cpu_cname="arm10tdmi" ;; esac - tm_file="${tm_file} arm/aout.h vxworks-dummy.h arm/arm.h" + tm_file="${tm_file} arm/aout.h arm/arm.h" ;; avr-*-*) tm_file="elfos.h avr/elf.h avr/avr-arch.h avr/avr.h avr/specs.h dbxelf.h avr/avr-stdint.h" @@ -1216,6 +1389,128 @@ avr-*-*) tm_file="${tm_file} ${cpu_type}/avrlibc.h" tm_defines="${tm_defines} WITH_AVRLIBC" fi + # Work out avr_double_comparison which is 2 or 3 and is used in + # target hook FLOAT_LIB_COMPARE_RETURNS_BOOL to determine whether + # DFmode comparisons return 3-state or 2-state results. + case y${with_double_comparison} in + y | ytristate) + avr_double_comparison=3 + ;; + ybool | ylibf7) + avr_double_comparison=2 + ;; + *) + echo "Error: --with-double-comparison= can only be used with: 'tristate', 'bool', 'libf7'" 1>&2 + exit 1 + ;; + esac + case "y${with_libf7}" in + yno) + # avr_double_comparison as set above. + ;; + ylibgcc) + avr_double_comparison=2 + tm_defines="${tm_defines} WITH_LIBF7_LIBGCC" + ;; + y | yyes | ymath-symbols) + avr_double_comparison=2 + tm_defines="${tm_defines} WITH_LIBF7_LIBGCC" + tm_defines="${tm_defines} WITH_LIBF7_MATH" + tm_defines="${tm_defines} WITH_LIBF7_MATH_SYMBOLS" + ;; + ymath) + avr_double_comparison=2 + tm_defines="${tm_defines} WITH_LIBF7_LIBGCC" + tm_defines="${tm_defines} WITH_LIBF7_MATH" + ;; + *) + echo "Error: --with-libf7=${with_libf7} but can only be used with: 'libgcc', 'math', 'math-symbols', 'yes', 'no'" 1>&2 + exit 1 + ;; + esac + tm_defines="${tm_defines} WITH_DOUBLE_COMPARISON=${avr_double_comparison}" + case y${with_double} in + y32) + avr_double=32 + tm_defines="${tm_defines} HAVE_DOUBLE32" + ;; + y64) + avr_double=64 + tm_defines="${tm_defines} HAVE_DOUBLE64" + ;; + y64,32) + avr_double=64 + avr_double_multilib=1 + tm_defines="${tm_defines} HAVE_DOUBLE32" + tm_defines="${tm_defines} HAVE_DOUBLE64" + tm_defines="${tm_defines} HAVE_DOUBLE_MULTILIB" + ;; + y | y32,64) + avr_double=32 + avr_double_multilib=1 + tm_defines="${tm_defines} HAVE_DOUBLE32" + tm_defines="${tm_defines} HAVE_DOUBLE64" + tm_defines="${tm_defines} HAVE_DOUBLE_MULTILIB" + ;; + *) + echo "Error: --with-double= can only be used with: '32', '32,64', '64,32', '64'" 1>&2 + exit 1 + ;; + esac + case y${with_long_double} in + y32) + avr_long_double=32 + tm_defines="${tm_defines} HAVE_LONG_DOUBLE32" + ;; + y64) + avr_long_double=64 + tm_defines="${tm_defines} HAVE_LONG_DOUBLE64" + ;; + y | y64,32) + avr_long_double=64 + avr_long_double_multilib=1 + tm_defines="${tm_defines} HAVE_LONG_DOUBLE32" + tm_defines="${tm_defines} HAVE_LONG_DOUBLE64" + tm_defines="${tm_defines} HAVE_LONG_DOUBLE_MULTILIB" + ;; + y32,64) + avr_long_double=32 + avr_long_double_multilib=1 + tm_defines="${tm_defines} HAVE_LONG_DOUBLE32" + tm_defines="${tm_defines} HAVE_LONG_DOUBLE64" + tm_defines="${tm_defines} HAVE_LONG_DOUBLE_MULTILIB" + ;; + ydouble) + avr_long_double=${avr_double} + tm_defines="${tm_defines} HAVE_LONG_DOUBLE_IS_DOUBLE" + if test y${avr_double_multilib} = y1; then + tm_defines="${tm_defines} HAVE_LONG_DOUBLE32" + tm_defines="${tm_defines} HAVE_LONG_DOUBLE64" + else + 
tm_defines="${tm_defines} HAVE_LONG_DOUBLE${avr_long_double}" + fi + ;; + *) + echo "Error: --with-long_double= can only be used with: '32', '32,64', '64,32', '64', 'double'" 1>&2 + exit 1 + ;; + esac + if test ${avr_long_double}x${avr_long_double_multilib}y${avr_double_multilib}z = 32xy1z; then + if test y${with_long_double} != ydouble; then + echo "Error: --with-double=${with_double} requests a multilib for double, but long double is always 32 bits wide due to --with-long-double=${with_long_double}" 1>&2 + exit 1 + fi + fi + if test ${avr_double}x${avr_long_double_multilib}y${avr_double_multilib}z = 64x1yz; then + echo "Error: --with-long-double=${with_long_double} requests a multilib for long double, but double is always 64 bits wide due to --with-double=64" 1>&2 + exit 1 + fi + if test y${avr_double}${avr_long_double} = y6432; then + echo "Error: double default of 64 bits from --with-double=${with_double} conflicts with default of 32 bits for long double from --with-long-double=${with_long_double}" 1>&2 + exit 1 + fi + tm_defines="${tm_defines} WITH_DOUBLE${avr_double}" + tm_defines="${tm_defines} WITH_LONG_DOUBLE${avr_long_double}" tmake_file="${tmake_file} avr/t-avr avr/t-multilib" use_gcc_stdint=wrap extra_gcc_objs="driver-avr.o avr-devices.o" @@ -1245,19 +1540,18 @@ bfin*-*) use_collect2=no use_gcc_stdint=wrap ;; +bpf-*-*) + tm_file="elfos.h ${tm_file}" + tmake_file="${tmake_file} bpf/t-bpf" + use_collect2=no + extra_headers="bpf-helpers.h" + use_gcc_stdint=provide + ;; cr16-*-elf) tm_file="elfos.h ${tm_file} newlib-stdint.h" tmake_file="${tmake_file} cr16/t-cr16 " use_collect2=no ;; -crisv32-*-elf | crisv32-*-none) - tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" - tmake_file="cris/t-cris" - target_cpu_default=32 - gas=yes - extra_options="${extra_options} cris/elf.opt" - use_gcc_stdint=wrap - ;; cris-*-elf | cris-*-none) tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" tmake_file="cris/t-cris cris/t-elfmulti" @@ -1265,16 +1559,71 @@ cris-*-elf | cris-*-none) extra_options="${extra_options} cris/elf.opt" use_gcc_stdint=wrap ;; -crisv32-*-linux* | cris-*-linux*) - tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h cris/linux.h" - tmake_file="${tmake_file} cris/t-cris cris/t-linux" - extra_options="${extra_options} cris/linux.opt" - case $target in - cris-*-*) - target_cpu_default=10 +csky-*-*) + if test x${with_endian} != x; then + case ${with_endian} in + big|little) ;; + *) + echo "with_endian=${with_endian} not supported." 
+ exit 1 + ;; + esac + fi + if test x${with_float} != x; then + case ${with_float} in + soft | hard) ;; + *) echo + "Unknown floating point type used in --with-float=$with_float" + exit 1 + ;; + esac + fi + tm_file="csky/csky.h" + md_file="csky/csky.md" + out_file="csky/csky.c" + tm_p_file="${tm_p_file} csky/csky-protos.h" + extra_options="${extra_options} csky/csky_tables.opt" + + if test x${enable_tpf_debug} = xyes; then + tm_defines="${tm_defines} ENABLE_TPF_DEBUG" + fi + + case ${target} in + csky-*-elf*) + tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} csky/csky-elf.h" + tmake_file="csky/t-csky csky/t-csky-elf" + default_use_cxa_atexit=no + use_gcc_stdint=wrap ;; - crisv32-*-*) - target_cpu_default=32 + csky-*-linux*) + tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} csky/csky-linux-elf.h" + tmake_file="${tmake_file} csky/t-csky csky/t-csky-linux" + + if test "x${enable_multilib}" = xyes ; then + tm_file="$tm_file ./sysroot-suffix.h" + tmake_file="${tmake_file} csky/t-sysroot-suffix" + fi + + case ${target} in + csky-*-linux-gnu*) + tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC" + # Force .init_array support. The configure script cannot always + # automatically detect that GAS supports it, yet we require it. + gcc_cv_initfini_array=yes + ;; + csky-*-linux-uclibc*) + tm_defines="$tm_defines DEFAULT_LIBC=LIBC_UCLIBC" + default_use_cxa_atexit=no + ;; + *) + echo "Unknown target $target" + exit 1 + ;; + esac + ;; + *) + echo "Unknown target $target" + exit 1 ;; esac ;; @@ -1283,7 +1632,7 @@ epiphany-*-elf | epiphany-*-rtems*) tmake_file="${tmake_file} epiphany/t-epiphany" case ${target} in epiphany-*-rtems*) - tm_file="${tm_file} rtems.h epiphany/rtems.h newlib-stdint.h" + tm_file="${tm_file} epiphany/rtems.h rtems.h newlib-stdint.h" ;; *) tm_file="${tm_file} newlib-stdint.h" @@ -1312,6 +1661,28 @@ ft32-*-elf) tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" tmake_file="${tmake_file} ft32/t-ft32" ;; +amdgcn-*-amdhsa) + tm_file="elfos.h gcn/gcn-hsa.h gcn/gcn.h newlib-stdint.h" + tmake_file="gcn/t-gcn-hsa" + native_system_header_dir=/include + extra_modes=gcn/gcn-modes.def + extra_objs="${extra_objs} gcn-tree.o" + extra_gcc_objs="driver-gcn.o" + case "$host" in + x86_64*-*-linux-gnu ) + if test "$ac_cv_search_dlopen" != no; then + extra_programs="${extra_programs} gcn-run\$(exeext)" + fi + ;; + esac + if test x$enable_as_accelerator = xyes; then + extra_programs="${extra_programs} mkoffload\$(exeext)" + tm_file="${tm_file} gcn/offload.h" + fi + # Force .init_array support. 
+ gcc_cv_initfini_array=yes + thread_file=gcn + ;; moxie-*-elf) gas=yes gnu_ld=yes @@ -1347,22 +1718,35 @@ hppa*64*-*-linux*) tm_file="pa/pa64-start.h ${tm_file} dbxelf.h elfos.h gnu-user.h linux.h \ glibc-stdint.h pa/pa-linux.h pa/pa64-regs.h pa/pa-64.h \ pa/pa64-linux.h" + tmake_file="${tmake_file} pa/t-pa pa/t-linux" + d_target_objs="${d_target_objs} pa-d.o" gas=yes gnu_ld=yes ;; hppa*-*-linux*) target_cpu_default="MASK_PA_11|MASK_NO_SPACE_REGS|MASK_CALLER_COPIES" tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h pa/pa-linux.h \ pa/pa32-regs.h pa/pa32-linux.h" - tmake_file="${tmake_file} pa/t-linux" + tmake_file="${tmake_file} pa/t-pa pa/t-linux" + d_target_objs="${d_target_objs} pa-d.o" ;; hppa*-*-openbsd*) target_cpu_default="MASK_PA_11" tm_file="${tm_file} dbxelf.h elfos.h openbsd.h openbsd-stdint.h openbsd-libpthread.h \ pa/pa-openbsd.h pa/pa32-regs.h pa/pa32-openbsd.h" extra_options="${extra_options} openbsd.opt" + tmake_file="pa/t-pa" + d_target_objs="${d_target_objs} pa-d.o" gas=yes gnu_ld=yes ;; +hppa*-*-netbsd*) + target_cpu_default="MASK_PA_11|MASK_NO_SPACE_REGS" + tm_file="${tm_file} dbxelf.h elfos.h ${nbsd_tm_file} \ + pa/pa-netbsd.h pa/pa32-regs.h pa/pa32-netbsd.h" + tmake_file="${tmake_file}" + tm_defines="${tm_defines} CHAR_FAST8=1 SHORT_FAST16=1" + extra_options="${extra_options} netbsd.opt netbsd-elf.opt" + ;; hppa[12]*-*-hpux10*) case ${target} in hppa1.1-*-* | hppa2*-*-*) @@ -1380,7 +1764,8 @@ hppa[12]*-*-hpux10*) esac use_gcc_stdint=provide tm_file="${tm_file} hpux-stdint.h" - tmake_file="t-slibgcc" + tmake_file="pa/t-pa t-slibgcc" + d_target_objs="${d_target_objs} pa-d.o" case ${enable_threads} in "") if test x$have_pthread_h = xyes ; then @@ -1422,7 +1807,8 @@ hppa*64*-*-hpux11*) esac extra_options="${extra_options} pa/pa-hpux.opt \ pa/pa-hpux1010.opt pa/pa64-hpux.opt hpux11.opt" - tmake_file="t-slibgcc" + tmake_file="pa/t-pa t-slibgcc" + d_target_objs="${d_target_objs} pa-d.o" case x${enable_threads} in x | xyes | xposix ) thread_file=posix @@ -1460,7 +1846,8 @@ hppa[12]*-*-hpux11*) extra_options="${extra_options} pa/pa-hpux1131.opt" ;; esac - tmake_file="t-slibgcc" + tmake_file="pa/t-pa t-slibgcc" + d_target_objs="${d_target_objs} pa-d.o" case x${enable_threads} in x | xyes | xposix ) thread_file=posix @@ -1682,7 +2069,7 @@ i[34567]86-*-rtems*) tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h i386/i386elf.h i386/rtemself.h rtems.h newlib-stdint.h" tmake_file="${tmake_file} i386/t-rtems" ;; -i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) +i[34567]86-*-solaris2* | x86_64-*-solaris2*) # Set default arch_32 to pentium4, tune_32 to generic like the other # i386 targets, although config.guess defaults to i386-pc-solaris2*. 
with_arch_32=${with_arch_32:-pentium4} @@ -1707,7 +2094,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) esac fi ;; -i[4567]86-wrs-vxworks|i[4567]86-wrs-vxworksae|i[4567]86-wrs-vxworks7|x86_64-wrs-vxworks7) +i[4567]86-wrs-vxworks*|x86_64-wrs-vxworks7*) tm_file="${tm_file} i386/unix.h i386/att.h elfos.h" case ${target} in x86_64-*) @@ -1731,11 +2118,13 @@ i[34567]86-*-cygwin*) tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h dbxcoff.h i386/cygming.h i386/cygwin.h i386/cygwin-stdint.h" xm_file=i386/xm-cygwin.h tmake_file="${tmake_file} i386/t-cygming t-slibgcc" - target_gtfiles="\$(srcdir)/config/i386/winnt.c" + target_gtfiles="$target_gtfiles \$(srcdir)/config/i386/winnt.c" extra_options="${extra_options} i386/cygming.opt i386/cygwin.opt" extra_objs="${extra_objs} winnt.o winnt-stubs.o" c_target_objs="${c_target_objs} msformat-c.o" cxx_target_objs="${cxx_target_objs} winnt-cxx.o msformat-c.o" + d_target_objs="${d_target_objs} winnt-d.o" + target_has_targetdm="yes" if test x$enable_threads = xyes; then thread_file='posix' fi @@ -1747,11 +2136,13 @@ x86_64-*-cygwin*) tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h dbxcoff.h i386/cygming.h i386/cygwin.h i386/cygwin-w64.h i386/cygwin-stdint.h" xm_file=i386/xm-cygwin.h tmake_file="${tmake_file} i386/t-cygming t-slibgcc i386/t-cygwin-w64" - target_gtfiles="\$(srcdir)/config/i386/winnt.c" + target_gtfiles="$target_gtfiles \$(srcdir)/config/i386/winnt.c" extra_options="${extra_options} i386/cygming.opt i386/cygwin.opt" extra_objs="${extra_objs} winnt.o winnt-stubs.o" c_target_objs="${c_target_objs} msformat-c.o" cxx_target_objs="${cxx_target_objs} winnt-cxx.o msformat-c.o" + d_target_objs="${d_target_objs} winnt-d.o" + target_has_targetdm="yes" if test x$enable_threads = xyes; then thread_file='posix' fi @@ -1764,7 +2155,9 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) xm_file=i386/xm-mingw32.h c_target_objs="${c_target_objs} winnt-c.o" cxx_target_objs="${cxx_target_objs} winnt-c.o" + d_target_objs="${d_target_objs} winnt-d.o" target_has_targetcm="yes" + target_has_targetdm="yes" case ${target} in x86_64-*-* | *-w64-*) need_64bit_isa=yes @@ -1815,7 +2208,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) ;; esac native_system_header_dir=/mingw/include - target_gtfiles="\$(srcdir)/config/i386/winnt.c" + target_gtfiles="$target_gtfiles \$(srcdir)/config/i386/winnt.c" extra_options="${extra_options} i386/cygming.opt i386/mingw.opt" case ${target} in *-w64-*) @@ -2083,6 +2476,11 @@ riscv*-*-linux*) tmake_file="${tmake_file} riscv/t-riscv riscv/t-linux" gnu_ld=yes gas=yes + case $target in + riscv32be-*|riscv64be-*) + tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" + ;; + esac # Force .init_array support. The configure script cannot always # automatically detect that GAS supports it, yet we require it. 
gcc_cv_initfini_array=yes @@ -2091,19 +2489,26 @@ riscv*-*-elf* | riscv*-*-rtems*) tm_file="elfos.h newlib-stdint.h ${tm_file} riscv/elf.h" case ${target} in *-*-rtems*) - tm_file="${tm_file} rtems.h riscv/rtems.h" + tm_file="${tm_file} riscv/rtems.h rtems.h" tmake_file="${tmake_file} riscv/t-rtems" ;; *) - case "x${enable_multilib}" in - xno) ;; - xyes) tmake_file="${tmake_file} riscv/t-elf-multilib" ;; - *) echo "Unknown value for enable_multilib"; exit 1 - esac + if test "x${with_multilib_generator}" == xdefault; then + case "x${enable_multilib}" in + xno) ;; + xyes) tmake_file="${tmake_file} riscv/t-elf-multilib" ;; + *) echo "Unknown value for enable_multilib"; exit 1 + esac + fi esac tmake_file="${tmake_file} riscv/t-riscv" gnu_ld=yes gas=yes + case $target in + riscv32be-*|riscv64be-*) + tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" + ;; + esac # Force .init_array support. The configure script cannot always # automatically detect that GAS supports it, yet we require it. gcc_cv_initfini_array=yes @@ -2113,6 +2518,11 @@ riscv*-*-freebsd*) tmake_file="${tmake_file} riscv/t-riscv" gnu_ld=yes gas=yes + case $target in + riscv32be-*|riscv64be-*) + tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" + ;; + esac # Force .init_array support. The configure script cannot always # automatically detect that GAS supports it, yet we require it. gcc_cv_initfini_array=yes @@ -2146,17 +2556,6 @@ mips*-*-linux*) # Linux MIPS, either endian. tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/linux.h mips/linux-common.h" extra_options="${extra_options} linux-android.opt" case ${target} in - mips64*android*) - default_mips_arch=mips64r6 - default_mips_abi=64 - tm_file="${tm_file} mips/android.h" - tmake_file="${tmake_file} mips/t-linux-android64" - ;; - mips*android*) - default_mips_arch=mips32 - tm_file="${tm_file} mips/android.h" - tmake_file="$tmake_file mips/t-linux-android" - ;; mipsisa32r6*) default_mips_arch=mips32r6 ;; @@ -2355,24 +2754,63 @@ mn10300-*-*) use_collect2=no use_gcc_stdint=wrap ;; -msp430*-*-*) +msp430-*-*) tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" c_target_objs="msp430-c.o" cxx_target_objs="msp430-c.o" tmake_file="${tmake_file} msp430/t-msp430" - extra_gcc_objs="driver-msp430.o" + extra_objs="${extra_objs} msp430-devices.o" + extra_gcc_objs="driver-msp430.o msp430-devices.o" + # Enable .init_array unless it has been explicitly disabled. + # The MSP430 EABI mandates the use of .init_array, and the Newlib CRT + # code since mid-2019 expects it. + if test x${disable_initfini_array} != xyes; then + gcc_cv_initfini_array=yes + fi + case ${target} in + msp430-*-elfbare) + # __cxa_atexit increases code size, and we don't need to support + # dynamic shared objects on MSP430, so regular Newlib atexit is a + # fine replacement as it also supports registration of more than 32 + # functions. + default_use_cxa_atexit=no + # This target does not match the generic *-*-elf case above which + # sets use_gcc_stdint=wrap, so explicitly set it here. 
+ use_gcc_stdint=wrap + ;; + esac ;; -nds32le-*-*) +nds32*-*-*) target_cpu_default="0" tm_defines="${tm_defines}" - tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h" - tmake_file="nds32/t-nds32 nds32/t-mlibs" - ;; -nds32be-*-*) - target_cpu_default="0|MASK_BIG_ENDIAN" - tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" - tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h" - tmake_file="nds32/t-nds32 nds32/t-mlibs" + case ${target} in + nds32le*-*-*) + ;; + nds32be-*-*) + target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN" + tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" + ;; + esac + case ${target} in + nds32*-*-elf*) + tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h" + tmake_file="nds32/t-nds32 nds32/t-elf" + ;; + nds32*-*-linux*) + tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h" + tmake_file="${tmake_file} nds32/t-nds32 nds32/t-linux" + gcc_cv_initfini_array=yes + ;; + esac + + # Handle --enable-default-relax setting. + if test x${enable_default_relax} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1" + fi + # Handle --with-ext-dsp + if test x${with_ext_dsp} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1" + fi ;; nios2-*-*) tm_file="elfos.h ${tm_file}" @@ -2400,6 +2838,50 @@ nvptx-*) tm_file="${tm_file} nvptx/offload.h" fi ;; +or1k*-*-*) + tm_file="elfos.h ${tm_file}" + tmake_file="${tmake_file} or1k/t-or1k" + # Force .init_array support. The configure script cannot always + # automatically detect that GAS supports it, yet we require it. + gcc_cv_initfini_array=yes + + # Handle --with-multilib-list=... + or1k_multilibs="${with_multilib_list}" + if test "$or1k_multilibs" = "default"; then + or1k_multilibs="mcmov,msoft-mul,msoft-div" + fi + or1k_multilibs=`echo $or1k_multilibs | sed -e 's/,/ /g'` + for or1k_multilib in ${or1k_multilibs}; do + case ${or1k_multilib} in + mcmov | msext | msfimm | \ + mror | mrori | \ + mhard-float | mdouble-float | munordered-float | msoft-float | \ + mhard-div | mhard-mul | \ + msoft-div | msoft-mul ) + TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG},${or1k_multilib}" + ;; + *) + echo "--with-multilib-list=${with_multilib_list} not supported." 
+ exit 1 + esac + done + TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` + + case ${target} in + or1k*-*-linux*) + tm_file="${tm_file} gnu-user.h linux.h glibc-stdint.h" + tm_file="${tm_file} or1k/linux.h" + ;; + or1k*-*-elf*) + tm_file="${tm_file} newlib-stdint.h or1k/elf.h" + extra_options="${extra_options} or1k/elf.opt" + ;; + or1k*-*-rtems*) + tm_file="${tm_file} newlib-stdint.h or1k/rtems.h rtems.h" + tmake_file="${tmake_file} or1k/t-rtems" + ;; + esac + ;; pdp11-*-*) tm_file="${tm_file} newlib-stdint.h" use_gcc_stdint=wrap @@ -2419,7 +2901,6 @@ powerpc-*-darwin*) *-darwin8*) tmake_file="${tmake_file} ${cpu_type}/t-darwin32-biarch" tm_file="${tm_file} ${cpu_type}/darwin32-biarch.h" - tm_file="${tm_file} ${cpu_type}/darwin8.h" ;; *-darwin7*) tm_file="${tm_file} ${cpu_type}/darwin7.h" @@ -2430,98 +2911,79 @@ powerpc-*-darwin*) ;; esac tmake_file="${tmake_file} t-slibgcc" - extra_headers=altivec.h ;; powerpc64-*-darwin*) extra_options="${extra_options} ${cpu_type}/darwin.opt" tmake_file="${tmake_file} ${cpu_type}/t-darwin64-biarch t-slibgcc" tm_file="${tm_file} ${cpu_type}/darwin64-biarch.h" - extra_headers=altivec.h ;; powerpc*-*-freebsd*) - tm_file="${tm_file} dbxelf.h elfos.h ${fbsd_tm_file} rs6000/sysv4.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h ${fbsd_tm_file} rs6000/sysv4.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="rs6000/t-fprules rs6000/t-ppcos ${tmake_file} rs6000/t-ppccomm" + case ${target} in + powerpc*le-*-*) + tm_file="${tm_file} rs6000/sysv4le.h" ;; + esac case ${target} in powerpc64*) tm_file="${tm_file} rs6000/default64.h rs6000/freebsd64.h" tmake_file="${tmake_file} rs6000/t-freebsd64" extra_options="${extra_options} rs6000/linux64.opt" + if test $fbsd_major -ge 13; then + tm_defines="${tm_defines} TARGET_FREEBSD32_SECURE_PLT=1" + fi ;; *) + if test $fbsd_major -ge 13; then + tm_file="rs6000/secureplt.h ${tm_file}" + fi tm_file="${tm_file} rs6000/freebsd.h" ;; esac ;; powerpc-*-netbsd*) - tm_file="${tm_file} dbxelf.h elfos.h ${nbsd_tm_file} freebsd-spec.h rs6000/sysv4.h rs6000/netbsd.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h ${nbsd_tm_file} freebsd-spec.h rs6000/sysv4.h rs6000/netbsd.h" extra_options="${extra_options} netbsd.opt netbsd-elf.opt" tmake_file="${tmake_file} rs6000/t-netbsd" extra_options="${extra_options} rs6000/sysv4.opt" ;; -powerpc-*-eabispe*) - tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h ${cpu_type}/sysv4.h ${cpu_type}/eabi.h ${cpu_type}/e500.h ${cpu_type}/eabispe.h" - extra_options="${extra_options} ${cpu_type}/sysv4.opt" - tmake_file="${cpu_type}/t-spe ${cpu_type}/t-ppccomm" - use_gcc_stdint=wrap - ;; powerpc-*-eabisimaltivec*) - tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/eabisim.h rs6000/eabialtivec.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/eabisim.h rs6000/eabialtivec.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="rs6000/t-fprules rs6000/t-ppcendian rs6000/t-ppccomm" use_gcc_stdint=wrap ;; powerpc-*-eabisim*) - tm_file="${tm_file} dbxelf.h elfos.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/eabisim.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/eabisim.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm" use_gcc_stdint=wrap ;; powerpc-*-elf*) - 
tm_file="${tm_file} dbxelf.h elfos.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm" ;; powerpc-*-eabialtivec*) - tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/eabialtivec.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/eabialtivec.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="rs6000/t-fprules rs6000/t-ppcendian rs6000/t-ppccomm" use_gcc_stdint=wrap ;; -powerpc-xilinx-eabi*) - tm_file="${tm_file} dbxelf.h elfos.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/singlefp.h rs6000/xfpu.h rs6000/xilinx.h" - extra_options="${extra_options} rs6000/sysv4.opt rs6000/xilinx.opt" - tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm rs6000/t-xilinx" - use_gcc_stdint=wrap - ;; powerpc-*-eabi*) - tm_file="${tm_file} dbxelf.h elfos.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm" use_gcc_stdint=wrap ;; -powerpc-*-rtems*spe*) - tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h powerpcspe/sysv4.h powerpcspe/eabi.h powerpcspe/e500.h powerpcspe/rtems.h rtems.h" - extra_options="${extra_options} powerpcspe/sysv4.opt" - tmake_file="${tmake_file} powerpcspe/t-fprules powerpcspe/t-rtems powerpcspe/t-ppccomm" - ;; powerpc-*-rtems*) - tm_file="rs6000/biarch64.h ${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/rtems.h rtems.h" + tm_file="rs6000/biarch64.h ${tm_file} dbxelf.h elfos.h gnu-user.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/rtems.h rtems.h" extra_options="${extra_options} rs6000/sysv4.opt rs6000/linux64.opt" tmake_file="${tmake_file} rs6000/t-fprules rs6000/t-rtems rs6000/t-ppccomm" ;; -powerpc*-*-linux*spe*) - tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h freebsd-spec.h powerpcspe/sysv4.h" - extra_options="${extra_options} powerpcspe/sysv4.opt" - tmake_file="${tmake_file} powerpcspe/t-fprules powerpcspe/t-ppccomm" - extra_objs="$extra_objs powerpcspe-linux.o" - maybe_biarch= - tm_file="${tm_file} powerpcspe/linux.h glibc-stdint.h" - tmake_file="${tmake_file} powerpcspe/t-ppcos powerpcspe/t-linux" - tm_file="${tm_file} powerpcspe/linuxspe.h powerpcspe/e500.h" - ;; powerpc*-*-linux*) - tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h freebsd-spec.h rs6000/sysv4.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h freebsd-spec.h rs6000/sysv4.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="${tmake_file} rs6000/t-fprules rs6000/t-ppccomm" extra_objs="$extra_objs rs6000-linux.o" @@ -2537,15 +2999,6 @@ powerpc*-*-linux*) *powerpc64*) maybe_biarch=yes ;; all) maybe_biarch=yes ;; esac - case ${target} in - powerpc64*-*-linux*spe* | powerpc64*-*-linux*paired*) - echo "*** Configuration ${target} not supported" 1>&2 - exit 1 - ;; - powerpc*-*-linux*spe* | powerpc*-*-linux*paired*) - maybe_biarch= - ;; - esac case ${target}:${enable_targets}:${maybe_biarch} in powerpc64-* | powerpc-*:*:yes | *:*powerpc64-*:yes | *:all:yes \ | powerpc64le*:*powerpcle* | 
powerpc64le*:*powerpc-* \ @@ -2586,10 +3039,6 @@ powerpc*-*-linux*) extra_options="${extra_options} rs6000/476.opt" ;; powerpc*-*-linux*altivec*) tm_file="${tm_file} rs6000/linuxaltivec.h" ;; - powerpc*-*-linux*spe*) - tm_file="${tm_file} ${cpu_type}/linuxspe.h ${cpu_type}/e500.h" ;; - powerpc*-*-linux*paired*) - tm_file="${tm_file} rs6000/750cl.h" ;; esac case ${target} in *-linux*-musl*) @@ -2599,18 +3048,32 @@ powerpc*-*-linux*) tm_file="rs6000/secureplt.h ${tm_file}" fi ;; -powerpc-wrs-vxworks*spe) - tm_file="${tm_file} elfos.h freebsd-spec.h powerpcspe/sysv4.h" - tmake_file="${tmake_file} powerpcspe/t-fprules powerpcspe/t-ppccomm powerpcspe/t-vxworks" - extra_options="${extra_options} powerpcspe/sysv4.opt" - extra_headers=ppc-asm.h - tm_file="${tm_file} vx-common.h vxworks.h powerpcspe/vxworks.h powerpcspe/e500.h" +powerpc*-wrs-vxworks7r*) + + # Wind River 7 post SR0600 is mostly like Linux so we setup + # our config in a very similar fashion and adjust to a few + # specificities. + + # The system compiler is configured with secureplt by default. + tm_file="${tm_file} rs6000/secureplt.h" + + tm_file="${tm_file} elfos.h gnu-user.h linux.h freebsd-spec.h" + tm_file="${tm_file} rs6000/sysv4.h rs6000/biarch64.h rs6000/default64.h rs6000/linux64.h" + tm_file="${tm_file} vx-common.h vxworks.h rs6000/vxworks.h" + + extra_options="${extra_options} rs6000/sysv4.opt linux.opt rs6000/linux64.opt" + + tmake_file="${tmake_file} t-linux rs6000/t-linux64 rs6000/t-fprules rs6000/t-ppccomm" + tmake_file="${tmake_file} rs6000/t-vxworks" + + tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC" + extra_objs="$extra_objs linux.o rs6000-linux.o" ;; powerpc-wrs-vxworks*) - tm_file="${tm_file} elfos.h freebsd-spec.h rs6000/sysv4.h" + tm_file="${tm_file} elfos.h gnu-user.h freebsd-spec.h rs6000/sysv4.h" tmake_file="${tmake_file} rs6000/t-fprules rs6000/t-ppccomm rs6000/t-vxworks" extra_options="${extra_options} rs6000/sysv4.opt" - extra_headers=ppc-asm.h + extra_headers="${extra_headers} ppc-asm.h" case ${target} in *-vxworksmils*) tm_file="${tm_file} vx-common.h vxworksae.h rs6000/vxworks.h rs6000/vxworksmils.h" @@ -2627,7 +3090,7 @@ powerpc-wrs-vxworks*) ;; powerpc-*-lynxos*) xm_defines=POSIX - tm_file="${tm_file} dbxelf.h elfos.h rs6000/sysv4.h rs6000/lynx.h lynx.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h rs6000/sysv4.h rs6000/lynx.h lynx.h" tmake_file="t-lynx rs6000/t-lynx" extra_options="${extra_options} rs6000/sysv4.opt lynx.opt" thread_file=lynx @@ -2635,57 +3098,27 @@ powerpc-*-lynxos*) gas=yes ;; powerpcle-*-elf*) - tm_file="${tm_file} dbxelf.h elfos.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/sysv4le.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/sysv4le.h" tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm" extra_options="${extra_options} rs6000/sysv4.opt" ;; powerpcle-*-eabisim*) - tm_file="${tm_file} dbxelf.h elfos.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/sysv4le.h rs6000/eabi.h rs6000/eabisim.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/sysv4le.h rs6000/eabi.h rs6000/eabisim.h" tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm" extra_options="${extra_options} rs6000/sysv4.opt" use_gcc_stdint=wrap ;; powerpcle-*-eabi*) - tm_file="${tm_file} dbxelf.h elfos.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/sysv4le.h rs6000/eabi.h" + tm_file="${tm_file} dbxelf.h elfos.h 
gnu-user.h usegas.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/sysv4le.h rs6000/eabi.h" tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm" extra_options="${extra_options} rs6000/sysv4.opt" use_gcc_stdint=wrap ;; -rs6000-ibm-aix4.[3456789]* | powerpc-ibm-aix4.[3456789]*) - tm_file="rs6000/biarch64.h ${tm_file} rs6000/aix.h rs6000/aix43.h rs6000/xcoff.h rs6000/aix-stdint.h" - tmake_file="rs6000/t-aix43 t-slibgcc" - extra_options="${extra_options} rs6000/aix64.opt" - use_collect2=yes - thread_file='aix' - use_gcc_stdint=provide - extra_headers= - ;; -rs6000-ibm-aix5.1.* | powerpc-ibm-aix5.1.*) - tm_file="rs6000/biarch64.h ${tm_file} rs6000/aix.h rs6000/aix51.h rs6000/xcoff.h rs6000/aix-stdint.h" - extra_options="${extra_options} rs6000/aix64.opt" - tmake_file="rs6000/t-aix43 t-slibgcc" - use_collect2=yes - thread_file='aix' - use_gcc_stdint=wrap - extra_headers= - ;; -rs6000-ibm-aix5.2.* | powerpc-ibm-aix5.2.*) - tm_file="${tm_file} rs6000/aix.h rs6000/aix52.h rs6000/xcoff.h rs6000/aix-stdint.h" - tmake_file="rs6000/t-aix52 t-slibgcc" - extra_options="${extra_options} rs6000/aix64.opt" - use_collect2=yes - thread_file='aix' - use_gcc_stdint=wrap - extra_headers= - ;; -rs6000-ibm-aix5.3.* | powerpc-ibm-aix5.3.*) - tm_file="${tm_file} rs6000/aix.h rs6000/aix53.h rs6000/xcoff.h rs6000/aix-stdint.h" - tmake_file="rs6000/t-aix52 t-slibgcc" - extra_options="${extra_options} rs6000/aix64.opt" - use_collect2=yes - thread_file='aix' +pru*-*-*) + tm_file="elfos.h newlib-stdint.h ${tm_file}" + tmake_file="${tmake_file} pru/t-pru" + extra_objs="pru-pragma.o pru-passes.o" use_gcc_stdint=wrap - extra_headers=altivec.h ;; rs6000-ibm-aix6.* | powerpc-ibm-aix6.*) tm_file="${tm_file} rs6000/aix.h rs6000/aix61.h rs6000/xcoff.h rs6000/aix-stdint.h" @@ -2694,27 +3127,32 @@ rs6000-ibm-aix6.* | powerpc-ibm-aix6.*) use_collect2=yes thread_file='aix' use_gcc_stdint=wrap - extra_headers=altivec.h default_use_cxa_atexit=yes ;; rs6000-ibm-aix7.1.* | powerpc-ibm-aix7.1.*) - tm_file="${tm_file} rs6000/aix.h rs6000/aix71.h rs6000/xcoff.h rs6000/aix-stdint.h" tmake_file="rs6000/t-aix52 t-slibgcc" + if test x$cpu_is_64bit = xyes; then + tm_file="${tm_file} rs6000/biarch64.h" + tmake_file="rs6000/t-aix64 t-slibgcc" + fi + tm_file="${tm_file} rs6000/aix.h rs6000/aix71.h rs6000/xcoff.h rs6000/aix-stdint.h" extra_options="${extra_options} rs6000/aix64.opt" use_collect2=yes thread_file='aix' use_gcc_stdint=wrap - extra_headers="altivec.h amo.h" default_use_cxa_atexit=yes ;; rs6000-ibm-aix[789].* | powerpc-ibm-aix[789].*) - tm_file="${tm_file} rs6000/aix.h rs6000/aix72.h rs6000/xcoff.h rs6000/aix-stdint.h" tmake_file="rs6000/t-aix52 t-slibgcc" + if test x$cpu_is_64bit = xyes; then + tm_file="${tm_file} rs6000/biarch64.h" + tmake_file="rs6000/t-aix64 t-slibgcc" + fi + tm_file="${tm_file} rs6000/aix.h rs6000/aix72.h rs6000/xcoff.h rs6000/aix-stdint.h" extra_options="${extra_options} rs6000/aix64.opt" use_collect2=yes thread_file='aix' use_gcc_stdint=wrap - extra_headers="altivec.h amo.h" default_use_cxa_atexit=yes ;; rl78-*-elf*) @@ -2727,6 +3165,11 @@ rl78-*-elf*) rx-*-elf*) tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" tmake_file="${tmake_file} rx/t-rx" + extra_options="${extra_options} rx/elf.opt" + ;; +rx-*-linux*) + tm_file="elfos.h linux.h glibc-stdint.h rx/linux.h ../../libgcc/config/rx/rx-abi.h" + tmake_file="${tmake_file} rx/t-linux" ;; s390-*-linux*) tm_file="s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h" @@ -2748,7 +3191,7 @@ s390x-*-linux*) 
tmake_file="${tmake_file} s390/t-linux64 s390/t-s390" ;; s390x-ibm-tpf*) - tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h s390/tpf.h" + tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h glibc-stdint.h s390/tpf.h" tm_p_file=s390/s390-protos.h c_target_objs="${c_target_objs} s390-c.o" cxx_target_objs="${cxx_target_objs} s390-c.o" @@ -3017,15 +3460,6 @@ sparc64-*-openbsd*) with_cpu=ultrasparc tmake_file="${tmake_file} sparc/t-sparc" ;; -spu-*-elf*) - tm_file="dbxelf.h elfos.h spu/spu-elf.h spu/spu.h newlib-stdint.h" - tmake_file="spu/t-spu-elf" - native_system_header_dir=/include - extra_headers="spu_intrinsics.h spu_internals.h vmx2spu.h spu_mfcio.h vec_types.h spu_cache.h" - extra_modes=spu/spu-modes.def - c_target_objs="${c_target_objs} spu-c.o" - cxx_target_objs="${cxx_target_objs} spu-c.o" - ;; tic6x-*-elf) tm_file="elfos.h ${tm_file} c6x/elf-common.h c6x/elf.h" tm_file="${tm_file} dbxelf.h tm-dwarf2.h newlib-stdint.h" @@ -3064,7 +3498,7 @@ tilepro*-*-linux*) v850-*-rtems*) target_cpu_default="TARGET_CPU_generic" tm_file="dbxelf.h elfos.h v850/v850.h" - tm_file="${tm_file} rtems.h v850/rtems.h newlib-stdint.h" + tm_file="${tm_file} v850/rtems.h rtems.h newlib-stdint.h" tmake_file="${tmake_file} v850/t-v850" tmake_file="${tmake_file} v850/t-rtems" use_collect2=no @@ -3124,11 +3558,17 @@ xstormy16-*-elf) # For historical reasons, the target files omit the 'x'. tm_file="dbxelf.h elfos.h newlib-stdint.h stormy16/stormy16.h" tm_p_file=stormy16/stormy16-protos.h + tm_d_file="elfos.h stormy16/stormy16.h" md_file=stormy16/stormy16.md out_file=stormy16/stormy16.c extra_options=stormy16/stormy16.opt tmake_file="stormy16/t-stormy16" ;; +xtensa*-esp32-elf*) + tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h xtensa/elf.h" + tmake_file="${tmake_file} xtensa/t-xtensa-psram-fix" + extra_options="${extra_options} xtensa/elf.opt" + ;; xtensa*-*-elf*) tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h xtensa/elf.h" extra_options="${extra_options} xtensa/elf.opt" @@ -3189,7 +3629,9 @@ esac case ${target} in *-*-linux*android*|*-*-linux*uclibc*|*-*-linux*musl*) ;; -*-*-linux*) +*-*-kfreebsd*-gnu | *-*-kopensolaris*-gnu) + ;; +*-*-linux* | *-*-gnu*) case ${target} in aarch64*-* | arm*-* | i[34567]86-* | powerpc*-* | s390*-* | sparc*-* | x86_64-*) default_gnu_indirect_function=yes @@ -3219,6 +3661,10 @@ if [ "$common_out_file" = "" ]; then fi fi +if [ "$target_has_targetdm" = "no" ]; then + d_target_objs="$d_target_objs default-d.o" +fi + # Support for --with-cpu and related options (and a few unrelated options, # too). case ${with_cpu} in @@ -3295,6 +3741,14 @@ case ${target} in arch=znver1 cpu=znver1 ;; + znver2-*) + arch=znver2 + cpu=znver2 + ;; + znver3-*) + arch=znver3 + cpu=znver3 + ;; bdver4-*) arch=bdver4 cpu=bdver4 @@ -3412,6 +3866,14 @@ case ${target} in arch=znver1 cpu=znver1 ;; + znver2-*) + arch=znver2 + cpu=znver2 + ;; + znver3-*) + arch=znver3 + cpu=znver3 + ;; bdver4-*) arch=bdver4 cpu=bdver4 @@ -3517,20 +3979,6 @@ if test x$with_cpu = x ; then ;; esac ;; - powerpc*-*-*spe*) - # For SPE, start with 8540, then upgrade to 8548 if - # --enable-e500-double was requested explicitly or if we were - # configured for e500v2. - with_cpu=8540 - if test x$enable_e500_double = xyes; then - with_cpu=8548 - fi - case ${target_noncanonical} in - e500v2*) - with_cpu=8548 - ;; - esac - ;; sparc*-*-*) case ${target} in *-leon-*) @@ -3698,14 +4146,14 @@ fi # Infer a default setting for --with-llsc. 
if test x$with_llsc = x; then case ${target} in - mips64r5900-*-* | mips64r5900el-*-* | mipsr5900-*-* | mipsr5900el-*-*) - # The R5900 doesn't support LL(D) and SC(D). - with_llsc=no - ;; mips*-*-linux*) # The kernel emulates LL and SC where necessary. with_llsc=yes ;; + mips64r5900-*-* | mips64r5900el-*-* | mipsr5900-*-* | mipsr5900el-*-*) + # The R5900 doesn't support LL(D) and SC(D). + with_llsc=no + ;; esac fi @@ -3718,9 +4166,17 @@ fi supported_defaults= case "${target}" in aarch64*-*-*) - supported_defaults="abi cpu arch" - for which in cpu arch; do - + supported_defaults="abi cpu cpu_64 arch arch_64 tune tune_64" + if test x$with_cpu_64 != x && test x$with_cpu = x; then + with_cpu=$with_cpu_64 + fi + if test x$with_arch_64 != x && test x$with_arch = x; then + with_arch=$with_arch_64 + fi + if test x$with_tune_64 != x && test x$with_tune = x; then + with_tune=$with_tune_64 + fi + for which in cpu arch tune; do eval "val=\$with_$which" base_val=`echo $val | sed -e 's/\+.*//'` ext_val=`echo $val | sed -e 's/[a-z0-9.-]\+//'` @@ -3759,32 +4215,46 @@ case "${target}" in sed -e 's/,.*$//'` fi + # Disallow extensions in --with-tune=cortex-a53+crc. + if [ $which = tune ] && [ x"$ext_val" != x ]; then + echo "Architecture extensions not supported in --with-$which=$val" 1>&2 + exit 1 + fi + + # Use the pre-processor to strip flatten the options. + # This makes the format less rigid than if we use + # grep and sed directly here. + opt_macro="AARCH64_OPT_EXTENSION(A, B, C, D, E, F)=A, B, C, D, E, F" + options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \ + ${srcdir}/config/aarch64/aarch64-option-extensions.def`" + + # Match one element inside AARCH64_OPT_EXTENSION, we + # consume anything that's not a ,. + elem="[ ]*\([^,]\+\)[ ]*" + + # Repeat the pattern for the number of entries in the + # AARCH64_OPT_EXTENSION, currently 6 times. + sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem" + while [ x"$ext_val" != x ] do ext_val=`echo $ext_val | sed -e 's/\+//'` ext=`echo $ext_val | sed -e 's/\+.*//'` base_ext=`echo $ext | sed -e 's/^no//'` + opt_line=`echo -e "$options_parsed" | \ + grep "^\"$base_ext\""` if [ x"$base_ext" = x ] \ - || grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ - ${srcdir}/config/aarch64/aarch64-option-extensions.def \ - > /dev/null; then - - ext_canon=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ - ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ - sed -e 's/^[^,]*,[ ]*//' | \ - sed -e 's/,.*$//'` - ext_on=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ - ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ - sed -e 's/^[^,]*,[ ]*[^,]*,[ ]*//' | \ - sed -e 's/,.*$//' | \ - sed -e 's/).*$//'` - ext_off=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ - ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ - sed -e 's/^[^,]*,[ ]*[^,]*,[ ]*[^,]*,[ ]*//' | \ - sed -e 's/,.*$//' | \ - sed -e 's/).*$//'` + || [[ -n $opt_line ]]; then + # These regexp extract the elements based on + # their group match index in the regexp. + ext_canon=`echo -e "$opt_line" | \ + sed -e "s/$sed_patt/\2/"` + ext_on=`echo -e "$opt_line" | \ + sed -e "s/$sed_patt/\3/"` + ext_off=`echo -e "$opt_line" | \ + sed -e "s/$sed_patt/\4/"` if [ $ext = $base_ext ]; then # Adding extension @@ -3808,8 +4278,13 @@ case "${target}" in fi true else - echo "Unknown $which used in --with-$which=$val" 1>&2 - exit 1 + # Allow --with-$which=native. 
+ if [ "$val" = native ]; then + true + else + echo "Unknown $which used in --with-$which=$val" 1>&2 + exit 1 + fi fi done ;; @@ -3847,6 +4322,10 @@ case "${target}" in fi ;; + csky-*-*) + supported_defaults="cpu endian float" + ;; + arm*-*-*) supported_defaults="arch cpu float tune fpu abi mode tls" for which in cpu tune arch; do @@ -3880,12 +4359,13 @@ case "${target}" in # see if --with-fpu matches any of the supported FPUs if [ x"$with_fpu" != x ] ; then + val=$with_fpu fpu=`awk -f ${srcdir}/config/arm/parsecpu.awk \ - -v cmd="chkfpu $with_fpu" \ + -v cmd="chkfpu $val" \ ${srcdir}/config/arm/arm-cpus.in` - if [ "$fpu" = "error"] + if [ "$fpu" = "error" ] then - echo "Unknown target in --with-$which=$val" 1>&2 + echo "Unknown target in --with-fpu=$val" 1>&2 exit 1 fi fi @@ -3935,6 +4415,7 @@ case "${target}" in # Add extra multilibs if test "x$with_multilib_list" != x; then + ml= arm_multilibs=`echo $with_multilib_list | sed -e 's/,/ /g'` if test "x${arm_multilibs}" != xdefault ; then for arm_multilib in ${arm_multilibs}; do @@ -3942,6 +4423,15 @@ case "${target}" in aprofile|rmprofile) tmake_profile_file="arm/t-multilib" ;; + @*) + ml=`echo "X$arm_multilib" | sed '1s,^X@,,'` + if test -f "${srcdir}/config/arm/${ml}"; then + tmake_file="${tmake_file} arm/${ml}" + else + echo "Error: ${ml} does not exist in ${srcdir}/config/arm" >&2 + exit 1 + fi + ;; *) echo "Error: --with-multilib-list=${with_multilib_list} not supported." 1>&2 exit 1 @@ -3962,6 +4452,9 @@ case "${target}" in || test "x$with_mode" != x ; then echo "Error: You cannot use any of --with-arch/cpu/fpu/float/mode with --with-multilib-list=${with_multilib_list}" 1>&2 exit 1 + elif test "x$ml" != x ; then + echo "Error: You cannot use builtin multilib profiles along with custom ones" 1>&2 + exit 1 fi # But pass the default value for float-abi # through to the multilib selector @@ -3970,7 +4463,7 @@ case "${target}" in TM_MULTILIB_CONFIG="$with_multilib_list" fi fi - target_cpu_cname=${target_cpu_cname:-arm6} + target_cpu_cname=${target_cpu_cname:-arm7tdmi} with_cpu=${with_cpu:-$target_cpu_cname} ;; @@ -4026,6 +4519,24 @@ case "${target}" in esac ;; + amdgcn-*-*) + supported_defaults="arch tune" + + for which in arch tune; do + eval "val=\$with_$which" + case ${val} in + "" | fiji | gfx900 | gfx906 ) + # OK + ;; + *) + echo "Unknown cpu used in --with-$which=$val." 1>&2 + exit 1 + ;; + esac + done + [ "x$with_arch" = x ] && with_arch=fiji + ;; + hppa*-*-*) supported_defaults="arch schedule" @@ -4074,6 +4585,15 @@ case "${target}" in if test x${val} != x; then case " $x86_64_archs " in *" ${val} "*) + # Disallow x86-64-v* for --with-cpu=/--with-tune= + case "x$which$val" in + xcpu*x86-64-v*|xtune*x86-64-v*) + echo "Unknown CPU given in --with-$which=$val." 
1>&2 + exit 1 + ;; + *) + ;; + esac # OK ;; *) @@ -4104,42 +4624,84 @@ case "${target}" in ;; riscv*-*-*) - supported_defaults="abi arch tune" + supported_defaults="abi arch tune riscv_attribute isa_spec" case "${target}" in - riscv32*) xlen=32 ;; + riscv-* | riscv32*) xlen=32 ;; riscv64*) xlen=64 ;; *) echo "Unsupported RISC-V target ${target}" 1>&2; exit 1 ;; esac + case "${with_isa_spec}" in + ""|default|2.2) + tm_defines="${tm_defines} TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_2P2" + ;; + 20191213 | 201912) + tm_defines="${tm_defines} TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_20191213" + ;; + 20190608 | 201906) + tm_defines="${tm_defines} TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_20190608" + ;; + *) + echo "--with-isa-spec only accept 2.2, 20191213, 201912, 20190608 or 201906" 1>&2 + exit 1 + esac + + case "${with_riscv_attribute}" in + yes) + tm_defines="${tm_defines} TARGET_RISCV_ATTRIBUTE=1" + ;; + no) + tm_defines="${tm_defines} TARGET_RISCV_ATTRIBUTE=0" + ;; + ""|default) + case "${target}" in + riscv*-*-elf*) + tm_defines="${tm_defines} TARGET_RISCV_ATTRIBUTE=1" + ;; + *) + tm_defines="${tm_defines} TARGET_RISCV_ATTRIBUTE=0" + ;; + esac + ;; + *) + echo "--with-riscv-attribute=${with_riscv_attribute} is not supported. The argument must begin with yes, no or default." 1>&2 + exit 1 + ;; + esac + + # Infer arch from --with-arch, --target, and --with-abi. case "${with_arch}" in - rv32i* | rv32g* | rv64i* | rv64g*) + rv32e* | rv32i* | rv32g* | rv64i* | rv64g*) # OK. ;; "") # Infer XLEN, but otherwise assume GC. case "${with_abi}" in + ilp32e) with_arch="rv32e" ;; ilp32 | ilp32f | ilp32d) with_arch="rv32gc" ;; lp64 | lp64f | lp64d) with_arch="rv64gc" ;; *) with_arch="rv${xlen}gc" ;; esac ;; *) - echo "--with-arch=${with_arch} is not supported. The argument must begin with rv32i, rv32g, rv64i, or rv64g." 1>&2 + echo "--with-arch=${with_arch} is not supported. The argument must begin with rv32e, rv32i, rv32g, rv64i, or rv64g." 1>&2 exit 1 ;; esac + tm_defines="${tm_defines} TARGET_RISCV_DEFAULT_ARCH=${with_arch}" # Make sure --with-abi is valid. If it was not specified, # pick a default based on the ISA, preferring soft-float # unless the D extension is present. case "${with_abi}" in - ilp32 | ilp32f | ilp32d | lp64 | lp64f | lp64d) + ilp32 | ilp32e | ilp32f | ilp32d | lp64 | lp64f | lp64d) ;; "") case "${with_arch}" in rv32*d* | rv32g*) with_abi=ilp32d ;; + rv32e*) with_abi=ilp32e ;; rv32*) with_abi=ilp32 ;; rv64*d* | rv64g*) with_abi=lp64d ;; rv64*) with_abi=lp64 ;; @@ -4150,10 +4712,11 @@ case "${target}" in exit 1 ;; esac + tm_defines="${tm_defines} TARGET_RISCV_DEFAULT_ABI=${with_abi}" # Make sure ABI and ISA are compatible. case "${with_abi},${with_arch}" in - ilp32,rv32* \ + ilp32,rv32* | ilp32e,rv32e* \ | ilp32f,rv32*f* | ilp32f,rv32g* \ | ilp32d,rv32*d* | ilp32d,rv32g* \ | lp64,rv64* \ @@ -4165,6 +4728,45 @@ case "${target}" in exit 1 ;; esac + # Handle --with-multilib-generator. 
+ if test "x${with_multilib_generator}" != xdefault; then + if test "x${with_multilib_list}" != xdefault; then + echo "--with-multilib-list= can't used with --with-multilib-generator= at same time" 1>&2 + exit 1 + fi + case "${target}" in + riscv*-*-elf*) + if ${srcdir}/config/riscv/multilib-generator \ + `echo ${with_multilib_generator} | sed 's/;/ /g'`\ + > t-multilib-config; + then + tmake_file="${tmake_file} riscv/t-withmultilib-generator" + else + echo "invalid option for --with-multilib-generator" 1>&2 + exit 1 + fi + ;; + *) + echo "--with-multilib-generator= is not supported for ${target}, only supported for riscv*-*-elf*" 1>&2 + exit 1 + ;; + esac + fi + + # Handle --with-multilib-list. + if test "x${with_multilib_list}" != xdefault; then + tmake_file="${tmake_file} riscv/t-withmultilib" + + case ${with_multilib_list} in + ilp32 | ilp32f | ilp32d \ + | lp64 | lp64f | lp64d ) + TM_MULTILIB_CONFIG="${with_arch},${with_multilib_list}" + ;; + *) + echo "--with-multilib-list=${with_multilib_list} not supported." + exit 1 + esac + fi ;; mips*-*-*) @@ -4352,11 +4954,11 @@ case "${target}" in "") with_cpu=n9 ;; - n6 | n7 | n8 | e8 | s8 | n9) + n6 | n7 |n8 | e8 | s8 | n9 | n10 | d10 | n12 | n13 | n15) # OK ;; *) - echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9" 1>&2 + echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10 n12 n13 n15" 1>&2 exit 1 ;; esac @@ -4364,17 +4966,38 @@ case "${target}" in # process --with-nds32-lib case "${with_nds32_lib}" in "") - # the default library is newlib - with_nds32_lib=newlib + case ${target} in + *-*-*uclibc*) + with_nds32_lib=ulibc + ;; + *-*-linux*) + with_nds32_lib=glibc + ;; + *) + with_nds32_lib=newlib + tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" + ;; + esac ;; newlib) # OK + tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" ;; mculib) + # OK + # for the arch=v3f or arch=v3s under mculib toolchain, + # we would like to set -fno-math-errno as default + case "${with_arch}" in + v3f | v3s) + tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1" + ;; + esac + ;; + glibc | uclibc) # OK ;; *) - echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2 + echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib glibc uclibc" 1>&2 exit 1 ;; esac @@ -4439,7 +5062,7 @@ case "${target}" in eval "with_$which=405" ;; "" | common | native \ - | power[3456789] | power5+ | power6x \ + | power[3456789] | power10 | power5+ | power6x \ | powerpc | powerpc64 | powerpc64le \ | rs64 \ | 401 | 403 | 405 | 405fp | 440 | 440fp | 464 | 464fp \ @@ -4522,7 +5145,7 @@ case "${target}" in for which in arch tune; do eval "val=\$with_$which" case ${val} in - "" | native | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12 | z13 | z14 | arch3 | arch5 | arch6 | arch7 | arch8 | arch9 | arch10 | arch11 | arch12) + "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12 | z13 | z14 | z15 | arch5 | arch6 | arch7 | arch8 | arch9 | arch10 | arch11 | arch12 | arch13 | arch14 ) # OK ;; *) @@ -4595,23 +5218,6 @@ case "${target}" in esac ;; - spu-*-*) - supported_defaults="arch tune" - - for which in arch tune; do - eval "val=\$with_$which" - case ${val} in - "" | cell | celledp) - # OK - ;; - *) - echo "Unknown cpu used in --with-$which=$val." 
1>&2 - exit 1 - ;; - esac - done - ;; - tic6x-*-*) supported_defaults="arch" @@ -4650,6 +5256,18 @@ case "${target}" in ;; esac +# Targets for which there is at least one VxWorks port should include +# vxworks-dummy.h to allow safe references to various TARGET_VXWORKS kinds +# of markers from other files in the port, including the vxworks*.h files to +# distinguish VxWorks variants such as VxWorks 7 or 64). + +case ${target} in +arm*-*-* | i[34567]86-*-* | mips*-*-* | powerpc*-*-* | sh*-*-* \ +| sparc*-*-* | x86_64-*-*) + tm_file="vxworks-dummy.h ${tm_file}" + ;; +esac + # Set some miscellaneous flags for particular targets. target_cpu_default2= case ${target} in @@ -4688,8 +5306,8 @@ case ${target} in i[34567]86-*-darwin* | x86_64-*-darwin*) ;; i[34567]86-*-linux* | x86_64-*-linux*) - extra_objs="${extra_objs} cet.o" - tmake_file="$tmake_file i386/t-linux i386/t-cet" + extra_objs="${extra_objs} gnu-property.o" + tmake_file="$tmake_file i386/t-linux i386/t-gnu-property" ;; i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu) tmake_file="$tmake_file i386/t-kfreebsd" @@ -4700,7 +5318,7 @@ case ${target} in i[34567]86-*-msdosdjgpp*) tmake_file="${tmake_file} i386/t-djgpp" ;; - i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) + i[34567]86-*-solaris2* | x86_64-*-solaris2*) ;; i[34567]86-*-cygwin* | x86_64-*-cygwin*) ;; @@ -4764,6 +5382,7 @@ case ${target} in out_file="${cpu_type}/${cpu_type}.c" c_target_objs="${c_target_objs} ${cpu_type}-c.o" cxx_target_objs="${cxx_target_objs} ${cpu_type}-c.o" + d_target_objs="${d_target_objs} ${cpu_type}-d.o" tmake_file="${cpu_type}/t-${cpu_type} ${tmake_file}" ;; diff --git a/gcc/config.host b/gcc/config.host index c65569da2e922..81ff7ed1043e2 100644 --- a/gcc/config.host +++ b/gcc/config.host @@ -1,5 +1,5 @@ # GCC host-specific configuration file. -# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# Copyright (C) 1997-2021 Free Software Foundation, Inc. #This file is part of GCC. 
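The vxworks-dummy.h hunk just above (in config.gcc) relies on a common stub-header idiom: the dummy header defines the VxWorks feature markers to 0 so that every listed port can reference them unconditionally, and the real vxworks*.h headers override them. A minimal, self-contained sketch of that idiom follows; TARGET_VXWORKS appears in the hunk's own comment, while TARGET_VXWORKS7 and the default values are assumed here rather than quoted from GCC's actual vxworks-dummy.h.

/* Sketch of the "dummy header" idiom behind vxworks-dummy.h.  Macro
   names and defaults below are illustrative assumptions, not text
   copied from GCC.  */
#include <stdio.h>

#ifndef TARGET_VXWORKS
#define TARGET_VXWORKS 0   /* a real vxworks.h would override this */
#endif
#ifndef TARGET_VXWORKS7
#define TARGET_VXWORKS7 0  /* a VxWorks 7 configuration would override this */
#endif

int main (void)
{
  /* Port code can test the markers without knowing whether the dummy
     header or a real VxWorks header was included first.  */
  if (TARGET_VXWORKS)
    printf ("VxWorks conventions%s\n", TARGET_VXWORKS7 ? " (VxWorks 7)" : "");
  else
    printf ("non-VxWorks conventions\n");
  return 0;
}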
@@ -99,7 +99,8 @@ case ${host} in esac case ${host} in - aarch64*-*-freebsd* | aarch64*-*-linux* | aarch64*-*-fuchsia*) + aarch64*-*-freebsd* | aarch64*-*-linux* | aarch64*-*-fuchsia* |\ + aarch64*-*-darwin*) case ${target} in aarch64*-*-*) host_extra_gcc_objs="driver-aarch64.o" @@ -107,7 +108,7 @@ case ${host} in ;; esac ;; - arm*-*-freebsd* | arm*-*-linux* | arm*-*-fuchsia*) + arm*-*-freebsd* | arm*-*-netbsd* | arm*-*-linux* | arm*-*-fuchsia*) case ${target} in arm*-*-*) host_extra_gcc_objs="driver-arm.o" @@ -144,10 +145,6 @@ case ${host} in rs6000-*-* \ | powerpc*-*-* ) case ${target} in - powerpc*-*-*spe*) - host_extra_gcc_objs="driver-powerpcspe.o" - host_xmake_file="${host_xmake_file} powerpcspe/x-powerpcspe" - ;; rs6000-*-* \ | powerpc*-*-* ) host_extra_gcc_objs="driver-rs6000.o" @@ -217,7 +214,7 @@ case ${host} in out_host_hook_obj=host-hpux.o host_xmake_file="${host_xmake_file} x-hpux" ;; - i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) + i[34567]86-*-solaris2* | x86_64-*-solaris2*) out_host_hook_obj=host-solaris.o host_xmake_file="${host_xmake_file} x-solaris" ;; @@ -236,7 +233,7 @@ case ${host} in out_host_hook_obj=host-cygwin.o host_xmake_file="${host_xmake_file} i386/x-cygwin" host_exeext=.exe - host_lto_plugin_soname=cyglto_plugin-0.dll + host_lto_plugin_soname=cyglto_plugin.dll ;; i[34567]86-*-mingw32*) host_xm_file=i386/xm-mingw32.h @@ -244,7 +241,7 @@ case ${host} in host_exeext=.exe out_host_hook_obj=host-mingw32.o host_extra_gcc_objs="${host_extra_gcc_objs} driver-mingw32.o" - host_lto_plugin_soname=liblto_plugin-0.dll + host_lto_plugin_soname=liblto_plugin.dll ;; x86_64-*-mingw*) use_long_long_for_widest_fast_int=yes @@ -253,7 +250,11 @@ case ${host} in host_exeext=.exe out_host_hook_obj=host-mingw32.o host_extra_gcc_objs="${host_extra_gcc_objs} driver-mingw32.o" - host_lto_plugin_soname=liblto_plugin-0.dll + host_lto_plugin_soname=liblto_plugin.dll + ;; + aarch64*-*-darwin*) + out_host_hook_obj="${out_host_hook_obj} host-aarch64-darwin.o" + host_xmake_file="${host_xmake_file} aarch64/x-darwin" ;; i[34567]86-*-darwin* | x86_64-*-darwin*) out_host_hook_obj="${out_host_hook_obj} host-i386-darwin.o" @@ -282,6 +283,10 @@ case ${host} in out_host_hook_obj=host-openbsd.o host_xmake_file="${host_xmake_file} x-openbsd" ;; + *-*-netbsd*) + out_host_hook_obj=host-netbsd.o + host_xmake_file="${host_xmake_file} x-netbsd" + ;; ia64-*-hpux*) use_long_long_for_widest_fast_int=yes out_host_hook_obj=host-hpux.o diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index a37a5553894d6..b7497277bb8b5 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2018 Free Software Foundation, Inc. +/* Copyright (C) 2011-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. 
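The next hunk extends aarch64-arches.def, which is an X-macro table: any file that wants the data defines AARCH64_ARCH to select the fields it needs and then includes the .def, so one new row is enough to make a new -march= value visible everywhere the table is consumed. The stand-alone sketch below illustrates the pattern only; the struct layout, the field subset, and the inlined row list (standing in for the real #include of the .def) are assumptions for the example, not GCC code.

/* Stand-alone illustration of the X-macro pattern used by
   aarch64-arches.def; the rows are inlined so the example compiles
   on its own.  */
#include <stdio.h>

#define EXAMPLE_ARCHES \
  AARCH64_ARCH ("armv8.5-a", generic, 8_5A, 8) \
  AARCH64_ARCH ("armv8.6-a", generic, 8_6A, 8) \
  AARCH64_ARCH ("armv8-r",   generic, 8R,   8)

struct arch_row { const char *name; int series; };

/* Expand each row into one table entry; the fields this consumer does
   not need (core, identifier) are simply dropped by the macro.  */
#define AARCH64_ARCH(NAME, CORE, IDENT, SERIES) { NAME, SERIES },
static const struct arch_row rows[] = { EXAMPLE_ARCHES };
#undef AARCH64_ARCH

int main (void)
{
  for (unsigned int i = 0; i < sizeof rows / sizeof rows[0]; i++)
    printf ("-march=%s (Armv%d series)\n", rows[i].name, rows[i].series);
  return 0;
}

GCC's real consumers of the .def (for instance the tables behind -march= parsing) follow the same shape but keep the fifth feature-flags field, which is what AARCH64_FL_FOR_ARCH8_5 and friends feed.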
@@ -35,5 +35,8 @@ AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1) AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2) AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3) AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4) +AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5) +AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6) +AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) #undef AARCH64_ARCH diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c new file mode 100644 index 0000000000000..5d6bc169d6bb0 --- /dev/null +++ b/gcc/config/aarch64/aarch64-bti-insert.c @@ -0,0 +1,248 @@ +/* Branch Target Identification for AArch64 architecture. + Copyright (C) 2019-2021 Free Software Foundation, Inc. + Contributed by Arm Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#define INCLUDE_STRING +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "gimple.h" +#include "tm_p.h" +#include "stringpool.h" +#include "attribs.h" +#include "emit-rtl.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "dumpfile.h" +#include "rtl-iter.h" +#include "cfgrtl.h" +#include "tree-pass.h" +#include "cgraph.h" + +/* This pass enables the support for Branch Target Identification Mechanism + for AArch64. This is a new security feature introduced in ARMv8.5-A + architecture. A BTI instruction is used to guard against the execution + of instructions which are not the intended target of an indirect branch. + + Outside of a guarded memory region, a BTI instruction executes as a NOP. + Within a guarded memory region any target of an indirect branch must be + a compatible BTI or BRK, HLT, PACIASP, PACIBSP instruction (even if the + branch is triggered in a non-guarded memory region). An incompatibility + generates a Branch Target Exception. + + The compatibility of the BTI instruction is as follows: + BTI j : Can be a target of any indirect jump (BR Xn). + BTI c : Can be a target of any indirect call (BLR Xn and BR X16/X17). + BTI jc: Can be a target of any indirect call or indirect jump. + BTI : Can not be a target of any indirect call or indirect jump. + + In order to enable this mechanism, this pass iterates through the + control flow of the code and adds appropriate BTI instructions: + * Add a new "BTI C" at the beginning of a function, unless it's already + protected by a "PACIASP/PACIBSP". We exempt the functions that are only + called directly.
+ * Add a new "BTI J" for every target of an indirect jump, jump table targets, + non-local goto targets or labels that might be referenced by variables, + constant pools, etc. (NOTE_INSN_DELETED_LABEL). + + Since we have already changed the use of indirect tail calls to only x16 + and x17, we do not have to use "BTI JC". + + This pass is triggered by the command line option -mbranch-protection=bti or + -mbranch-protection=standard. Since all the BTI instructions are in the HINT + space, this pass does not require any minimum architecture version. */ + +namespace { + +const pass_data pass_data_insert_bti = +{ + RTL_PASS, /* type. */ + "bti", /* name. */ + OPTGROUP_NONE, /* optinfo_flags. */ + TV_MACH_DEP, /* tv_id. */ + 0, /* properties_required. */ + 0, /* properties_provided. */ + 0, /* properties_destroyed. */ + 0, /* todo_flags_start. */ + 0, /* todo_flags_finish. */ +}; + +/* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */ +static bool +aarch64_pac_insn_p (rtx x) +{ + if (!INSN_P (x)) + return false; + + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (x), ALL) + { + rtx sub = *iter; + if (sub && GET_CODE (sub) == UNSPEC) + { + int unspec_val = XINT (sub, 1); + switch (unspec_val) + { + case UNSPEC_PACIASP: + /* fall-through. */ + case UNSPEC_PACIBSP: + return true; + + default: + return false; + } + iter.skip_subrtxes (); + } + } + return false; +} + +/* Check if INSN is a BTI J insn. */ +static bool +aarch64_bti_j_insn_p (rtx_insn *insn) +{ + if (!insn || !INSN_P (insn)) + return false; + + rtx pat = PATTERN (insn); + return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J; +} + +/* Insert the BTI instruction. */ +/* This is implemented as a late RTL pass that runs before branch + shortening and does the following. */ +static unsigned int +rest_of_insert_bti (void) +{ + timevar_push (TV_MACH_DEP); + + rtx bti_insn; + rtx_insn *insn; + basic_block bb; + + bb = 0; + FOR_EACH_BB_FN (bb, cfun) + { + for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + /* If a label is marked to be preserved or can be a non-local goto + target, it must be protected with a BTI J. */ + if (LABEL_P (insn) + && (LABEL_PRESERVE_P (insn) + || bb->flags & BB_NON_LOCAL_GOTO_TARGET)) + { + bti_insn = gen_bti_j (); + emit_insn_after (bti_insn, insn); + continue; + } + + /* There could still be more labels that are valid targets of a + BTI J instruction. To find them we start looking through the + JUMP_INSN. If it jumps to a jump table, then we find all labels + of the jump table to protect with a BTI J. */ + if (JUMP_P (insn)) + { + rtx_jump_table_data *table; + if (tablejump_p (insn, NULL, &table)) + { + rtvec vec = table->get_labels (); + int j; + rtx_insn *label; + + for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j) + { + label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0)); + rtx_insn *next = next_nonnote_nondebug_insn (label); + if (aarch64_bti_j_insn_p (next)) + continue; + + bti_insn = gen_bti_j (); + emit_insn_after (bti_insn, label); + } + } + } + + /* Also look for calls to setjmp () which would be marked with + REG_SETJMP note and put a BTI J after. This is where longjmp () + will return. */ + if (CALL_P (insn) && (find_reg_note (insn, REG_SETJMP, NULL))) + { + bti_insn = gen_bti_j (); + emit_insn_after (bti_insn, insn); + continue; + } + } + } + + /* Since a Branch Target Exception can only be triggered by an indirect call, + we exempt functions that are only called directly.
We also exempt + functions that are already protected by Return Address Signing (PACIASP/ + PACIBSP). For all other cases insert a BTI C at the beginning of the + function. */ + if (!cgraph_node::get (cfun->decl)->only_called_directly_p ()) + { + bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; + insn = BB_HEAD (bb); + if (!aarch64_pac_insn_p (get_first_nonnote_insn ())) + { + bti_insn = gen_bti_c (); + emit_insn_before (bti_insn, insn); + } + } + + timevar_pop (TV_MACH_DEP); + return 0; +} + + +class pass_insert_bti : public rtl_opt_pass +{ +public: + pass_insert_bti (gcc::context *ctxt) + : rtl_opt_pass (pass_data_insert_bti, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return aarch64_bti_enabled (); + } + + virtual unsigned int execute (function *) + { + return rest_of_insert_bti (); + } + +}; // class pass_insert_bti + +} // anon namespace + +rtl_opt_pass * +make_pass_insert_bti (gcc::context *ctxt) +{ + return new pass_insert_bti (ctxt); +} diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 02c6738d2207d..acdea2a0601c1 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1,5 +1,5 @@ /* Builtins' description for AArch64 SIMD architecture. - Copyright (C) 2011-2018 Free Software Foundation, Inc. + Copyright (C) 2011-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -42,6 +42,9 @@ #include "langhooks.h" #include "gimple-iterator.h" #include "case-cfn-macros.h" +#include "emit-rtl.h" +#include "stringpool.h" +#include "attribs.h" #define v8qi_UP E_V8QImode #define v4hi_UP E_V4HImode @@ -67,6 +70,9 @@ #define hi_UP E_HImode #define hf_UP E_HFmode #define qi_UP E_QImode +#define bf_UP E_BFmode +#define v4bf_UP E_V4BFmode +#define v8bf_UP E_V8BFmode #define UP(X) X##_UP #define SIMD_MAX_BUILTIN_ARGS 5 @@ -102,9 +108,33 @@ enum aarch64_type_qualifiers /* Lane indices - must be in range, and flipped for bigendian. */ qualifier_lane_index = 0x200, /* Lane indices for single lane structure loads and stores. */ - qualifier_struct_load_store_lane_index = 0x400 + qualifier_struct_load_store_lane_index = 0x400, + /* Lane indices selected in pairs. - must be in range, and flipped for + bigendian. */ + qualifier_lane_pair_index = 0x800, + /* Lane indices selected in quadtuplets. - must be in range, and flipped for + bigendian. */ + qualifier_lane_quadtup_index = 0x1000, }; +/* Flags that describe what a function might do. */ +const unsigned int FLAG_NONE = 0U; +const unsigned int FLAG_READ_FPCR = 1U << 0; +const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 1U << 1; +const unsigned int FLAG_READ_MEMORY = 1U << 2; +const unsigned int FLAG_PREFETCH_MEMORY = 1U << 3; +const unsigned int FLAG_WRITE_MEMORY = 1U << 4; + +/* Not all FP intrinsics raise FP exceptions or read FPCR register, + use this flag to suppress it. 
*/ +const unsigned int FLAG_AUTO_FP = 1U << 5; + +const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS; +const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS + | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY; +const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY | FLAG_AUTO_FP; +const unsigned int FLAG_LOAD = FLAG_READ_MEMORY | FLAG_AUTO_FP; + typedef struct { const char *name; @@ -112,6 +142,7 @@ typedef struct const enum insn_code code; unsigned int fcode; enum aarch64_type_qualifiers *qualifiers; + unsigned int flags; } aarch64_simd_builtin_datum; static enum aarch64_type_qualifiers @@ -165,12 +196,26 @@ aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] qualifier_unsigned, qualifier_unsigned }; #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_lane_index }; +#define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers) +static enum aarch64_type_qualifiers aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none }; +#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_lane_pair_index }; +#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers) static enum aarch64_type_qualifiers aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, @@ -182,6 +227,19 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] qualifier_unsigned, qualifier_lane_index }; #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_unsigned, + qualifier_none, qualifier_lane_quadtup_index }; +#define TYPES_QUADOPSSUS_LANE_QUADTUP \ + (aarch64_types_quadopssus_lane_quadtup_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_unsigned, qualifier_lane_quadtup_index }; +#define TYPES_QUADOPSSSU_LANE_QUADTUP \ + (aarch64_types_quadopsssu_lane_quadtup_qualifiers) + static enum aarch64_type_qualifiers aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, @@ -209,6 +267,11 @@ static enum aarch64_type_qualifiers aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) +#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate }; +#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers) 
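Each TYPES_* macro above names a qualifier list that encodes a builtin's prototype: entry 0 describes the return value, the remaining entries describe the arguments, and special qualifiers such as qualifier_lane_index mark operands that must be constant, range-checked (and big-endian-flipped) lane numbers. The toy decoder below shows how the new TERNOPU_LANE shape reads; the enum values and the wording are illustrative only and deliberately do not reuse GCC's bit values.

/* Toy decoder for a qualifier list shaped like TYPES_TERNOPU_LANE:
   { unsigned, unsigned, unsigned, lane_index } describes an unsigned
   result, two unsigned vector operands and a constant lane index.
   Everything here is illustrative, not GCC code.  */
#include <stdio.h>

enum toy_qualifier {
  toy_none,        /* signed vector operand */
  toy_unsigned,    /* unsigned vector operand */
  toy_immediate,   /* compile-time constant operand */
  toy_lane_index   /* constant lane number, range-checked */
};

static const char *
describe (enum toy_qualifier q)
{
  switch (q)
    {
    case toy_unsigned:   return "unsigned vector";
    case toy_none:       return "signed vector";
    case toy_immediate:  return "const int immediate";
    case toy_lane_index: return "const int lane";
    }
  return "?";
}

int main (void)
{
  /* Same shape as aarch64_types_ternopu_lane_qualifiers.  */
  enum toy_qualifier ternopu_lane[] =
    { toy_unsigned, toy_unsigned, toy_unsigned, toy_lane_index };

  printf ("returns: %s\n", describe (ternopu_lane[0]));
  for (int i = 1; i < 4; i++)
    printf ("operand %d: %s\n", i, describe (ternopu_lane[i]));
  return 0;
}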
static enum aarch64_type_qualifiers aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -220,6 +283,7 @@ aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers) #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers) #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers) +#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers) static enum aarch64_type_qualifiers aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -297,47 +361,53 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define CF4(N, X) CODE_FOR_##N##X##4 #define CF10(N, X) CODE_FOR_##N##X -#define VAR1(T, N, MAP, A) \ - {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T}, -#define VAR2(T, N, MAP, A, B) \ - VAR1 (T, N, MAP, A) \ - VAR1 (T, N, MAP, B) -#define VAR3(T, N, MAP, A, B, C) \ - VAR2 (T, N, MAP, A, B) \ - VAR1 (T, N, MAP, C) -#define VAR4(T, N, MAP, A, B, C, D) \ - VAR3 (T, N, MAP, A, B, C) \ - VAR1 (T, N, MAP, D) -#define VAR5(T, N, MAP, A, B, C, D, E) \ - VAR4 (T, N, MAP, A, B, C, D) \ - VAR1 (T, N, MAP, E) -#define VAR6(T, N, MAP, A, B, C, D, E, F) \ - VAR5 (T, N, MAP, A, B, C, D, E) \ - VAR1 (T, N, MAP, F) -#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \ - VAR6 (T, N, MAP, A, B, C, D, E, F) \ - VAR1 (T, N, MAP, G) -#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \ - VAR7 (T, N, MAP, A, B, C, D, E, F, G) \ - VAR1 (T, N, MAP, H) -#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \ - VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \ - VAR1 (T, N, MAP, I) -#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ - VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \ - VAR1 (T, N, MAP, J) -#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ - VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ - VAR1 (T, N, MAP, K) -#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ - VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ - VAR1 (T, N, MAP, L) -#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \ - VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ - VAR1 (T, N, MAP, M) -#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ - VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \ - VAR1 (T, X, MAP, N) +#define VAR1(T, N, MAP, FLAG, A) \ + {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG}, +#define VAR2(T, N, MAP, FLAG, A, B) \ + VAR1 (T, N, MAP, FLAG, A) \ + VAR1 (T, N, MAP, FLAG, B) +#define VAR3(T, N, MAP, FLAG, A, B, C) \ + VAR2 (T, N, MAP, FLAG, A, B) \ + VAR1 (T, N, MAP, FLAG, C) +#define VAR4(T, N, MAP, FLAG, A, B, C, D) \ + VAR3 (T, N, MAP, FLAG, A, B, C) \ + VAR1 (T, N, MAP, FLAG, D) +#define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \ + VAR4 (T, N, MAP, FLAG, A, B, C, D) \ + VAR1 (T, N, MAP, FLAG, E) +#define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \ + VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \ + VAR1 (T, N, MAP, FLAG, F) +#define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \ + VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \ + VAR1 (T, N, MAP, FLAG, G) +#define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \ + VAR1 (T, N, MAP, FLAG, H) +#define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \ + VAR1 (T, N, MAP, FLAG, I) +#define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \ + VAR1 (T, N, MAP, FLAG, J) +#define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, MAP, FLAG, 
A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, MAP, FLAG, K) +#define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, MAP, FLAG, L) +#define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \ + VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR1 (T, N, MAP, FLAG, M) +#define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ + VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \ + VAR1 (T, X, MAP, FLAG, N) +#define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ + VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ + VAR1 (T, X, MAP, FLAG, O) +#define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ + VAR1 (T, X, MAP, FLAG, P) #include "aarch64-builtin-iterators.h" @@ -356,6 +426,18 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { CRC32_BUILTIN (crc32cw, SI) \ CRC32_BUILTIN (crc32cx, DI) +/* The next 8 FCMLA instrinsics require some special handling compared the + normal simd intrinsics. */ +#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \ + FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \ + FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \ + FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \ + FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \ + FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \ + FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \ + FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \ + FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \ + typedef struct { const char *name; @@ -364,11 +446,24 @@ typedef struct unsigned int fcode; } aarch64_crc_builtin_datum; +/* Hold information about how to expand the FCMLA_LANEQ builtins. */ +typedef struct +{ + const char *name; + machine_mode mode; + const enum insn_code icode; + unsigned int fcode; + bool lane; +} aarch64_fcmla_laneq_builtin_datum; + #define CRC32_BUILTIN(N, M) \ AARCH64_BUILTIN_##N, +#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \ + AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, + #undef VAR1 -#define VAR1(T, N, MAP, A) \ +#define VAR1(T, N, MAP, FLAG, A) \ AARCH64_SIMD_BUILTIN_##T##_##N##A, enum aarch64_builtins @@ -380,6 +475,11 @@ enum aarch64_builtins AARCH64_BUILTIN_GET_FPSR, AARCH64_BUILTIN_SET_FPSR, + AARCH64_BUILTIN_GET_FPCR64, + AARCH64_BUILTIN_SET_FPCR64, + AARCH64_BUILTIN_GET_FPSR64, + AARCH64_BUILTIN_SET_FPSR64, + AARCH64_BUILTIN_RSQRT_DF, AARCH64_BUILTIN_RSQRT_SF, AARCH64_BUILTIN_RSQRT_V2DF, @@ -398,7 +498,31 @@ enum aarch64_builtins /* ARMv8.3-A Pointer Authentication Builtins. */ AARCH64_PAUTH_BUILTIN_AUTIA1716, AARCH64_PAUTH_BUILTIN_PACIA1716, + AARCH64_PAUTH_BUILTIN_AUTIB1716, + AARCH64_PAUTH_BUILTIN_PACIB1716, AARCH64_PAUTH_BUILTIN_XPACLRI, + /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins. */ + AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE, + AARCH64_SIMD_FCMLA_LANEQ_BUILTINS + /* Builtin for Arm8.3-a Javascript conversion instruction. */ + AARCH64_JSCVT, + /* TME builtins. */ + AARCH64_TME_BUILTIN_TSTART, + AARCH64_TME_BUILTIN_TCOMMIT, + AARCH64_TME_BUILTIN_TTEST, + AARCH64_TME_BUILTIN_TCANCEL, + /* Armv8.5-a RNG instruction builtins. */ + AARCH64_BUILTIN_RNG_RNDR, + AARCH64_BUILTIN_RNG_RNDRRS, + /* MEMTAG builtins. 
*/ + AARCH64_MEMTAG_BUILTIN_START, + AARCH64_MEMTAG_BUILTIN_IRG, + AARCH64_MEMTAG_BUILTIN_GMI, + AARCH64_MEMTAG_BUILTIN_SUBP, + AARCH64_MEMTAG_BUILTIN_INC_TAG, + AARCH64_MEMTAG_BUILTIN_SET_TAG, + AARCH64_MEMTAG_BUILTIN_GET_TAG, + AARCH64_MEMTAG_BUILTIN_END, AARCH64_BUILTIN_MAX }; @@ -410,6 +534,18 @@ static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { AARCH64_CRC32_BUILTINS }; + +#undef FCMLA_LANEQ_BUILTIN +#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \ + {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \ + AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T}, + +/* This structure contains how to manage the mapping from the builtin to the + instruction to generate in the backend and how to invoke the instruction. */ +static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = { + AARCH64_SIMD_FCMLA_LANEQ_BUILTINS +}; + #undef CRC32_BUILTIN static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; @@ -441,6 +577,7 @@ const char *aarch64_scalar_builtin_types[] = { "__builtin_aarch64_simd_oi", "__builtin_aarch64_simd_ci", "__builtin_aarch64_simd_xi", + "__builtin_aarch64_simd_bf", NULL }; @@ -498,6 +635,23 @@ static tree aarch64_simd_intXI_type_node = NULL_TREE; tree aarch64_fp16_type_node = NULL_TREE; tree aarch64_fp16_ptr_type_node = NULL_TREE; +/* Back-end node type for brain float (bfloat) types. */ +tree aarch64_bf16_type_node = NULL_TREE; +tree aarch64_bf16_ptr_type_node = NULL_TREE; + +/* Wrapper around add_builtin_function. NAME is the name of the built-in + function, TYPE is the function type, CODE is the function subcode + (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function + attributes. */ +static tree +aarch64_general_add_builtin (const char *name, tree type, unsigned int code, + tree attrs = NULL_TREE) +{ + code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL; + return add_builtin_function (name, type, code, BUILT_IN_MD, + NULL, attrs); +} + static const char * aarch64_mangle_builtin_scalar_type (const_tree type) { @@ -519,24 +673,18 @@ aarch64_mangle_builtin_scalar_type (const_tree type) static const char * aarch64_mangle_builtin_vector_type (const_tree type) { - int i; - int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]); - - for (i = 0; i < nelts; i++) - if (aarch64_simd_types[i].mode == TYPE_MODE (type) - && TYPE_NAME (type) - && TREE_CODE (TYPE_NAME (type)) == TYPE_DECL - && DECL_NAME (TYPE_NAME (type)) - && !strcmp - (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), - aarch64_simd_types[i].name)) - return aarch64_simd_types[i].mangle; + tree attrs = TYPE_ATTRIBUTES (type); + if (tree attr = lookup_attribute ("Advanced SIMD type", attrs)) + { + tree mangled_name = TREE_VALUE (TREE_VALUE (attr)); + return IDENTIFIER_POINTER (mangled_name); + } return NULL; } const char * -aarch64_mangle_builtin_type (const_tree type) +aarch64_general_mangle_builtin_type (const_tree type) { const char *mangle; /* Walk through all the AArch64 builtins types tables to filter out the @@ -578,6 +726,8 @@ aarch64_simd_builtin_std_type (machine_mode mode, return float_type_node; case E_DFmode: return double_type_node; + case E_BFmode: + return aarch64_bf16_type_node; default: gcc_unreachable (); } @@ -643,6 +793,10 @@ aarch64_init_simd_builtin_types (void) /* Poly types are a world of their own. 
*/ aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype = build_distinct_type_copy (unsigned_intQI_type_node); + /* Prevent front-ends from transforming Poly8_t arrays into string + literals. */ + TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false; + aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype = build_distinct_type_copy (unsigned_intHI_type_node); aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype = @@ -665,6 +819,10 @@ aarch64_init_simd_builtin_types (void) aarch64_simd_types[Float64x1_t].eltype = double_type_node; aarch64_simd_types[Float64x2_t].eltype = double_type_node; + /* Init Bfloat vector types with underlying __bf16 type. */ + aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node; + aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node; + for (i = 0; i < nelts; i++) { tree eltype = aarch64_simd_types[i].eltype; @@ -672,10 +830,16 @@ aarch64_init_simd_builtin_types (void) if (aarch64_simd_types[i].itype == NULL) { - aarch64_simd_types[i].itype - = build_distinct_type_copy - (build_vector_type (eltype, GET_MODE_NUNITS (mode))); - SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype); + tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode)); + type = build_distinct_type_copy (type); + SET_TYPE_STRUCTURAL_EQUALITY (type); + + tree mangled_name = get_identifier (aarch64_simd_types[i].mangle); + tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE); + TYPE_ATTRIBUTES (type) + = tree_cons (get_identifier ("Advanced SIMD type"), value, + TYPE_ATTRIBUTES (type)); + aarch64_simd_types[i].itype = type; } tdecl = add_builtin_type (aarch64_simd_types[i].name, @@ -729,6 +893,8 @@ aarch64_init_simd_builtin_scalar_types (void) "__builtin_aarch64_simd_poly128"); (*lang_hooks.types.register_builtin_type) (intTI_type_node, "__builtin_aarch64_simd_ti"); + (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node, + "__builtin_aarch64_simd_bf"); /* Unsigned integer types for various mode sizes. */ (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node, "__builtin_aarch64_simd_uqi"); @@ -740,8 +906,124 @@ aarch64_init_simd_builtin_scalar_types (void) "__builtin_aarch64_simd_udi"); } +/* Return a set of FLAG_* flags that describe what the function could do, + taking the command-line flags into account. */ +static unsigned int +aarch64_call_properties (aarch64_simd_builtin_datum *d) +{ + unsigned int flags = d->flags; + + if (!(flags & FLAG_AUTO_FP) && FLOAT_MODE_P (d->mode)) + flags |= FLAG_FP; + + /* -fno-trapping-math means that we can assume any FP exceptions + are not user-visible. */ + if (!flag_trapping_math) + flags &= ~FLAG_RAISE_FP_EXCEPTIONS; + + return flags; +} + +/* Return true if calls to the function could modify some form of + global state. */ +static bool +aarch64_modifies_global_state_p (aarch64_simd_builtin_datum *d) +{ + unsigned int flags = aarch64_call_properties (d); + + if (flags & FLAG_RAISE_FP_EXCEPTIONS) + return true; + + if (flags & FLAG_PREFETCH_MEMORY) + return true; + + return flags & FLAG_WRITE_MEMORY; +} + +/* Return true if calls to the function could read some form of + global state. */ +static bool +aarch64_reads_global_state_p (aarch64_simd_builtin_datum *d) +{ + unsigned int flags = aarch64_call_properties (d); + + if (flags & FLAG_READ_FPCR) + return true; + + return flags & FLAG_READ_MEMORY; +} + +/* Return true if calls to the function could raise a signal. 
*/ +static bool +aarch64_could_trap_p (aarch64_simd_builtin_datum *d) +{ + unsigned int flags = aarch64_call_properties (d); + + if (flags & FLAG_RAISE_FP_EXCEPTIONS) + return true; + + if (flags & (FLAG_READ_MEMORY | FLAG_WRITE_MEMORY)) + return true; + + return false; +} + +/* Add attribute NAME to ATTRS. */ +static tree +aarch64_add_attribute (const char *name, tree attrs) +{ + return tree_cons (get_identifier (name), NULL_TREE, attrs); +} + +/* Return the appropriate function attributes. */ +static tree +aarch64_get_attributes (aarch64_simd_builtin_datum *d) +{ + tree attrs = NULL_TREE; + + if (!aarch64_modifies_global_state_p (d)) + { + if (aarch64_reads_global_state_p (d)) + attrs = aarch64_add_attribute ("pure", attrs); + else + attrs = aarch64_add_attribute ("const", attrs); + } + + if (!flag_non_call_exceptions || !aarch64_could_trap_p (d)) + attrs = aarch64_add_attribute ("nothrow", attrs); + + return aarch64_add_attribute ("leaf", attrs); +} + static bool aarch64_simd_builtins_initialized_p = false; +/* Due to the architecture not providing lane variant of the lane instructions + for fcmla we can't use the standard simd builtin expansion code, but we + still want the majority of the validation that would normally be done. */ + +void +aarch64_init_fcmla_laneq_builtins (void) +{ + unsigned int i = 0; + + for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i) + { + aarch64_fcmla_laneq_builtin_datum* d + = &aarch64_fcmla_lane_builtin_data[i]; + tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none); + machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require (); + tree quadtype + = aarch64_lookup_simd_builtin_type (quadmode, qualifier_none); + tree lanetype + = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index); + tree ftype = build_function_type_list (argtype, argtype, argtype, + quadtype, lanetype, NULL_TREE); + tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); + + aarch64_builtin_decls[d->fcode] = fndecl; + } +} + void aarch64_init_simd_builtins (void) { @@ -765,10 +1047,10 @@ aarch64_init_simd_builtins (void) size_type_node, intSI_type_node, NULL); - aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] = - add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr, - AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD, - NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] + = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi", + lane_check_fpr, + AARCH64_SIMD_BUILTIN_LANE_CHECK); for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) { @@ -866,10 +1148,14 @@ aarch64_init_simd_builtins (void) snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", d->name); - fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, - NULL, NULL_TREE); + tree attrs = aarch64_get_attributes (d); + + fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs); aarch64_builtin_decls[fcode] = fndecl; } + + /* Initialize the remaining fcmla_laneq intrinsics. 
*/ + aarch64_init_fcmla_laneq_builtins (); } static void @@ -884,8 +1170,7 @@ aarch64_init_crc32_builtins () tree argtype = aarch64_simd_builtin_std_type (d->mode, qualifier_unsigned); tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); - tree fndecl = add_builtin_function (d->name, ftype, d->fcode, - BUILT_IN_MD, NULL, NULL_TREE); + tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); aarch64_builtin_decls[d->fcode] = fndecl; } @@ -925,8 +1210,8 @@ aarch64_init_builtin_rsqrt (void) for (; bdd < bdd_end; bdd++) { ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE); - fndecl = add_builtin_function (bdd->builtin_name, - ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE); + fndecl = aarch64_general_add_builtin (bdd->builtin_name, + ftype, bdd->function_code); aarch64_builtin_decls[bdd->function_code] = fndecl; } } @@ -946,6 +1231,19 @@ aarch64_init_fp16_types (void) aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); } +/* Initialize the backend REAL_TYPE type supporting bfloat types. */ +static void +aarch64_init_bf16_types (void) +{ + aarch64_bf16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (aarch64_bf16_type_node) = 16; + SET_TYPE_MODE (aarch64_bf16_type_node, BFmode); + layout_type (aarch64_bf16_type_node); + + lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16"); + aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node); +} + /* Pointer authentication builtins that will become NOP on legacy platform. Currently, these builtins are for internal use only (libgcc EH unwinder). */ @@ -960,47 +1258,192 @@ aarch64_init_pauth_hint_builtins (void) = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE); aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716] - = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth, - AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL, - NULL_TREE); + = aarch64_general_add_builtin ("__builtin_aarch64_autia1716", + ftype_pointer_auth, + AARCH64_PAUTH_BUILTIN_AUTIA1716); aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716] - = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth, - AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL, - NULL_TREE); + = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716", + ftype_pointer_auth, + AARCH64_PAUTH_BUILTIN_PACIA1716); + aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716] + = aarch64_general_add_builtin ("__builtin_aarch64_autib1716", + ftype_pointer_auth, + AARCH64_PAUTH_BUILTIN_AUTIB1716); + aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716] + = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716", + ftype_pointer_auth, + AARCH64_PAUTH_BUILTIN_PACIB1716); aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI] - = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip, - AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL, - NULL_TREE); + = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri", + ftype_pointer_strip, + AARCH64_PAUTH_BUILTIN_XPACLRI); } -void -aarch64_init_builtins (void) +/* Initialize the transactional memory extension (TME) builtins. 
*/ +static void +aarch64_init_tme_builtins (void) +{ + tree ftype_uint64_void + = build_function_type_list (uint64_type_node, NULL); + tree ftype_void_void + = build_function_type_list (void_type_node, NULL); + tree ftype_void_uint64 + = build_function_type_list (void_type_node, uint64_type_node, NULL); + + aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART] + = aarch64_general_add_builtin ("__builtin_aarch64_tstart", + ftype_uint64_void, + AARCH64_TME_BUILTIN_TSTART); + aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST] + = aarch64_general_add_builtin ("__builtin_aarch64_ttest", + ftype_uint64_void, + AARCH64_TME_BUILTIN_TTEST); + aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT] + = aarch64_general_add_builtin ("__builtin_aarch64_tcommit", + ftype_void_void, + AARCH64_TME_BUILTIN_TCOMMIT); + aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL] + = aarch64_general_add_builtin ("__builtin_aarch64_tcancel", + ftype_void_uint64, + AARCH64_TME_BUILTIN_TCANCEL); +} + +/* Add builtins for Random Number instructions. */ + +static void +aarch64_init_rng_builtins (void) +{ + tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node); + tree ftype + = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL); + aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR] + = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype, + AARCH64_BUILTIN_RNG_RNDR); + aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS] + = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype, + AARCH64_BUILTIN_RNG_RNDRRS); +} + +/* Initialize the memory tagging extension (MTE) builtins. */ +struct +{ + tree ftype; + enum insn_code icode; +} aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END - + AARCH64_MEMTAG_BUILTIN_START - 1]; + +static void +aarch64_init_memtag_builtins (void) { - tree ftype_set_fpr + tree fntype = NULL; + +#define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \ + aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \ + = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \ + T, AARCH64_MEMTAG_BUILTIN_##F); \ + aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \ + AARCH64_MEMTAG_BUILTIN_START - 1] = \ + {T, CODE_FOR_##I}; + + fntype = build_function_type_list (ptr_type_node, ptr_type_node, + uint64_type_node, NULL); + AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype); + + fntype = build_function_type_list (uint64_type_node, ptr_type_node, + uint64_type_node, NULL); + AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype); + + fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node, + ptr_type_node, NULL); + AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype); + + fntype = build_function_type_list (ptr_type_node, ptr_type_node, + unsigned_type_node, NULL); + AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype); + + fntype = build_function_type_list (void_type_node, ptr_type_node, NULL); + AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype); + + fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL); + AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype); + +#undef AARCH64_INIT_MEMTAG_BUILTINS_DECL +} + +/* Initialize fpsr fpcr getters and setters. 
*/ + +static void +aarch64_init_fpsr_fpcr_builtins (void) +{ + tree ftype_set = build_function_type_list (void_type_node, unsigned_type_node, NULL); - tree ftype_get_fpr + tree ftype_get = build_function_type_list (unsigned_type_node, NULL); aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] - = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, - AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr", + ftype_get, + AARCH64_BUILTIN_GET_FPCR); aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] - = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, - AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr", + ftype_set, + AARCH64_BUILTIN_SET_FPCR); aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] - = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, - AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr", + ftype_get, + AARCH64_BUILTIN_GET_FPSR); aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] - = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, - AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr", + ftype_set, + AARCH64_BUILTIN_SET_FPSR); + + ftype_set + = build_function_type_list (void_type_node, long_long_unsigned_type_node, + NULL); + ftype_get + = build_function_type_list (long_long_unsigned_type_node, NULL); + + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64] + = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64", + ftype_get, + AARCH64_BUILTIN_GET_FPCR64); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64] + = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64", + ftype_set, + AARCH64_BUILTIN_SET_FPCR64); + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64] + = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64", + ftype_get, + AARCH64_BUILTIN_GET_FPSR64); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64] + = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64", + ftype_set, + AARCH64_BUILTIN_SET_FPSR64); +} + +/* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */ + +void +aarch64_general_init_builtins (void) +{ + aarch64_init_fpsr_fpcr_builtins (); aarch64_init_fp16_types (); + aarch64_init_bf16_types (); + if (TARGET_SIMD) aarch64_init_simd_builtins (); aarch64_init_crc32_builtins (); aarch64_init_builtin_rsqrt (); + aarch64_init_rng_builtins (); + + tree ftype_jcvt + = build_function_type_list (intSI_type_node, double_type_node, NULL); + aarch64_builtin_decls[AARCH64_JSCVT] + = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt, + AARCH64_JSCVT); /* Initialize pointer authentication builtins which are backed by instructions in NOP encoding space. @@ -1010,10 +1453,17 @@ aarch64_init_builtins (void) register them. */ if (!TARGET_ILP32) aarch64_init_pauth_hint_builtins (); + + if (TARGET_TME) + aarch64_init_tme_builtins (); + + if (TARGET_MEMTAG) + aarch64_init_memtag_builtins (); } +/* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. 
*/ tree -aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +aarch64_general_builtin_decl (unsigned code, bool) { if (code >= AARCH64_BUILTIN_MAX) return error_mark_node; @@ -1027,6 +1477,8 @@ typedef enum SIMD_ARG_CONSTANT, SIMD_ARG_LANE_INDEX, SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, + SIMD_ARG_LANE_PAIR_INDEX, + SIMD_ARG_LANE_QUADTUP_INDEX, SIMD_ARG_STOP } builtin_simd_arg; @@ -1098,9 +1550,43 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, /* Keep to GCC-vector-extension lane indices in the RTL. */ op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); } - /* Fall through - if the lane index isn't a constant then - the next case will error. */ - /* FALLTHRU */ + /* If the lane index isn't a constant then error out. */ + goto constant_arg; + + case SIMD_ARG_LANE_PAIR_INDEX: + /* Must be a previous operand into which this is an index and + index is restricted to nunits / 2. */ + gcc_assert (opc > 0); + if (CONST_INT_P (op[opc])) + { + machine_mode vmode = insn_data[icode].operand[opc - 1].mode; + unsigned int nunits + = GET_MODE_NUNITS (vmode).to_constant (); + aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp); + /* Keep to GCC-vector-extension lane indices in the RTL. */ + int lane = INTVAL (op[opc]); + op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), + SImode); + } + /* If the lane index isn't a constant then error out. */ + goto constant_arg; + case SIMD_ARG_LANE_QUADTUP_INDEX: + /* Must be a previous operand into which this is an index and + index is restricted to nunits / 4. */ + gcc_assert (opc > 0); + if (CONST_INT_P (op[opc])) + { + machine_mode vmode = insn_data[icode].operand[opc - 1].mode; + unsigned int nunits + = GET_MODE_NUNITS (vmode).to_constant (); + aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp); + /* Keep to GCC-vector-extension lane indices in the RTL. */ + int lane = INTVAL (op[opc]); + op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), + SImode); + } + /* If the lane index isn't a constant then error out. */ + goto constant_arg; case SIMD_ARG_CONSTANT: constant_arg: if (!(*insn_data[icode].operand[opc].predicate) @@ -1211,6 +1697,10 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) if (d->qualifiers[qualifiers_k] & qualifier_lane_index) args[k] = SIMD_ARG_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) + args[k] = SIMD_ARG_LANE_PAIR_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index) + args[k] = SIMD_ARG_LANE_QUADTUP_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_immediate) @@ -1313,84 +1803,363 @@ aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target) return target; } -/* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient. */ +/* Expand a FCMLA lane expression EXP with code FCODE and + result going to TARGET if that is convenient. 
*/ + +rtx +aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) +{ + int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1; + aarch64_fcmla_laneq_builtin_datum* d + = &aarch64_fcmla_lane_builtin_data[bcode]; + machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require (); + rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0))); + rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1))); + rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2))); + tree tmp = CALL_EXPR_ARG (exp, 3); + rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER); + + /* Validate that the lane index is a constant. */ + if (!CONST_INT_P (lane_idx)) + { + error ("%Kargument %d must be a constant immediate", exp, 4); + return const0_rtx; + } + + /* Validate that the index is within the expected range. */ + int nunits = GET_MODE_NUNITS (quadmode).to_constant (); + aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp); + + /* Generate the correct register and mode. */ + int lane = INTVAL (lane_idx); + + if (lane < nunits / 4) + op2 = simplify_gen_subreg (d->mode, op2, quadmode, + subreg_lowpart_offset (d->mode, quadmode)); + else + { + /* Select the upper 64 bits, either a V2SF or V4HF, this however + is quite messy, as the operation required even though simple + doesn't have a simple RTL pattern, and seems it's quite hard to + define using a single RTL pattern. The target generic version + gen_highpart_mode generates code that isn't optimal. */ + rtx temp1 = gen_reg_rtx (d->mode); + rtx temp2 = gen_reg_rtx (DImode); + temp1 = simplify_gen_subreg (d->mode, op2, quadmode, + subreg_lowpart_offset (d->mode, quadmode)); + temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx)); + else + emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx)); + op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0); + + /* And recalculate the index. */ + lane -= nunits / 4; + } + + /* Keep to GCC-vector-extension lane indices in the RTL, only nunits / 4 + (max nunits in range check) are valid. Which means only 0-1, so we + only need to know the order in a V2mode. */ + lane_idx = aarch64_endian_lane_rtx (V2DImode, lane); + + if (!target + || !REG_P (target) + || GET_MODE (target) != d->mode) + target = gen_reg_rtx (d->mode); + + rtx pat = NULL_RTX; + + if (d->lane) + pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx); + else + pat = GEN_FCN (d->icode) (target, op0, op1, op2); + + if (!pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Function to expand an expression EXP which calls one of the Transactional + Memory Extension (TME) builtins FCODE with the result going to TARGET. 
*/ +static rtx +aarch64_expand_builtin_tme (int fcode, tree exp, rtx target) +{ + switch (fcode) + { + case AARCH64_TME_BUILTIN_TSTART: + target = gen_reg_rtx (DImode); + emit_insn (GEN_FCN (CODE_FOR_tstart) (target)); + break; + + case AARCH64_TME_BUILTIN_TTEST: + target = gen_reg_rtx (DImode); + emit_insn (GEN_FCN (CODE_FOR_ttest) (target)); + break; + + case AARCH64_TME_BUILTIN_TCOMMIT: + emit_insn (GEN_FCN (CODE_FOR_tcommit) ()); + break; + + case AARCH64_TME_BUILTIN_TCANCEL: + { + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + if (CONST_INT_P (op0) && UINTVAL (op0) <= 65536) + emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0)); + else + { + error ("%Kargument must be a 16-bit constant immediate", exp); + return const0_rtx; + } + } + break; + + default : + gcc_unreachable (); + } + return target; +} + +/* Expand a random number builtin EXP with code FCODE, putting the result + int TARGET. If IGNORE is true the return value is ignored. */ + rtx -aarch64_expand_builtin (tree exp, - rtx target, - rtx subtarget ATTRIBUTE_UNUSED, - machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) +aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore) +{ + rtx pat; + enum insn_code icode; + if (fcode == AARCH64_BUILTIN_RNG_RNDR) + icode = CODE_FOR_aarch64_rndr; + else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS) + icode = CODE_FOR_aarch64_rndrrs; + else + gcc_unreachable (); + + rtx rand = gen_reg_rtx (DImode); + pat = GEN_FCN (icode) (rand); + if (!pat) + return NULL_RTX; + + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx res_addr = expand_normal (arg0); + res_addr = convert_memory_address (Pmode, res_addr); + rtx res_mem = gen_rtx_MEM (DImode, res_addr); + emit_insn (pat); + emit_move_insn (res_mem, rand); + /* If the status result is unused don't generate the CSET code. */ + if (ignore) + return target; + + rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM); + rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx); + emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg)); + return target; +} + +/* Expand an expression EXP that calls a MEMTAG built-in FCODE + with result going to TARGET. */ +static rtx +aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) +{ + if (TARGET_ILP32) + { + error ("Memory Tagging Extension does not support %<-mabi=ilp32%>"); + return const0_rtx; + } + + rtx pat = NULL; + enum insn_code icode = aarch64_memtag_builtin_data[fcode - + AARCH64_MEMTAG_BUILTIN_START - 1].icode; + + rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); + machine_mode mode0 = GET_MODE (op0); + op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0); + op0 = convert_to_mode (DImode, op0, true); + + switch (fcode) + { + case AARCH64_MEMTAG_BUILTIN_IRG: + case AARCH64_MEMTAG_BUILTIN_GMI: + case AARCH64_MEMTAG_BUILTIN_SUBP: + case AARCH64_MEMTAG_BUILTIN_INC_TAG: + { + if (! target + || GET_MODE (target) != DImode + || ! (*insn_data[icode].operand[0].predicate) (target, DImode)) + target = gen_reg_rtx (DImode); + + if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG) + { + rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1)); + + if ((*insn_data[icode].operand[3].predicate) (op1, QImode)) + { + pat = GEN_FCN (icode) (target, op0, const0_rtx, op1); + break; + } + error ("%Kargument %d must be a constant immediate " + "in range [0,15]", exp, 2); + return const0_rtx; + } + else + { + rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1)); + machine_mode mode1 = GET_MODE (op1); + op1 = force_reg (mode1 == VOIDmode ? 
DImode : mode1, op1); + op1 = convert_to_mode (DImode, op1, true); + pat = GEN_FCN (icode) (target, op0, op1); + } + break; + } + case AARCH64_MEMTAG_BUILTIN_GET_TAG: + target = op0; + pat = GEN_FCN (icode) (target, op0, const0_rtx); + break; + case AARCH64_MEMTAG_BUILTIN_SET_TAG: + pat = GEN_FCN (icode) (op0, op0, const0_rtx); + break; + default: + gcc_unreachable(); + } + + if (!pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand an expression EXP as fpsr or fpcr setter (depending on + UNSPEC) using MODE. */ +static void +aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp) +{ + tree arg = CALL_EXPR_ARG (exp, 0); + rtx op = force_reg (mode, expand_normal (arg)); + emit_insn (gen_aarch64_set (unspec, mode, op)); +} + +/* Expand a fpsr or fpcr getter (depending on UNSPEC) using MODE. + Return the target. */ +static rtx +aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode, + rtx target) +{ + expand_operand op; + create_output_operand (&op, target, mode); + expand_insn (icode, 1, &op); + return op.value; +} + +/* Expand an expression EXP that calls built-in function FCODE, + with result going to TARGET if that's convenient. IGNORE is true + if the result of the builtin is ignored. */ +rtx +aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, + int ignore) { - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - int fcode = DECL_FUNCTION_CODE (fndecl); int icode; - rtx pat, op0; + rtx op0; tree arg0; switch (fcode) { case AARCH64_BUILTIN_GET_FPCR: + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi, + SImode, target); case AARCH64_BUILTIN_SET_FPCR: + aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp); + return target; case AARCH64_BUILTIN_GET_FPSR: + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi, + SImode, target); case AARCH64_BUILTIN_SET_FPSR: - if ((fcode == AARCH64_BUILTIN_GET_FPCR) - || (fcode == AARCH64_BUILTIN_GET_FPSR)) - { - icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? - CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; - target = gen_reg_rtx (SImode); - pat = GEN_FCN (icode) (target); - } - else - { - target = NULL_RTX; - icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ? 
- CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = force_reg (SImode, expand_normal (arg0)); - pat = GEN_FCN (icode) (op0); - } - emit_insn (pat); + aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp); + return target; + case AARCH64_BUILTIN_GET_FPCR64: + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi, + DImode, target); + case AARCH64_BUILTIN_SET_FPCR64: + aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp); + return target; + case AARCH64_BUILTIN_GET_FPSR64: + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi, + DImode, target); + case AARCH64_BUILTIN_SET_FPSR64: + aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp); return target; - case AARCH64_PAUTH_BUILTIN_AUTIA1716: case AARCH64_PAUTH_BUILTIN_PACIA1716: + case AARCH64_PAUTH_BUILTIN_AUTIB1716: + case AARCH64_PAUTH_BUILTIN_PACIB1716: case AARCH64_PAUTH_BUILTIN_XPACLRI: arg0 = CALL_EXPR_ARG (exp, 0); op0 = force_reg (Pmode, expand_normal (arg0)); - if (!target) - target = gen_reg_rtx (Pmode); - else - target = force_reg (Pmode, target); - - emit_move_insn (target, op0); - if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI) { rtx lr = gen_rtx_REG (Pmode, R30_REGNUM); icode = CODE_FOR_xpaclri; emit_move_insn (lr, op0); emit_insn (GEN_FCN (icode) ()); - emit_move_insn (target, lr); + return lr; } else { tree arg1 = CALL_EXPR_ARG (exp, 1); rtx op1 = force_reg (Pmode, expand_normal (arg1)); - icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716 - ? CODE_FOR_paci1716 : CODE_FOR_auti1716); + switch (fcode) + { + case AARCH64_PAUTH_BUILTIN_AUTIA1716: + icode = CODE_FOR_autia1716; + break; + case AARCH64_PAUTH_BUILTIN_AUTIB1716: + icode = CODE_FOR_autib1716; + break; + case AARCH64_PAUTH_BUILTIN_PACIA1716: + icode = CODE_FOR_pacia1716; + break; + case AARCH64_PAUTH_BUILTIN_PACIB1716: + icode = CODE_FOR_pacib1716; + break; + default: + icode = 0; + gcc_unreachable (); + } rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM); rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM); emit_move_insn (x17_reg, op0); emit_move_insn (x16_reg, op1); emit_insn (GEN_FCN (icode) ()); - emit_move_insn (target, x17_reg); + return x17_reg; } - return target; + case AARCH64_JSCVT: + { + expand_operand ops[2]; + create_output_operand (&ops[0], target, SImode); + op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); + create_input_operand (&ops[1], op0, DFmode); + expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops); + return ops[0].value; + } + + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF: + return aarch64_expand_fcmla_builtin (exp, target, fcode); + case AARCH64_BUILTIN_RNG_RNDR: + case AARCH64_BUILTIN_RNG_RNDRRS: + return aarch64_expand_rng_builtin (exp, target, fcode, ignore); } if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) @@ -1405,6 +2174,16 @@ aarch64_expand_builtin (tree exp, || fcode == AARCH64_BUILTIN_RSQRT_V4SF) return aarch64_expand_builtin_rsqrt (fcode, exp, target); + if (fcode == AARCH64_TME_BUILTIN_TSTART + || fcode == AARCH64_TME_BUILTIN_TCOMMIT + || fcode == AARCH64_TME_BUILTIN_TTEST + || fcode == AARCH64_TME_BUILTIN_TCANCEL) + return aarch64_expand_builtin_tme (fcode, exp, target); + + if (fcode >= 
AARCH64_MEMTAG_BUILTIN_START + && fcode <= AARCH64_MEMTAG_BUILTIN_END) + return aarch64_expand_builtin_memtag (fcode, exp, target); + gcc_unreachable (); } @@ -1413,17 +2192,13 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in) { machine_mode in_mode, out_mode; - unsigned HOST_WIDE_INT in_n, out_n; if (TREE_CODE (type_out) != VECTOR_TYPE || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; - out_mode = TYPE_MODE (TREE_TYPE (type_out)); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n) - || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n)) - return NULL_TREE; + out_mode = TYPE_MODE (type_out); + in_mode = TYPE_MODE (type_in); #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 @@ -1439,8 +2214,7 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, { #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == N##Fmode && out_n == C \ - && in_mode == N##Fmode && in_n == C) + (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode) CASE_CFN_FLOOR: return AARCH64_FIND_FRINT_VARIANT (floor); CASE_CFN_CEIL: @@ -1455,8 +2229,7 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, return AARCH64_FIND_FRINT_VARIANT (sqrt); #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == SImode && out_n == C \ - && in_mode == N##Imode && in_n == C) + (out_mode == V##C##SImode && in_mode == V##C##N##Imode) CASE_CFN_CLZ: { if (AARCH64_CHECK_BUILTIN_MODE (4, S)) @@ -1473,8 +2246,7 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, } #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == N##Imode && out_n == C \ - && in_mode == N##Fmode && in_n == C) + (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) CASE_CFN_IFLOOR: CASE_CFN_LFLOOR: CASE_CFN_LLFLOOR: @@ -1523,29 +2295,6 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, return aarch64_builtin_decls[builtin]; } - case CFN_BUILT_IN_BSWAP16: -#undef AARCH64_CHECK_BUILTIN_MODE -#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == N##Imode && out_n == C \ - && in_mode == N##Imode && in_n == C) - if (AARCH64_CHECK_BUILTIN_MODE (4, H)) - return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; - else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) - return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; - else - return NULL_TREE; - case CFN_BUILT_IN_BSWAP32: - if (AARCH64_CHECK_BUILTIN_MODE (2, S)) - return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) - return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; - else - return NULL_TREE; - case CFN_BUILT_IN_BSWAP64: - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) - return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; - else - return NULL_TREE; default: return NULL_TREE; } @@ -1556,7 +2305,7 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, /* Return builtin for reciprocal square root. 
*/ tree -aarch64_builtin_rsqrt (unsigned int fn) +aarch64_general_builtin_rsqrt (unsigned int fn) { if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df) return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF]; @@ -1568,23 +2317,24 @@ aarch64_builtin_rsqrt (unsigned int fn) } #undef VAR1 -#define VAR1(T, N, MAP, A) \ +#define VAR1(T, N, MAP, FLAG, A) \ case AARCH64_SIMD_BUILTIN_##T##_##N##A: +/* Try to fold a call to the built-in function with subcode FCODE. The + function is passed the N_ARGS arguments in ARGS and it returns a value + of type TYPE. Return the new expression on success and NULL_TREE on + failure. */ tree -aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, - bool ignore ATTRIBUTE_UNUSED) +aarch64_general_fold_builtin (unsigned int fcode, tree type, + unsigned int n_args ATTRIBUTE_UNUSED, tree *args) { - int fcode = DECL_FUNCTION_CODE (fndecl); - tree type = TREE_TYPE (TREE_TYPE (fndecl)); - switch (fcode) { - BUILTIN_VDQF (UNOP, abs, 2) + BUILTIN_VDQF (UNOP, abs, 2, ALL) return fold_build1 (ABS_EXPR, type, args[0]); - VAR1 (UNOP, floatv2si, 2, v2sf) - VAR1 (UNOP, floatv4si, 2, v4sf) - VAR1 (UNOP, floatv2di, 2, v2df) + VAR1 (UNOP, floatv2si, 2, ALL, v2sf) + VAR1 (UNOP, floatv4si, 2, ALL, v4sf) + VAR1 (UNOP, floatv2di, 2, ALL, v2df) return fold_build1 (FLOAT_EXPR, type, args[0]); default: break; @@ -1593,109 +2343,90 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, return NULL_TREE; } -bool -aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +/* Try to fold STMT, given that it's a call to the built-in function with + subcode FCODE. Return the new statement on success and null on + failure. */ +gimple * +aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt) { - bool changed = false; - gimple *stmt = gsi_stmt (*gsi); - tree call = gimple_call_fn (stmt); - tree fndecl; gimple *new_stmt = NULL; - - if (call) + unsigned nargs = gimple_call_num_args (stmt); + tree *args = (nargs > 0 + ? gimple_call_arg_ptr (stmt, 0) + : &error_mark_node); + + /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int + and unsigned int; it will distinguish according to the types of + the arguments to the __builtin. */ + switch (fcode) { - fndecl = gimple_call_fndecl (stmt); - if (fndecl) + BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL) + new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, + 1, args[0]); + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + break; + BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL) + BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL) + new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, + 1, args[0]); + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + break; + BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL) + BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL) + new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, + 1, args[0]); + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + break; + BUILTIN_GPF (BINOP, fmulx, 0, ALL) { - int fcode = DECL_FUNCTION_CODE (fndecl); - unsigned nargs = gimple_call_num_args (stmt); - tree *args = (nargs > 0 - ? gimple_call_arg_ptr (stmt, 0) - : &error_mark_node); - - /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int - and unsigned int; it will distinguish according to the types of - the arguments to the __builtin. 
*/ - switch (fcode) + gcc_assert (nargs == 2); + bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; + bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; + if (a0_cst_p || a1_cst_p) { - BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) - new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, - 1, args[0]); - gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); - break; - BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) - BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) - new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, - 1, args[0]); - gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); - break; - BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) - BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) - new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, - 1, args[0]); - gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); - break; - BUILTIN_GPF (BINOP, fmulx, 0) + if (a0_cst_p && a1_cst_p) { - gcc_assert (nargs == 2); - bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; - bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; - if (a0_cst_p || a1_cst_p) - { - if (a0_cst_p && a1_cst_p) - { - tree t0 = TREE_TYPE (args[0]); - real_value a0 = (TREE_REAL_CST (args[0])); - real_value a1 = (TREE_REAL_CST (args[1])); - if (real_equal (&a1, &dconst0)) - std::swap (a0, a1); - /* According to real_equal (), +0 equals -0. */ - if (real_equal (&a0, &dconst0) && real_isinf (&a1)) - { - real_value res = dconst2; - res.sign = a0.sign ^ a1.sign; - new_stmt = - gimple_build_assign (gimple_call_lhs (stmt), - REAL_CST, - build_real (t0, res)); - } - else - new_stmt = - gimple_build_assign (gimple_call_lhs (stmt), - MULT_EXPR, - args[0], args[1]); - } - else /* a0_cst_p ^ a1_cst_p. */ - { - real_value const_part = a0_cst_p - ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); - if (!real_equal (&const_part, &dconst0) - && !real_isinf (&const_part)) - new_stmt = - gimple_build_assign (gimple_call_lhs (stmt), - MULT_EXPR, args[0], args[1]); - } - } - if (new_stmt) + tree t0 = TREE_TYPE (args[0]); + real_value a0 = (TREE_REAL_CST (args[0])); + real_value a1 = (TREE_REAL_CST (args[1])); + if (real_equal (&a1, &dconst0)) + std::swap (a0, a1); + /* According to real_equal (), +0 equals -0. */ + if (real_equal (&a0, &dconst0) && real_isinf (&a1)) { - gimple_set_vuse (new_stmt, gimple_vuse (stmt)); - gimple_set_vdef (new_stmt, gimple_vdef (stmt)); + real_value res = dconst2; + res.sign = a0.sign ^ a1.sign; + new_stmt = gimple_build_assign (gimple_call_lhs (stmt), + REAL_CST, + build_real (t0, res)); } - break; + else + new_stmt = gimple_build_assign (gimple_call_lhs (stmt), + MULT_EXPR, + args[0], args[1]); } - default: - break; + else /* a0_cst_p ^ a1_cst_p. */ + { + real_value const_part = a0_cst_p + ? 
TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); + if (!real_equal (&const_part, &dconst0) + && !real_isinf (&const_part)) + new_stmt = gimple_build_assign (gimple_call_lhs (stmt), + MULT_EXPR, args[0], + args[1]); + } + } + if (new_stmt) + { + gimple_set_vuse (new_stmt, gimple_vuse (stmt)); + gimple_set_vdef (new_stmt, gimple_vdef (stmt)); } + break; } + default: + break; } - - if (new_stmt) - { - gsi_replace (gsi, new_stmt, true); - changed = true; - } - - return changed; + return new_stmt; } void @@ -1751,10 +2482,12 @@ aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) mask_sr = build_int_cst (unsigned_type_node, ~(AARCH64_FE_ALL_EXCEPT)); - ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, - fenv_cr, build_call_expr (get_fpcr, 0)); - ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, - fenv_sr, build_call_expr (get_fpsr, 0)); + ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node, + fenv_cr, build_call_expr (get_fpcr, 0), + NULL_TREE, NULL_TREE); + ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node, + fenv_sr, build_call_expr (get_fpsr, 0), + NULL_TREE, NULL_TREE); masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); @@ -1786,8 +2519,9 @@ aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) __atomic_feraiseexcept (new_fenv_var); */ new_fenv_var = create_tmp_var_raw (unsigned_type_node); - reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, - new_fenv_var, build_call_expr (get_fpsr, 0)); + reload_fenv = build4 (TARGET_EXPR, unsigned_type_node, + new_fenv_var, build_call_expr (get_fpsr, 0), + NULL_TREE, NULL_TREE); restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); update_call = build_call_expr (atomic_feraiseexcept, 1, @@ -1797,6 +2531,106 @@ aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) reload_fenv, restore_fnenv), update_call); } +/* Resolve overloaded MEMTAG build-in functions. */ +#define AARCH64_BUILTIN_SUBCODE(F) \ + (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT) + +static tree +aarch64_resolve_overloaded_memtag (location_t loc, + tree fndecl, void *pass_params) +{ + vec *params = static_cast *> (pass_params); + unsigned param_num = params ? 
params->length() : 0; + unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl); + tree inittype = aarch64_memtag_builtin_data[ + fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype; + unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1; + + if (param_num != arg_num) + { + TREE_TYPE (fndecl) = inittype; + return NULL_TREE; + } + tree retype = NULL; + + if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP) + { + tree t0 = TREE_TYPE ((*params)[0]); + tree t1 = TREE_TYPE ((*params)[1]); + + if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE) + t0 = ptr_type_node; + if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE) + t1 = ptr_type_node; + + if (TYPE_MODE (t0) != DImode) + warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit", + (int)tree_to_shwi (DECL_SIZE ((*params)[0]))); + + if (TYPE_MODE (t1) != DImode) + warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit", + (int)tree_to_shwi (DECL_SIZE ((*params)[1]))); + + retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL); + } + else + { + tree t0 = TREE_TYPE ((*params)[0]); + + if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE) + { + TREE_TYPE (fndecl) = inittype; + return NULL_TREE; + } + + if (TYPE_MODE (t0) != DImode) + warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit", + (int)tree_to_shwi (DECL_SIZE ((*params)[0]))); + + switch (fcode) + { + case AARCH64_MEMTAG_BUILTIN_IRG: + retype = build_function_type_list (t0, t0, uint64_type_node, NULL); + break; + case AARCH64_MEMTAG_BUILTIN_GMI: + retype = build_function_type_list (uint64_type_node, t0, + uint64_type_node, NULL); + break; + case AARCH64_MEMTAG_BUILTIN_INC_TAG: + retype = build_function_type_list (t0, t0, unsigned_type_node, NULL); + break; + case AARCH64_MEMTAG_BUILTIN_SET_TAG: + retype = build_function_type_list (void_type_node, t0, NULL); + break; + case AARCH64_MEMTAG_BUILTIN_GET_TAG: + retype = build_function_type_list (t0, t0, NULL); + break; + default: + return NULL_TREE; + } + } + + if (!retype || retype == error_mark_node) + TREE_TYPE (fndecl) = inittype; + else + TREE_TYPE (fndecl) = retype; + + return NULL_TREE; +} + +/* Called at aarch64_resolve_overloaded_builtin in aarch64-c.c. */ +tree +aarch64_resolve_overloaded_builtin_general (location_t loc, tree function, + void *pass_params) +{ + unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function); + + if (fcode >= AARCH64_MEMTAG_BUILTIN_START + && fcode <= AARCH64_MEMTAG_BUILTIN_END) + return aarch64_resolve_overloaded_memtag(loc, function, pass_params); + + return NULL_TREE; +} #undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_FIND_FRINT_VARIANT diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c index 108c012039449..f9ddffa007818 100644 --- a/gcc/config/aarch64/aarch64-c.c +++ b/gcc/config/aarch64/aarch64-c.c @@ -1,5 +1,5 @@ /* Target-specific code for C family languages. - Copyright (C) 2015-2018 Free Software Foundation, Inc. + Copyright (C) 2015-2021 Free Software Foundation, Inc. This file is part of GCC. @@ -63,12 +63,15 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) as interoperability with the same arm macro. */ builtin_define ("__ARM_ARCH_8A"); - builtin_define_with_int_value ("__ARM_ARCH_PROFILE", 'A'); + builtin_define_with_int_value ("__ARM_ARCH_PROFILE", + AARCH64_ISA_V8_R ? 
'R' : 'A'); builtin_define ("__ARM_FEATURE_CLZ"); builtin_define ("__ARM_FEATURE_IDIV"); builtin_define ("__ARM_FEATURE_UNALIGNED"); builtin_define ("__ARM_PCS_AAPCS64"); builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); + + builtin_define ("__GCC_ASM_FLAG_OUTPUTS__"); } /* Undefine/redefine macros that depend on the current backend state and may @@ -109,6 +112,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_CRC32, "__ARM_FEATURE_CRC32", pfile); aarch64_def_or_undef (TARGET_DOTPROD, "__ARM_FEATURE_DOTPROD", pfile); + aarch64_def_or_undef (TARGET_COMPLEX, "__ARM_FEATURE_COMPLEX", pfile); + aarch64_def_or_undef (TARGET_JSCVT, "__ARM_FEATURE_JCVT", pfile); cpp_undef (pfile, "__AARCH64_CMODEL_TINY__"); cpp_undef (pfile, "__AARCH64_CMODEL_SMALL__"); @@ -145,6 +150,20 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) bits = 0; builtin_define_with_int_value ("__ARM_FEATURE_SVE_BITS", bits); } + aarch64_def_or_undef (TARGET_SVE, "__ARM_FEATURE_SVE_VECTOR_OPERATORS", + pfile); + aarch64_def_or_undef (TARGET_SVE_I8MM, + "__ARM_FEATURE_SVE_MATMUL_INT8", pfile); + aarch64_def_or_undef (TARGET_SVE_F32MM, + "__ARM_FEATURE_SVE_MATMUL_FP32", pfile); + aarch64_def_or_undef (TARGET_SVE_F64MM, + "__ARM_FEATURE_SVE_MATMUL_FP64", pfile); + aarch64_def_or_undef (TARGET_SVE2, "__ARM_FEATURE_SVE2", pfile); + aarch64_def_or_undef (TARGET_SVE2_AES, "__ARM_FEATURE_SVE2_AES", pfile); + aarch64_def_or_undef (TARGET_SVE2_BITPERM, + "__ARM_FEATURE_SVE2_BITPERM", pfile); + aarch64_def_or_undef (TARGET_SVE2_SHA3, "__ARM_FEATURE_SVE2_SHA3", pfile); + aarch64_def_or_undef (TARGET_SVE2_SM4, "__ARM_FEATURE_SVE2_SM4", pfile); aarch64_def_or_undef (TARGET_LSE, "__ARM_FEATURE_ATOMICS", pfile); aarch64_def_or_undef (TARGET_AES, "__ARM_FEATURE_AES", pfile); @@ -155,6 +174,33 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_SM4, "__ARM_FEATURE_SM4", pfile); aarch64_def_or_undef (TARGET_F16FML, "__ARM_FEATURE_FP16_FML", pfile); + aarch64_def_or_undef (TARGET_FRINT, "__ARM_FEATURE_FRINT", pfile); + aarch64_def_or_undef (TARGET_TME, "__ARM_FEATURE_TME", pfile); + aarch64_def_or_undef (TARGET_RNG, "__ARM_FEATURE_RNG", pfile); + aarch64_def_or_undef (TARGET_MEMTAG, "__ARM_FEATURE_MEMORY_TAGGING", pfile); + + aarch64_def_or_undef (aarch64_bti_enabled (), + "__ARM_FEATURE_BTI_DEFAULT", pfile); + + cpp_undef (pfile, "__ARM_FEATURE_PAC_DEFAULT"); + if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE) + { + int v = 0; + if (aarch64_ra_sign_key == AARCH64_KEY_A) + v |= 1; + if (aarch64_ra_sign_key == AARCH64_KEY_B) + v |= 2; + if (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL) + v |= 4; + builtin_define_with_int_value ("__ARM_FEATURE_PAC_DEFAULT", v); + } + + aarch64_def_or_undef (TARGET_I8MM, "__ARM_FEATURE_MATMUL_INT8", pfile); + aarch64_def_or_undef (TARGET_BF16_SIMD, + "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_BF16_FP, + "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + /* Not for ACLE, but required to keep "float.h" correct if we switch target between implementations that do or do not support ARMv8.2-A 16-bit floating-point extensions. */ @@ -196,12 +242,12 @@ aarch64_pragma_target_parse (tree args, tree pop_target) else { pop_target = pop_target ? 
pop_target : target_option_default_node; - cl_target_option_restore (&global_options, + cl_target_option_restore (&global_options, &global_options_set, TREE_TARGET_OPTION (pop_target)); } target_option_current_node - = build_target_option_node (&global_options); + = build_target_option_node (&global_options, &global_options_set); aarch64_reset_previous_fndecl (); /* For the definitions, ensure all newly defined macros are considered @@ -236,6 +282,73 @@ aarch64_pragma_target_parse (tree args, tree pop_target) return true; } +/* Implement "#pragma GCC aarch64". */ +static void +aarch64_pragma_aarch64 (cpp_reader *) +{ + tree x; + if (pragma_lex (&x) != CPP_STRING) + { + error ("%<#pragma GCC aarch64%> requires a string parameter"); + return; + } + + const char *name = TREE_STRING_POINTER (x); + if (strcmp (name, "arm_sve.h") == 0) + aarch64_sve::handle_arm_sve_h (); + else + error ("unknown %<#pragma GCC aarch64%> option %qs", name); +} + +/* Implement TARGET_RESOLVE_OVERLOADED_BUILTIN. */ +static tree +aarch64_resolve_overloaded_builtin (unsigned int uncast_location, + tree fndecl, void *uncast_arglist) +{ + vec empty = {}; + location_t location = (location_t) uncast_location; + vec *arglist = (uncast_arglist + ? (vec *) uncast_arglist + : &empty); + unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); + unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; + tree new_fndecl; + switch (code & AARCH64_BUILTIN_CLASS) + { + case AARCH64_BUILTIN_GENERAL: + return aarch64_resolve_overloaded_builtin_general (location, fndecl, + uncast_arglist); + case AARCH64_BUILTIN_SVE: + new_fndecl = aarch64_sve::resolve_overloaded_builtin (location, subcode, + arglist); + break; + } + if (new_fndecl == NULL_TREE || new_fndecl == error_mark_node) + return new_fndecl; + return build_function_call_vec (location, vNULL, new_fndecl, arglist, + NULL, fndecl); +} + +/* Implement TARGET_CHECK_BUILTIN_CALL. */ +static bool +aarch64_check_builtin_call (location_t loc, vec arg_loc, + tree fndecl, tree orig_fndecl, + unsigned int nargs, tree *args) +{ + unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); + unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; + switch (code & AARCH64_BUILTIN_CLASS) + { + case AARCH64_BUILTIN_GENERAL: + return true; + + case AARCH64_BUILTIN_SVE: + return aarch64_sve::check_builtin_call (loc, arg_loc, subcode, + orig_fndecl, nargs, args); + } + gcc_unreachable (); +} + /* Implement REGISTER_TARGET_PRAGMAS. */ void @@ -243,4 +356,9 @@ aarch64_register_pragmas (void) { /* Update pragma hook to allow parsing #pragma GCC target. */ targetm.target_option.pragma_parse = aarch64_pragma_target_parse; + + targetm.resolve_overloaded_builtin = aarch64_resolve_overloaded_builtin; + targetm.check_builtin_call = aarch64_check_builtin_call; + + c_register_pragma ("GCC", "aarch64", aarch64_pragma_aarch64); } diff --git a/gcc/config/aarch64/aarch64-cc-fusion.cc b/gcc/config/aarch64/aarch64-cc-fusion.cc new file mode 100644 index 0000000000000..09069a20de28f --- /dev/null +++ b/gcc/config/aarch64/aarch64-cc-fusion.cc @@ -0,0 +1,296 @@ +// Pass to fuse CC operations with other instructions. +// Copyright (C) 2021 Free Software Foundation, Inc. +// +// This file is part of GCC. +// +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. 
+// +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +// This pass looks for sequences of the form: +// +// A: (set (reg R1) X1) +// B: ...instructions that might change the value of X1... +// C: (set (reg CC) X2) // X2 uses R1 +// +// and tries to change them to: +// +// C': [(set (reg CC) X2') +// (set (reg R1) X1)] +// B: ...instructions that might change the value of X1... +// +// where X2' is the result of replacing R1 with X1 in X2. +// +// This sequence occurs in SVE code in two important cases: +// +// (a) Sometimes, to deal correctly with overflow, we need to increment +// an IV after a WHILELO rather than before it. In this case: +// - A is a WHILELO, +// - B includes an IV increment and +// - C is a separate PTEST. +// +// (b) ACLE code of the form: +// +// svbool_t ok = svrdffr (); +// if (svptest_last (pg, ok)) +// ... +// +// must, for performance reasons, be code-generated as: +// +// RDFFRS Pok.B, Pg/Z +// ...branch on flags result... +// +// without a separate PTEST of Pok. In this case: +// - A is an aarch64_rdffr +// - B includes an aarch64_update_ffrt +// - C is a separate PTEST +// +// Combine can handle this optimization if B doesn't exist and if A and +// C are in the same BB. This pass instead handles cases where B does +// exist and cases where A and C are in different BBs of the same EBB. + +#define IN_TARGET_CODE 1 + +#define INCLUDE_ALGORITHM +#define INCLUDE_FUNCTIONAL +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "rtl.h" +#include "df.h" +#include "rtl-ssa.h" +#include "tree-pass.h" + +using namespace rtl_ssa; + +namespace { +const pass_data pass_data_cc_fusion = +{ + RTL_PASS, // type + "cc_fusion", // name + OPTGROUP_NONE, // optinfo_flags + TV_NONE, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + TODO_df_finish, // todo_flags_finish +}; + +// Class that represents one run of the pass. +class cc_fusion +{ +public: + cc_fusion () : m_parallel () {} + void execute (); + +private: + rtx optimizable_set (const insn_info *); + bool parallelize_insns (def_info *, rtx, def_info *, rtx); + void optimize_cc_setter (def_info *, rtx); + + // A spare PARALLEL rtx, or null if none. + rtx m_parallel; +}; + +// See whether INSN is a single_set that we can optimize. Return the +// set if so, otherwise return null. +rtx +cc_fusion::optimizable_set (const insn_info *insn) +{ + if (!insn->can_be_optimized () + || insn->is_asm () + || insn->has_volatile_refs () + || insn->has_pre_post_modify ()) + return NULL_RTX; + + return single_set (insn->rtl ()); +} + +// CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise +// a single_set that sets (only) OTHER_DEF. CC_SET is known to set the +// CC register and the instruction that contains CC_SET is known to use +// OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel. 
+bool
+cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set,
+			      def_info *other_def, rtx other_set)
+{
+  auto attempt = crtl->ssa->new_change_attempt ();
+
+  insn_info *cc_insn = cc_def->insn ();
+  insn_info *other_insn = other_def->insn ();
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
+	     other_insn->uid (), cc_insn->uid ());
+
+  // Try to substitute OTHER_SET into CC_INSN.
+  insn_change_watermark rtl_watermark;
+  rtx_insn *cc_rtl = cc_insn->rtl ();
+  insn_propagation prop (cc_rtl, SET_DEST (other_set),
+			 SET_SRC (other_set));
+  if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
+      || prop.num_replacements == 0)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
+		 other_def->regno ());
+      return false;
+    }
+
+  // Restrict the uses to those outside notes.
+  use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
+  use_array other_set_uses = remove_note_accesses (attempt,
+						   other_insn->uses ());
+
+  // Remove the use of the substituted value.
+  access_array_builder uses_builder (attempt);
+  uses_builder.reserve (cc_uses.size ());
+  for (use_info *use : cc_uses)
+    if (use->def () != other_def)
+      uses_builder.quick_push (use);
+  cc_uses = use_array (uses_builder.finish ());
+
+  // Get the list of uses for the new instruction.
+  insn_change cc_change (cc_insn);
+  cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
+  if (!cc_change.new_uses.is_valid ())
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "-- cannot merge uses\n");
+      return false;
+    }
+
+  // The instruction initially defines just two registers.  recog can add
+  // extra clobbers if necessary.
+  auto_vec<access_info *, 2> new_defs;
+  new_defs.quick_push (cc_def);
+  new_defs.quick_push (other_def);
+  sort_accesses (new_defs);
+  cc_change.new_defs = def_array (access_array (new_defs));
+
+  // Make sure there is somewhere that the new instruction could live.
+  auto other_change = insn_change::delete_insn (other_insn);
+  insn_change *changes[] = { &other_change, &cc_change };
+  cc_change.move_range = cc_insn->ebb ()->insn_range ();
+  if (!restrict_movement_ignoring (cc_change, insn_is_changing (changes)))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
+      return false;
+    }
+
+  // Tentatively install the new pattern.  By convention, the CC set
+  // must be first.
+  if (m_parallel)
+    {
+      XVECEXP (m_parallel, 0, 0) = cc_set;
+      XVECEXP (m_parallel, 0, 1) = other_set;
+    }
+  else
+    {
+      rtvec vec = gen_rtvec (2, cc_set, other_set);
+      m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
+    }
+  validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
+
+  // These routines report failures themselves.
+  if (!recog_ignoring (attempt, cc_change, insn_is_changing (changes))
+      || !changes_are_worthwhile (changes)
+      || !crtl->ssa->verify_insn_changes (changes))
+    return false;
+
+  remove_reg_equal_equiv_notes (cc_rtl);
+  confirm_change_group ();
+  crtl->ssa->change_insns (changes);
+  m_parallel = NULL_RTX;
+  return true;
+}
+
+// Try to optimize the instruction that contains CC_DEF, where CC_DEF describes
+// a definition of the CC register by CC_SET.
+void
+cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set)
+{
+  // Search the registers used by the CC setter for an easily-substitutable
+  // def-use chain.
+  for (use_info *other_use : cc_def->insn ()->uses ())
+    if (def_info *other_def = other_use->def ())
+      if (other_use->regno () != CC_REGNUM
+	  && other_def->ebb () == cc_def->ebb ())
+	if (rtx other_set = optimizable_set (other_def->insn ()))
+	  {
+	    rtx dest = SET_DEST (other_set);
+	    if (REG_P (dest)
+		&& REGNO (dest) == other_def->regno ()
+		&& REG_NREGS (dest) == 1
+		&& parallelize_insns (cc_def, cc_set, other_def, other_set))
+	      return;
+	  }
+}
+
+// Run the pass on the current function.
+void
+cc_fusion::execute ()
+{
+  // Initialization.
+  calculate_dominance_info (CDI_DOMINATORS);
+  df_analyze ();
+  crtl->ssa = new rtl_ssa::function_info (cfun);
+
+  // Walk through all instructions that set CC.  Look for a PTEST instruction
+  // that we can optimize.
+  //
+  // ??? The PTEST test isn't needed for correctness, but it ensures that the
+  // pass has no effect on non-SVE code.
+  for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM))
+    if (rtx cc_set = optimizable_set (def->insn ()))
+      if (REG_P (SET_DEST (cc_set))
+	  && REGNO (SET_DEST (cc_set)) == CC_REGNUM
+	  && GET_CODE (SET_SRC (cc_set)) == UNSPEC
+	  && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST)
+	optimize_cc_setter (def, cc_set);
+
+  // Finalization.
+  crtl->ssa->perform_pending_updates ();
+  free_dominance_info (CDI_DOMINATORS);
+}
+
+class pass_cc_fusion : public rtl_opt_pass
+{
+public:
+  pass_cc_fusion (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_cc_fusion, ctxt)
+  {}
+
+  // opt_pass methods:
+  virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; }
+  virtual unsigned int execute (function *);
+};
+
+unsigned int
+pass_cc_fusion::execute (function *)
+{
+  cc_fusion ().execute ();
+  return 0;
+}
+
+} // end namespace
+
+// Create a new CC fusion pass instance.
+
+rtl_opt_pass *
+make_pass_cc_fusion (gcc::context *ctxt)
+{
+  return new pass_cc_fusion (ctxt);
+}
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index f48b7c22b2d26..de8fe9bc09bb1 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2021 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

 This file is part of GCC.
@@ -46,6 +46,7 @@
 /* ARMv8-A Architecture Processors.  */

 /* ARM ('A') cores.  */
+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
 AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
 AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
 AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
@@ -58,9 +59,20 @@ AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH
   this order is required to handle variant correctly.  */
 AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
 AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
+
+/* OcteonTX is the official name for T81/T83.
*/ +AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) +/* Ampere Computing cores. */ +/* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + /* APM ('P') cores. */ AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) @@ -71,6 +83,9 @@ AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AA /* Samsung ('S') cores. */ AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) +/* HXT ('h') cores. */ +AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + /* ARMv8.1-A Architecture Processors. */ /* Broadcom ('B') cores. */ @@ -85,14 +100,51 @@ AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR /* ARM ('A') cores. */ AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) -AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa72, 0x41, 0xd0b, -1) -AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa72, 0x41, 0xd0c, -1) -AARCH64_CORE("neoverse-n1", neoversen1,cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa72, 0x41, 0xd0c, -1) +AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | 
AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) + +/* Cavium ('C') cores. */ +AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) +AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) +AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */ +AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) +AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) +AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) + +/* Fujitsu ('F') cores. */ +AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) + +/* HiSilicon ('H') cores. */ +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) /* ARMv8.3-A Architecture Processors. */ +/* Marvell cores (TX3). */ +AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + +/* ARMv8.4-A Architecture Processors. */ + +/* Arm ('A') cores. 
*/ +AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) + /* Qualcomm ('Q') cores. */ -AARCH64_CORE("saphira", saphira, falkor, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) +AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) + +/* Armv8.5-A Architecture Processors. */ +AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1) /* ARMv8-A big.LITTLE implementations. */ @@ -104,5 +156,9 @@ AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH /* ARM DynamIQ big.LITTLE configurations. */ AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) +AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) + +/* Armv8-R Architecture Processors. */ +AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cortexa53, 0x41, 0xd15, -1) #undef AARCH64_CORE diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index a455c62582b98..dd2e7e7cbb13d 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -1,6 +1,6 @@ /* RTX cost tables for AArch64. - Copyright (C) 2014-2018 Free Software Foundation, Inc. + Copyright (C) 2014-2021 Free Software Foundation, Inc. This file is part of GCC. @@ -123,7 +123,8 @@ const struct cpu_cost_table qdf24xx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -227,7 +228,8 @@ const struct cpu_cost_table thunderx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; @@ -330,7 +332,321 @@ const struct cpu_cost_table thunderx2t99_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (4) /* Mult. */ + } +}; + +const struct cpu_cost_table thunderx3t110_extra_costs = +{ + /* ALU */ + { + 0, /* Arith. */ + 0, /* Logical. */ + 0, /* Shift. */ + 0, /* Shift_reg. */ + COSTS_N_INSNS (1), /* Arith_shift. */ + COSTS_N_INSNS (1), /* Arith_shift_reg. */ + COSTS_N_INSNS (1), /* Log_shift. */ + COSTS_N_INSNS (1), /* Log_shift_reg. */ + 0, /* Extend. */ + COSTS_N_INSNS (1), /* Extend_arith. */ + 0, /* Bfi. */ + 0, /* Bfx. */ + COSTS_N_INSNS (3), /* Clz. */ + 0, /* Rev. */ + 0, /* Non_exec. */ + true /* Non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (4), /* Simple. */ + COSTS_N_INSNS (4), /* Flag_setting. */ + COSTS_N_INSNS (4), /* Extend. 
*/ + COSTS_N_INSNS (5), /* Add. */ + COSTS_N_INSNS (5), /* Extend_add. */ + COSTS_N_INSNS (18) /* Idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (4), /* Simple. */ + 0, /* Flag_setting. */ + COSTS_N_INSNS (4), /* Extend. */ + COSTS_N_INSNS (5), /* Add. */ + COSTS_N_INSNS (5), /* Extend_add. */ + COSTS_N_INSNS (26) /* Idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (4), /* Load. */ + COSTS_N_INSNS (4), /* Load_sign_extend. */ + COSTS_N_INSNS (5), /* Ldrd. */ + COSTS_N_INSNS (4), /* Ldm_1st. */ + 1, /* Ldm_regs_per_insn_1st. */ + 1, /* Ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* Loadf. */ + COSTS_N_INSNS (4), /* Loadd. */ + COSTS_N_INSNS (4), /* Load_unaligned. */ + 0, /* Store. */ + 0, /* Strd. */ + 0, /* Stm_1st. */ + 1, /* Stm_regs_per_insn_1st. */ + 1, /* Stm_regs_per_insn_subsequent. */ + 0, /* Storef. */ + 0, /* Stored. */ + 0, /* Store_unaligned. */ + COSTS_N_INSNS (1), /* Loadv. */ + COSTS_N_INSNS (1) /* Storev. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (4), /* Div. */ + COSTS_N_INSNS (1), /* Mult. */ + COSTS_N_INSNS (1), /* Mult_addsub. */ + COSTS_N_INSNS (1), /* Fma. */ + COSTS_N_INSNS (1), /* Addsub. */ + COSTS_N_INSNS (1), /* Fpconst. */ + COSTS_N_INSNS (1), /* Neg. */ + COSTS_N_INSNS (1), /* Compare. */ + COSTS_N_INSNS (2), /* Widen. */ + COSTS_N_INSNS (2), /* Narrow. */ + COSTS_N_INSNS (2), /* Toint. */ + COSTS_N_INSNS (2), /* Fromint. */ + COSTS_N_INSNS (2) /* Roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (6), /* Div. */ + COSTS_N_INSNS (1), /* Mult. */ + COSTS_N_INSNS (1), /* Mult_addsub. */ + COSTS_N_INSNS (1), /* Fma. */ + COSTS_N_INSNS (1), /* Addsub. */ + COSTS_N_INSNS (1), /* Fpconst. */ + COSTS_N_INSNS (1), /* Neg. */ + COSTS_N_INSNS (1), /* Compare. */ + COSTS_N_INSNS (2), /* Widen. */ + COSTS_N_INSNS (2), /* Narrow. */ + COSTS_N_INSNS (2), /* Toint. */ + COSTS_N_INSNS (2), /* Fromint. */ + COSTS_N_INSNS (2) /* Roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (4) /* Mult. */ + } +}; + +const struct cpu_cost_table tsv110_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + + { + /* MULT SImode */ + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (2), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (2), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (11) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (3), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (3), /* extend. */ + COSTS_N_INSNS (3), /* add. */ + COSTS_N_INSNS (3), /* extend_add. */ + COSTS_N_INSNS (19) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (4), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (3), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* loadf. */ + COSTS_N_INSNS (4), /* loadd. */ + COSTS_N_INSNS (4), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. 
*/ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (4), /* loadv. */ + COSTS_N_INSNS (4) /* storev. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (10), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (4), /* mult_addsub. */ + COSTS_N_INSNS (4), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. */ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (2), /* widen. */ + COSTS_N_INSNS (2), /* narrow. */ + COSTS_N_INSNS (2), /* toint. */ + COSTS_N_INSNS (1), /* fromint. */ + COSTS_N_INSNS (2) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (17), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (6), /* mult_addsub. */ + COSTS_N_INSNS (6), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. */ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (2), /* widen. */ + COSTS_N_INSNS (2), /* narrow. */ + COSTS_N_INSNS (2), /* toint. */ + COSTS_N_INSNS (1), /* fromint. */ + COSTS_N_INSNS (2) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ + } +}; + +const struct cpu_cost_table a64fx_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (4), /* simple. */ + COSTS_N_INSNS (4), /* flag_setting. */ + COSTS_N_INSNS (4), /* extend. */ + COSTS_N_INSNS (5), /* add. */ + COSTS_N_INSNS (5), /* extend_add. */ + COSTS_N_INSNS (18) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (4), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (4), /* extend. */ + COSTS_N_INSNS (5), /* add. */ + COSTS_N_INSNS (5), /* extend_add. */ + COSTS_N_INSNS (26) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (4), /* load. */ + COSTS_N_INSNS (4), /* load_sign_extend. */ + COSTS_N_INSNS (5), /* ldrd. */ + COSTS_N_INSNS (4), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* loadf. */ + COSTS_N_INSNS (4), /* loadd. */ + COSTS_N_INSNS (5), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + 0, /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (6), /* div. */ + COSTS_N_INSNS (1), /* mult. */ + COSTS_N_INSNS (1), /* mult_addsub. */ + COSTS_N_INSNS (2), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. */ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (2), /* widen. */ + COSTS_N_INSNS (2), /* narrow. */ + COSTS_N_INSNS (2), /* toint. */ + COSTS_N_INSNS (2), /* fromint. */ + COSTS_N_INSNS (2) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (11), /* div. */ + COSTS_N_INSNS (1), /* mult. */ + COSTS_N_INSNS (1), /* mult_addsub. */ + COSTS_N_INSNS (2), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. 
*/ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (2), /* widen. */ + COSTS_N_INSNS (2), /* narrow. */ + COSTS_N_INSNS (2), /* toint. */ + COSTS_N_INSNS (2), /* fromint. */ + COSTS_N_INSNS (2) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (4) /* mult. */ } }; diff --git a/gcc/config/aarch64/aarch64-d.c b/gcc/config/aarch64/aarch64-d.c new file mode 100644 index 0000000000000..416bb7c8033ac --- /dev/null +++ b/gcc/config/aarch64/aarch64-d.c @@ -0,0 +1,56 @@ +/* Subroutines for the D front end on the AArch64 architecture. + Copyright (C) 2017-2021 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "d/d-target.h" +#include "d/d-target-def.h" + +/* Implement TARGET_D_CPU_VERSIONS for AArch64 targets. */ + +void +aarch64_d_target_versions (void) +{ + d_add_builtin_version ("AArch64"); + d_add_builtin_version ("D_HardFloat"); +} + +/* Handle a call to `__traits(getTargetInfo, "floatAbi")'. */ + +static tree +aarch64_d_handle_target_float_abi (void) +{ + const char *abi = "hard"; + + return build_string_literal (strlen (abi) + 1, abi); +} + +/* Implement TARGET_D_REGISTER_CPU_TARGET_INFO. */ + +void +aarch64_d_register_target_info (void) +{ + const struct d_target_info_spec handlers[] = { + { "floatAbi", aarch64_d_handle_target_float_abi }, + { NULL, NULL }, + }; + + d_add_target_info_handlers (handlers); +} diff --git a/gcc/config/aarch64/aarch64-elf-raw.h b/gcc/config/aarch64/aarch64-elf-raw.h index c074238df0658..e986149e4003b 100644 --- a/gcc/config/aarch64/aarch64-elf-raw.h +++ b/gcc/config/aarch64/aarch64-elf-raw.h @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. 
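The new entries added above in aarch64-cost-tables.h (thunderx3t110, tsv110, a64fx) are expressed in GCC's RTX-cost units rather than raw cycle counts, and, as the *_extra_costs naming suggests, they are charged on top of the baseline instruction cost. As a rough illustration of the scale, assuming the standard COSTS_N_INSNS definition from gcc/rtl.h (this small program is an editor's sketch, not part of the patch):

#include <cstdio>

// GCC's standard scaling macro: one "instruction" is 4 RTX-cost units.
#define COSTS_N_INSNS(N) ((N) * 4)

int
main ()
{
  // a64fx SImode integer costs from the table above.
  std::printf ("mult extra cost: %d units\n", COSTS_N_INSNS (4));   // 16
  std::printf ("idiv extra cost: %d units\n", COSTS_N_INSNS (18));  // 72
  // Zero entries (plain arith/logical/shift) mean "no extra cost beyond
  // the baseline", which is why simple ALU operations are listed as 0.
  return 0;
}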
@@ -27,22 +27,6 @@ " crtend%O%s crtn%O%s " \ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" -#if TARGET_FIX_ERR_A53_835769_DEFAULT -#define CA53_ERR_835769_SPEC \ - " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" -#else -#define CA53_ERR_835769_SPEC \ - " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" -#endif - -#if TARGET_FIX_ERR_A53_843419_DEFAULT -#define CA53_ERR_843419_SPEC \ - " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" -#else -#define CA53_ERR_843419_SPEC \ - " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" -#endif - #ifndef LINK_SPEC #define LINK_SPEC "%{h*} \ %{static:-Bstatic} \ @@ -51,8 +35,7 @@ %{!static:%{rdynamic:-export-dynamic}} \ %{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \ - CA53_ERR_835769_SPEC \ - CA53_ERR_843419_SPEC + AARCH64_ERRATA_LINK_SPEC #endif #endif /* GCC_AARCH64_ELF_RAW_H */ diff --git a/gcc/config/aarch64/aarch64-elf.h b/gcc/config/aarch64/aarch64-elf.h index 93cd5b4ebb0f1..60504ef2e0a73 100644 --- a/gcc/config/aarch64/aarch64-elf.h +++ b/gcc/config/aarch64/aarch64-elf.h @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. diff --git a/gcc/config/aarch64/aarch64-errata.h b/gcc/config/aarch64/aarch64-errata.h new file mode 100644 index 0000000000000..7c707474714a0 --- /dev/null +++ b/gcc/config/aarch64/aarch64-errata.h @@ -0,0 +1,44 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2021 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_ERRATA_H +#define GCC_AARCH64_ERRATA_H + +#if TARGET_FIX_ERR_A53_835769_DEFAULT +#define CA53_ERR_835769_SPEC \ + " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +#else +#define CA53_ERR_835769_SPEC \ + " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +#endif + +#if TARGET_FIX_ERR_A53_843419_DEFAULT +#define CA53_ERR_843419_SPEC \ + " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" +#else +#define CA53_ERR_843419_SPEC \ + " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" +#endif + +#define AARCH64_ERRATA_LINK_SPEC \ + CA53_ERR_835769_SPEC \ + CA53_ERR_843419_SPEC + +#endif /* GCC_AARCH64_ERRATA_H */ diff --git a/gcc/config/aarch64/aarch64-freebsd.h b/gcc/config/aarch64/aarch64-freebsd.h index d0d8bc41437b8..e2dfe784030d9 100644 --- a/gcc/config/aarch64/aarch64-freebsd.h +++ b/gcc/config/aarch64/aarch64-freebsd.h @@ -1,5 +1,5 @@ /* Definitions for AArch64 running FreeBSD - Copyright (C) 2016-2018 Free Software Foundation, Inc. + Copyright (C) 2016-2021 Free Software Foundation, Inc. This file is part of GCC. 
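The new aarch64-errata.h above centralizes the two Cortex-A53 erratum link specs that were previously copied into each OS header; the FreeBSD hunk below (and the Linux hunk later) deletes its local copy and appends AARCH64_ERRATA_LINK_SPEC instead. A minimal sketch of the usage pattern, with a made-up "foo" port standing in for a real OS header (the real users are aarch64-elf-raw.h, aarch64-freebsd.h, aarch64-linux.h and aarch64-netbsd.h; in the real tree the header is presumably pulled in ahead of the OS header, e.g. via the target's tm_file list, and the #include below only keeps the sketch self-contained):

/* Editor's sketch only -- "aarch64-foo" is a hypothetical port.  */
#include "aarch64-errata.h"

#define FOO_TARGET_LINK_SPEC \
  "%{h*} %{mbig-endian:-EB} %{mlittle-endian:-EL} -X"

/* AARCH64_ERRATA_LINK_SPEC supplies the --fix-cortex-a53-835769 and
   --fix-cortex-a53-843419 fragments, honouring the -m[no-]fix-* driver
   options and the configure-time defaults, exactly as the removed
   per-header #if blocks did.  */
#undef LINK_SPEC
#define LINK_SPEC FOO_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC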
@@ -34,7 +34,7 @@ #undef FBSD_TARGET_LINK_SPEC #define FBSD_TARGET_LINK_SPEC " \ - %{p:%nconsider using `-pg' instead of `-p' with gprof (1) } \ + %{p:%nconsider using `-pg' instead of `-p' with gprof (1)} \ %{v:-V} \ %{assert*} %{R*} %{rpath*} %{defsym*} \ %{shared:-Bshareable %{h*} %{soname*}} \ @@ -46,26 +46,8 @@ -X" SUBTARGET_EXTRA_LINK_SPEC " \ %{mbig-endian:-EB} %{mlittle-endian:-EL}" -#if TARGET_FIX_ERR_A53_835769_DEFAULT -#define CA53_ERR_835769_SPEC \ - " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" -#else -#define CA53_ERR_835769_SPEC \ - " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" -#endif - -#ifdef TARGET_FIX_ERR_A53_843419_DEFAULT -#define CA53_ERR_843419_SPEC \ - " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" -#else -#define CA53_ERR_843419_SPEC \ - " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" -#endif - #undef LINK_SPEC -#define LINK_SPEC FBSD_TARGET_LINK_SPEC \ - CA53_ERR_835769_SPEC \ - CA53_ERR_843419_SPEC +#define LINK_SPEC FBSD_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC #define GNU_USER_TARGET_MATHFILE_SPEC \ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def index 813d6c582c0d9..d6be7304dc057 100644 --- a/gcc/config/aarch64/aarch64-fusion-pairs.def +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2018 Free Software Foundation, Inc. +/* Copyright (C) 2015-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -35,5 +35,6 @@ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR) AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH) AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH) +AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ) #undef AARCH64_FUSION_PAIR diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md index c008477c741d2..b5b8b6d942b10 100644 --- a/gcc/config/aarch64/aarch64-ldpstp.md +++ b/gcc/config/aarch64/aarch64-ldpstp.md @@ -1,5 +1,5 @@ ;; AArch64 ldp/stp peephole optimizations. -;; Copyright (C) 2014-2018 Free Software Foundation, Inc. +;; Copyright (C) 2014-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. 
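The aarch64-ldpstp.md hunks below relax the first memory operand from aarch64_mem_pair_operand to memory_operand, extend the patterns to more modes, and replace the offset comparison that each peephole used to open-code with a single call to aarch64_swap_ldrstr_operands, which puts the lower-addressed access first. For orientation, an editor's example (not from the patch) of the kind of source these peepholes target -- two adjacent same-size accesses off one base:

// Two loads from consecutive offsets of the same base.  When
// aarch64_operands_ok_for_ldpstp accepts the pair, the peepholes below
// let them be emitted as a single LDP, with the lower-addressed load in
// the first slot.
long
sum_adjacent (const long *p)
{
  long lo = p[0];   // lower-addressed access, ordered first by the helper
  long hi = p[1];
  return lo + hi;
}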
@@ -20,26 +20,18 @@ (define_peephole2 [(set (match_operand:GPI 0 "register_operand" "") - (match_operand:GPI 1 "aarch64_mem_pair_operand" "")) + (match_operand:GPI 1 "memory_operand" "")) (set (match_operand:GPI 2 "register_operand" "") (match_operand:GPI 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, true); }) (define_peephole2 - [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:GPI 0 "memory_operand" "") (match_operand:GPI 1 "aarch64_reg_or_zero" "")) (set (match_operand:GPI 2 "memory_operand" "") (match_operand:GPI 3 "aarch64_reg_or_zero" ""))] @@ -47,39 +39,23 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, false); }) (define_peephole2 [(set (match_operand:GPF 0 "register_operand" "") - (match_operand:GPF 1 "aarch64_mem_pair_operand" "")) + (match_operand:GPF 1 "memory_operand" "")) (set (match_operand:GPF 2 "register_operand" "") (match_operand:GPF 3 "memory_operand" ""))] "aarch64_operands_ok_for_ldpstp (operands, true, mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, true); }) (define_peephole2 - [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "") + [(set (match_operand:GPF 0 "memory_operand" "") (match_operand:GPF 1 "aarch64_reg_or_fp_zero" "")) (set (match_operand:GPF 2 "memory_operand" "") (match_operand:GPF 3 "aarch64_reg_or_fp_zero" ""))] @@ -87,55 +63,62 @@ [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, false); }) (define_peephole2 - [(set (match_operand:VD 0 "register_operand" "") - (match_operand:VD 1 "aarch64_mem_pair_operand" "")) - (set (match_operand:VD 2 "register_operand" "") - (match_operand:VD 3 "memory_operand" ""))] - "aarch64_operands_ok_for_ldpstp (operands, true, mode)" + [(set (match_operand:DREG 0 "register_operand" "") + (match_operand:DREG 1 "memory_operand" "")) + (set (match_operand:DREG2 2 "register_operand" "") + (match_operand:DREG2 3 "memory_operand" ""))] + "aarch64_operands_ok_for_ldpstp (operands, true, mode)" [(parallel [(set (match_dup 0) (match_dup 1)) (set 
(match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; + aarch64_swap_ldrstr_operands (operands, true); +}) - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } +(define_peephole2 + [(set (match_operand:DREG 0 "memory_operand" "") + (match_operand:DREG 1 "register_operand" "")) + (set (match_operand:DREG2 2 "memory_operand" "") + (match_operand:DREG2 3 "register_operand" ""))] + "TARGET_SIMD + && aarch64_operands_ok_for_ldpstp (operands, false, mode)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +{ + aarch64_swap_ldrstr_operands (operands, false); }) (define_peephole2 - [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "") - (match_operand:VD 1 "register_operand" "")) - (set (match_operand:VD 2 "memory_operand" "") - (match_operand:VD 3 "register_operand" ""))] - "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, mode)" + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "memory_operand" "")) + (set (match_operand:VQ2 2 "register_operand" "") + (match_operand:VQ2 3 "memory_operand" ""))] + "TARGET_SIMD + && aarch64_operands_ok_for_ldpstp (operands, true, mode) + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" [(parallel [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))])] { - rtx base, offset_1, offset_2; + aarch64_swap_ldrstr_operands (operands, true); +}) - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } +(define_peephole2 + [(set (match_operand:VQ 0 "memory_operand" "") + (match_operand:VQ 1 "register_operand" "")) + (set (match_operand:VQ2 2 "memory_operand" "") + (match_operand:VQ2 3 "register_operand" ""))] + "TARGET_SIMD + && aarch64_operands_ok_for_ldpstp (operands, false, mode) + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +{ + aarch64_swap_ldrstr_operands (operands, false); }) @@ -143,42 +126,48 @@ (define_peephole2 [(set (match_operand:DI 0 "register_operand" "") - (sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" ""))) + (sign_extend:DI (match_operand:SI 1 "memory_operand" ""))) (set (match_operand:DI 2 "register_operand" "") (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))] "aarch64_operands_ok_for_ldpstp (operands, true, SImode)" [(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1))) (set (match_dup 2) (sign_extend:DI (match_dup 3)))])] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } + aarch64_swap_ldrstr_operands (operands, true); }) (define_peephole2 [(set (match_operand:DI 0 "register_operand" "") - (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" ""))) + (zero_extend:DI (match_operand:SI 1 "memory_operand" ""))) (set (match_operand:DI 2 "register_operand" "") (zero_extend:DI (match_operand:SI 3 
"memory_operand" "")))] "aarch64_operands_ok_for_ldpstp (operands, true, SImode)" [(parallel [(set (match_dup 0) (zero_extend:DI (match_dup 1))) (set (match_dup 2) (zero_extend:DI (match_dup 3)))])] { - rtx base, offset_1, offset_2; + aarch64_swap_ldrstr_operands (operands, true); +}) - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[2]); - std::swap (operands[1], operands[3]); - } +;; Handle storing of a floating point zero with integer data. +;; This handles cases like: +;; struct pair { int a; float b; } +;; +;; p->a = 1; +;; p->b = 0.0; +;; +;; We can match modes that won't work for a stp instruction +;; as aarch64_operands_ok_for_ldpstp checks that the modes are +;; compatible. +(define_peephole2 + [(set (match_operand:DSX 0 "memory_operand" "") + (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" "")) + (set (match_operand: 2 "memory_operand" "") + (match_operand: 3 "aarch64_reg_zero_or_fp_zero" ""))] + "aarch64_operands_ok_for_ldpstp (operands, false, mode)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +{ + aarch64_swap_ldrstr_operands (operands, false); }) ;; Handle consecutive load/store whose offset is out of the range @@ -200,18 +189,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, mode, UNKNOWN)) DONE; else @@ -232,18 +209,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, mode, UNKNOWN)) DONE; else @@ -264,18 +229,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND)) DONE; else @@ -296,18 +249,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[1], &base, &offset_1); - extract_base_offset_in_addr (operands[3], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, 
ZERO_EXTEND)) DONE; else @@ -328,18 +269,6 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, false, mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } - if (aarch64_gen_adjusted_ldpstp (operands, false, mode, UNKNOWN)) DONE; else @@ -360,20 +289,50 @@ "aarch64_operands_adjust_ok_for_ldpstp (operands, false, mode)" [(const_int 0)] { - rtx base, offset_1, offset_2; - - extract_base_offset_in_addr (operands[0], &base, &offset_1); - extract_base_offset_in_addr (operands[2], &base, &offset_2); - if (INTVAL (offset_1) > INTVAL (offset_2)) - { - std::swap (operands[0], operands[6]); - std::swap (operands[1], operands[7]); - std::swap (operands[2], operands[4]); - std::swap (operands[3], operands[5]); - } + if (aarch64_gen_adjusted_ldpstp (operands, false, mode, UNKNOWN)) + DONE; + else + FAIL; +}) +(define_peephole2 + [(match_scratch:DI 8 "r") + (set (match_operand:VP_2E 0 "memory_operand" "") + (match_operand:VP_2E 1 "aarch64_reg_or_zero" "")) + (set (match_operand:VP_2E 2 "memory_operand" "") + (match_operand:VP_2E 3 "aarch64_reg_or_zero" "")) + (set (match_operand:VP_2E 4 "memory_operand" "") + (match_operand:VP_2E 5 "aarch64_reg_or_zero" "")) + (set (match_operand:VP_2E 6 "memory_operand" "") + (match_operand:VP_2E 7 "aarch64_reg_or_zero" "")) + (match_dup 8)] + "TARGET_SIMD + && aarch64_operands_adjust_ok_for_ldpstp (operands, false, mode)" + [(const_int 0)] +{ if (aarch64_gen_adjusted_ldpstp (operands, false, mode, UNKNOWN)) DONE; else FAIL; }) + +(define_peephole2 + [(match_scratch:DI 8 "r") + (set (match_operand:VP_2E 0 "register_operand" "") + (match_operand:VP_2E 1 "memory_operand" "")) + (set (match_operand:VP_2E 2 "register_operand" "") + (match_operand:VP_2E 3 "memory_operand" "")) + (set (match_operand:VP_2E 4 "register_operand" "") + (match_operand:VP_2E 5 "memory_operand" "")) + (set (match_operand:VP_2E 6 "register_operand" "") + (match_operand:VP_2E 7 "memory_operand" "")) + (match_dup 8)] + "TARGET_SIMD + && aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode)" + [(const_int 0)] +{ + if (aarch64_gen_adjusted_ldpstp (operands, true, mode, UNKNOWN)) + DONE; + else + FAIL; +}) diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h index 54613e4c6159a..7f2529a2a1d42 100644 --- a/gcc/config/aarch64/aarch64-linux.h +++ b/gcc/config/aarch64/aarch64-linux.h @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. 
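One of the peepholes added above in aarch64-ldpstp.md (the DSX pattern using aarch64_reg_zero_or_fp_zero) covers the mixed integer/FP-zero case described in its comment. Written out as a compilable example (editor's illustration, expanding the struct sketched in that comment):

// From the comment on the new DSX peephole: an integer store next to a
// floating-point zero store.  Because 0.0 can be stored from an integer
// zero register, both stores become candidates for a single STP even
// though their modes differ.
struct pair { int a; float b; };

void
init_pair (struct pair *p)
{
  p->a = 1;      // integer data
  p->b = 0.0f;   // FP zero, storable as integer zero data
}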
@@ -21,14 +21,7 @@ #ifndef GCC_AARCH64_LINUX_H #define GCC_AARCH64_LINUX_H -#ifndef RUNTIME_ROOT_PREFIX -#define RUNTIME_ROOT_PREFIX "" -#endif -#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1" -#ifdef BIONIC_DYNAMIC_LINKER -#undef BIONIC_DYNAMIC_LINKER -#endif -#define BIONIC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/system/bin/linker64" +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1" #undef MUSL_DYNAMIC_LINKER #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1" @@ -53,25 +46,8 @@ %{mbig-endian:-EB} %{mlittle-endian:-EL} \ -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" -#if TARGET_FIX_ERR_A53_835769_DEFAULT -#define CA53_ERR_835769_SPEC \ - " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" -#else -#define CA53_ERR_835769_SPEC \ - " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" -#endif - -#if TARGET_FIX_ERR_A53_843419_DEFAULT -#define CA53_ERR_843419_SPEC \ - " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" -#else -#define CA53_ERR_843419_SPEC \ - " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" -#endif - -#define LINK_SPEC LINUX_TARGET_LINK_SPEC \ - CA53_ERR_835769_SPEC \ - CA53_ERR_843419_SPEC + +#define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC #define GNU_USER_TARGET_MATHFILE_SPEC \ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" @@ -88,7 +64,7 @@ } \ while (0) -#define TARGET_ASM_FILE_END file_end_indicate_exec_stack +#define TARGET_ASM_FILE_END aarch64_file_end_indicate_exec_stack /* Uninitialized common symbols in non-PIE executables, even with strong definitions in dependent shared libraries, will resolve diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 1a05b6cc70828..1a07bc1b70e77 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -18,12 +18,28 @@ along with GCC; see the file COPYING3. If not see . */ +/* Important note about Carry generation in AArch64. + + Unlike some architectures, the C flag generated by a subtract + operation, or a simple compare operation is set to 1 if the result + does not overflow in an unsigned sense. That is, if there is no + borrow needed from a higher word. That means that overflow from + addition will set C, but overflow from a subtraction will clear C. + We use CC_Cmode to represent detection of overflow from addition as + CCmode is used for 'normal' compare (subtraction) operations. For + ADC, the representation becomes more complex still, since we cannot + use the normal idiom of comparing the result to one of the input + operands; instead we use CC_ADCmode to represent this case. */ CC_MODE (CCFP); CC_MODE (CCFPE); CC_MODE (CC_SWP); +CC_MODE (CC_NZC); /* Only N, Z and C bits of condition flags are valid. + (Used with SVE predicate tests.) */ CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ -CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ +CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */ +CC_MODE (CC_ADC); /* Unsigned overflow from an ADC (add with carry). 
*/ +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ /* Half-precision floating point for __fp16. */ FLOAT_MODE (HF, 2, 0); @@ -46,6 +62,10 @@ ADJUST_ALIGNMENT (VNx8BI, 2); ADJUST_ALIGNMENT (VNx4BI, 2); ADJUST_ALIGNMENT (VNx2BI, 2); +/* Bfloat16 modes. */ +FLOAT_MODE (BF, 2, 0); +ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format); + VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ VECTOR_MODES (FLOAT, 8); /* V2SF. */ @@ -66,13 +86,14 @@ INT_MODE (XI, 64); strictly necessary to set the alignment here, since the default would be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer. */ #define SVE_MODES(NVECS, VB, VH, VS, VD) \ - VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS); \ - VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS); \ + VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, 0); \ + VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, 0); \ \ ADJUST_NUNITS (VB##QI, aarch64_sve_vg * NVECS * 8); \ ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \ ADJUST_NUNITS (VS##SI, aarch64_sve_vg * NVECS * 2); \ ADJUST_NUNITS (VD##DI, aarch64_sve_vg * NVECS); \ + ADJUST_NUNITS (VH##BF, aarch64_sve_vg * NVECS * 4); \ ADJUST_NUNITS (VH##HF, aarch64_sve_vg * NVECS * 4); \ ADJUST_NUNITS (VS##SF, aarch64_sve_vg * NVECS * 2); \ ADJUST_NUNITS (VD##DF, aarch64_sve_vg * NVECS); \ @@ -81,6 +102,7 @@ INT_MODE (XI, 64); ADJUST_ALIGNMENT (VH##HI, 16); \ ADJUST_ALIGNMENT (VS##SI, 16); \ ADJUST_ALIGNMENT (VD##DI, 16); \ + ADJUST_ALIGNMENT (VH##BF, 16); \ ADJUST_ALIGNMENT (VH##HF, 16); \ ADJUST_ALIGNMENT (VS##SF, 16); \ ADJUST_ALIGNMENT (VD##DF, 16); @@ -92,6 +114,52 @@ SVE_MODES (2, VNx32, VNx16, VNx8, VNx4) SVE_MODES (3, VNx48, VNx24, VNx12, VNx6) SVE_MODES (4, VNx64, VNx32, VNx16, VNx8) +/* Partial SVE vectors: + + VNx2QI VNx4QI VNx8QI + VNx2HI VNx4HI + VNx2SI + + In memory they occupy contiguous locations, in the same way as fixed-length + vectors. E.g. VNx8QImode is half the size of VNx16QImode. + + Passing 1 as the final argument ensures that the modes come after all + other modes in the GET_MODE_WIDER chain, so that we never pick them + in preference to a full vector mode. */ +VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1); +VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1); +VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1); +VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 4, 1); +VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 8, 1); + +ADJUST_NUNITS (VNx2QI, aarch64_sve_vg); +ADJUST_NUNITS (VNx2HI, aarch64_sve_vg); +ADJUST_NUNITS (VNx2SI, aarch64_sve_vg); +ADJUST_NUNITS (VNx2HF, aarch64_sve_vg); +ADJUST_NUNITS (VNx2BF, aarch64_sve_vg); +ADJUST_NUNITS (VNx2SF, aarch64_sve_vg); + +ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2); +ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2); +ADJUST_NUNITS (VNx4HF, aarch64_sve_vg * 2); +ADJUST_NUNITS (VNx4BF, aarch64_sve_vg * 2); + +ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4); + +ADJUST_ALIGNMENT (VNx2QI, 1); +ADJUST_ALIGNMENT (VNx4QI, 1); +ADJUST_ALIGNMENT (VNx8QI, 1); + +ADJUST_ALIGNMENT (VNx2HI, 2); +ADJUST_ALIGNMENT (VNx4HI, 2); +ADJUST_ALIGNMENT (VNx2HF, 2); +ADJUST_ALIGNMENT (VNx2BF, 2); +ADJUST_ALIGNMENT (VNx4HF, 2); +ADJUST_ALIGNMENT (VNx4BF, 2); + +ADJUST_ALIGNMENT (VNx2SI, 4); +ADJUST_ALIGNMENT (VNx2SF, 4); + /* Quad float: 128-bit floating mode for long doubles. 
*/ FLOAT_MODE (TF, 16, ieee_quad_format); diff --git a/gcc/config/aarch64/aarch64-netbsd.h b/gcc/config/aarch64/aarch64-netbsd.h new file mode 100644 index 0000000000000..76cfc00f70b68 --- /dev/null +++ b/gcc/config/aarch64/aarch64-netbsd.h @@ -0,0 +1,63 @@ +/* Definitions for AArch64 running NetBSD + Copyright (C) 2016-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_NETBSD_H +#define GCC_AARCH64_NETBSD_H + +#define TARGET_LINKER_BIG_EMULATION "aarch64nbsdb" +#define TARGET_LINKER_LITTLE_EMULATION "aarch64nbsd" + +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_LINKER_EMULATION TARGET_LINKER_BIG_EMULATION +#else +#define TARGET_LINKER_EMULATION TARGET_LINKER_LITTLE_EMULATION +#endif + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m" TARGET_LINKER_EMULATION + +#define NETBSD_ENTRY_POINT "__start" + +#define NETBSD_TARGET_LINK_SPEC "%{h*} " \ + "-X %{mbig-endian:-EB -m " TARGET_LINKER_BIG_EMULATION "} " \ + "%{mlittle-endian:-EL -m " TARGET_LINKER_LITTLE_EMULATION "} " \ + "%(netbsd_link_spec)" + +#undef LINK_SPEC +#define LINK_SPEC NETBSD_LINK_SPEC_ELF \ + NETBSD_TARGET_LINK_SPEC \ + AARCH64_ERRATA_LINK_SPEC + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } \ + while (0) + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { "asm_cpu_spec", ASM_CPU_SPEC }, \ + NETBSD_SUBTARGET_EXTRA_SPECS + +#endif /* GCC_AARCH64_NETBSD_H */ diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index a575448e4068f..579328c48dfe8 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2018 Free Software Foundation, Inc. +/* Copyright (C) 2012-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -45,29 +45,46 @@ entries: aes, pmull, sha1, sha2 being present). In that case this field should contain a space (" ") separated list of the strings in 'Features' that are required. Their order is not important. An empty string means - do not detect this feature during auto detection. */ + do not detect this feature during auto detection. -/* NOTE: This file is being parsed by config.gcc and so the - AARCH64_OPT_EXTENSION must adhere to a strict format: - 1) No space between the AARCH64_OPT_EXTENSION and the opening (. - 2) No space between the opening ( and the extension name. - 3) No space after the extension name before the ,. - 4) Spaces are only allowed after a , and around |. - 5) Everything must be on one line. */ + NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in + config.gcc. */ /* Enabling "fp" just enables "fp". Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2", - "sha3", sm3/sm4 and "sve". 
*/ -AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE, false, "fp") + "sha3", sm3/sm4, "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", + "sve2-bitperm", "i8mm", "f32mm", "f64mm", and "bf16". */ +AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | \ + AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | \ + AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | \ + AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ + AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ + AARCH64_FL_SVE2_BITPERM | AARCH64_FL_I8MM | \ + AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_BF16, + false, "fp") /* Enabling "simd" also enables "fp". Disabling "simd" also disables "crypto", "dotprod", "aes", "sha2", "sha3", - "sm3/sm4" and "sve". */ -AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE, false, "asimd") + "sm3/sm4", "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", + "sve2-bitperm", "i8mm", "f32mm" and "f64mm". */ +AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ + AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | \ + AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ + AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | \ + AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ + AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM | \ + AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ + false, "asimd") /* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". - Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4". */ -AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, AARCH64_FL_AES | AARCH64_FL_SHA2 |AARCH64_FL_SHA3 | AARCH64_FL_SM4, true, "aes pmull sha1 sha2") + Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", + "sve2-aes", "sve2-sha3", "sve2-sm4". */ +AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ + AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ + AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ + AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ + AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ + "aes pmull sha1 sha2") /* Enabling or disabling "crc" only changes "crc". */ AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") @@ -76,42 +93,143 @@ AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, false, "atomics") /* Enabling "fp16" also enables "fp". - Disabling "fp16" disables "fp16", "fp16fml" and "sve". */ -AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, AARCH64_FL_F16FML | AARCH64_FL_SVE, false, "fphp asimdhp") + Disabling "fp16" disables "fp16", "fp16fml", "sve", "sve2", + "sve2-aes", "sve2-sha3", "sve2-sm4", "sve2-bitperm", "f32mm" and + "f64mm". */ +AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, \ + AARCH64_FL_F16FML | AARCH64_FL_SVE | AARCH64_FL_F32MM | \ + AARCH64_FL_F64MM | AARCH64_FL_SVE2 | \ + AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ + AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, \ + "fphp asimdhp") /* Enabling or disabling "rcpc" only changes "rcpc". */ AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, false, "lrcpc") /* Enabling "rdma" also enables "fp", "simd". Disabling "rdma" just disables "rdma". 
*/ -AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") +AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ + AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") /* Enabling "dotprod" also enables "simd". Disabling "dotprod" only disables "dotprod". */ -AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, false, "asimddp") +AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \ + false, "asimddp") /* Enabling "aes" also enables "simd". - Disabling "aes" just disables "aes". */ -AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, 0, false, "aes") + Disabling "aes" disables "aes" and "sve2-aes'. */ +AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \ + AARCH64_FL_SVE2_AES, false, "aes") /* Enabling "sha2" also enables "simd". Disabling "sha2" just disables "sha2". */ -AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, "sha1 sha2") +AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \ + "sha1 sha2") /* Enabling "sha3" enables "simd" and "sha2". - Disabling "sha3" just disables "sha3". */ -AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | AARCH64_FL_SHA2, 0, false, "sha3 sha512") + Disabling "sha3" disables "sha3" and "sve2-sha3". */ +AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \ + AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ + "sha3 sha512") /* Enabling "sm4" also enables "simd". - Disabling "sm4" just disables "sm4". */ -AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, 0, false, "sm3 sm4") + Disabling "sm4" disables "sm4" and "sve2-sm4". */ +AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \ + AARCH64_FL_SVE2_SM4, false, "sm3 sm4") /* Enabling "fp16fml" also enables "fp" and "fp16". Disabling "fp16fml" just disables "fp16fml". */ -AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfml") +AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, \ + AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") /* Enabling "sve" also enables "fp16", "fp" and "simd". - Disabling "sve" just disables "sve". */ -AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, 0, false, "sve") + Disabling "sve" disables "sve", "f32mm", "f64mm", "sve2", "sve2-aes", + "sve2-sha3", "sve2-sm4" and "sve2-bitperm". */ +AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \ + AARCH64_FL_F16, AARCH64_FL_F32MM | AARCH64_FL_F64MM | \ + AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ + AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ + AARCH64_FL_SVE2_BITPERM, false, "sve") + +/* Enabling/Disabling "profile" does not enable/disable any other feature. */ +AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "") + +/* Enabling/Disabling "rng" only changes "rng". */ +AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "rng") + +/* Enabling/Disabling "memtag" only changes "memtag". */ +AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "") + +/* Enabling/Disabling "sb" only changes "sb". */ +AARCH64_OPT_EXTENSION("sb", AARCH64_FL_SB, 0, 0, false, "sb") + +/* Enabling/Disabling "ssbs" only changes "ssbs". */ +AARCH64_OPT_EXTENSION("ssbs", AARCH64_FL_SSBS, 0, 0, false, "ssbs") + +/* Enabling/Disabling "predres" only changes "predres". */ +AARCH64_OPT_EXTENSION("predres", AARCH64_FL_PREDRES, 0, 0, false, "") + +/* Enabling "sve2" also enables "sve", "fp16", "fp", and "simd". 
+ Disabling "sve2" disables "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", and + "sve2-bitperm". */ +AARCH64_OPT_EXTENSION("sve2", AARCH64_FL_SVE2, AARCH64_FL_SVE | \ + AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, \ + AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ + AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, "sve2") + +/* Enabling "sve2-sm4" also enables "sm4", "simd", "fp16", "fp", "sve", and + "sve2". Disabling "sve2-sm4" just disables "sve2-sm4". */ +AARCH64_OPT_EXTENSION("sve2-sm4", AARCH64_FL_SVE2_SM4, AARCH64_FL_SM4 | \ + AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesm4") + +/* Enabling "sve2-aes" also enables "aes", "simd", "fp16", "fp", "sve", and + "sve2". Disabling "sve2-aes" just disables "sve2-aes". */ +AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ + AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "sveaes") + +/* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and + "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */ +AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ + AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") + +/* Enabling "sve2-bitperm" also enables "simd", "fp16", "fp", "sve", and + "sve2". Disabling "sve2-bitperm" just disables "sve2-bitperm". */ +AARCH64_OPT_EXTENSION("sve2-bitperm", AARCH64_FL_SVE2_BITPERM, AARCH64_FL_SIMD | \ + AARCH64_FL_F16 | AARCH64_FL_FP | AARCH64_FL_SVE | \ + AARCH64_FL_SVE2, 0, false, "svebitperm") + +/* Enabling or disabling "tme" only changes "tme". */ +AARCH64_OPT_EXTENSION("tme", AARCH64_FL_TME, 0, 0, false, "") + +/* Enabling "i8mm" also enables "simd" and "fp". + Disabling "i8mm" only disables "i8mm". */ +AARCH64_OPT_EXTENSION("i8mm", AARCH64_FL_I8MM, \ + AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "i8mm") + +/* Enabling "f32mm" also enables "sve", "fp16", "fp", and "simd". + Disabling "f32mm" only disables "f32mm". */ +AARCH64_OPT_EXTENSION("f32mm", AARCH64_FL_F32MM, \ + AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SIMD, 0, false, "f32mm") + +/* Enabling "f64mm" also enables "sve", "fp16", "fp", and "simd". + Disabling "f64mm" only disables "f64mm". */ +AARCH64_OPT_EXTENSION("f64mm", AARCH64_FL_F64MM, \ + AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SIMD, 0, false, "f64mm") + +/* Enabling "bf16" also enables "simd" and "fp". + Disabling "bf16" only disables "bf16". */ +AARCH64_OPT_EXTENSION("bf16", AARCH64_FL_BF16, \ + AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "bf16") + +/* Enabling/Disabling "flagm" only changes "flagm". */ +AARCH64_OPT_EXTENSION("flagm", AARCH64_FL_FLAGM, 0, 0, false, "flagm") + +/* Enabling/Disabling "pauth" only changes "pauth". */ +AARCH64_OPT_EXTENSION("pauth", AARCH64_FL_PAUTH, 0, 0, false, "paca pacg") #undef AARCH64_OPT_EXTENSION diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h index 7a5c6d7664f47..af3b7364a748f 100644 --- a/gcc/config/aarch64/aarch64-opts.h +++ b/gcc/config/aarch64/aarch64-opts.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2018 Free Software Foundation, Inc. +/* Copyright (C) 2011-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -84,6 +84,7 @@ enum aarch64_function_type { /* SVE vector register sizes. 
*/ enum aarch64_sve_vector_bits_enum { SVE_SCALABLE, + SVE_NOT_IMPLEMENTED = SVE_SCALABLE, SVE_128 = 128, SVE_256 = 256, SVE_512 = 512, @@ -91,4 +92,10 @@ enum aarch64_sve_vector_bits_enum { SVE_2048 = 2048 }; +/* Where to get the canary for the stack protector. */ +enum stack_protector_guard { + SSP_SYSREG, /* per-thread canary in special system register */ + SSP_GLOBAL /* global canary */ +}; + #endif diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def index 87747b420b0f3..0b773d2c34d44 100644 --- a/gcc/config/aarch64/aarch64-passes.def +++ b/gcc/config/aarch64/aarch64-passes.def @@ -1,5 +1,5 @@ /* AArch64-specific passes declarations. - Copyright (C) 2016-2018 Free Software Foundation, Inc. + Copyright (C) 2016-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -19,3 +19,7 @@ . */ INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering); +INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation); +INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance); +INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti); +INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index cda2895d28e74..c2033387384a4 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -120,6 +120,10 @@ enum aarch64_symbol_type ADDR_QUERY_LDP_STP Query what is valid for a load/store pair. + ADDR_QUERY_LDP_STP_N + Query what is valid for a load/store pair, but narrow the incoming mode + for address checking. This is used for the store_pair_lanes patterns. + ADDR_QUERY_ANY Query what is valid for at least one memory constraint, which may allow things that "m" doesn't. For example, the SVE LDR and STR @@ -128,9 +132,29 @@ enum aarch64_symbol_type enum aarch64_addr_query_type { ADDR_QUERY_M, ADDR_QUERY_LDP_STP, + ADDR_QUERY_LDP_STP_N, ADDR_QUERY_ANY }; +/* Enumerates values that can be arbitrarily mixed into a calculation + in order to make the result of the calculation unique to its use case. + + AARCH64_SALT_SSP_SET + AARCH64_SALT_SSP_TEST + Used when calculating the address of the stack protection canary value. + There is a separate value for setting and testing the canary, meaning + that these two operations produce unique addresses: they are different + from each other, and from all other address calculations. + + The main purpose of this is to prevent the SET address being spilled + to the stack and reloaded for the TEST, since that would give an + attacker the opportunity to change the address of the expected + canary value. */ +enum aarch64_salt_type { + AARCH64_SALT_SSP_SET, + AARCH64_SALT_SSP_TEST +}; + /* A set of tuning parameters contains references to size and time cost models and vectors for address cost calculations, register move costs and memory move costs. 
*/ @@ -153,6 +177,8 @@ struct cpu_addrcost_table const struct scale_addr_mode_cost addr_scale_costs; const int pre_modify; const int post_modify; + const int post_modify_ld3_st3; + const int post_modify_ld4_st4; const int register_offset; const int register_sextend; const int register_zextend; @@ -168,33 +194,266 @@ struct cpu_regmove_cost const int FP2FP; }; +struct simd_vec_cost +{ + /* Cost of any integer vector operation, excluding the ones handled + specially below. */ + const int int_stmt_cost; + + /* Cost of any fp vector operation, excluding the ones handled + specially below. */ + const int fp_stmt_cost; + + /* Per-vector cost of permuting vectors after an LD2, LD3 or LD4, + as well as the per-vector cost of permuting vectors before + an ST2, ST3 or ST4. */ + const int ld2_st2_permute_cost; + const int ld3_st3_permute_cost; + const int ld4_st4_permute_cost; + + /* Cost of a permute operation. */ + const int permute_cost; + + /* Cost of reductions for various vector types: iN is for N-bit + integer elements and fN is for N-bit floating-point elements. + We need to single out the element type because it affects the + depth of the reduction. */ + const int reduc_i8_cost; + const int reduc_i16_cost; + const int reduc_i32_cost; + const int reduc_i64_cost; + const int reduc_f16_cost; + const int reduc_f32_cost; + const int reduc_f64_cost; + + /* Additional cost of storing a single vector element, on top of the + normal cost of a scalar store. */ + const int store_elt_extra_cost; + + /* Cost of a vector-to-scalar operation. */ + const int vec_to_scalar_cost; + + /* Cost of a scalar-to-vector operation. */ + const int scalar_to_vec_cost; + + /* Cost of an aligned vector load. */ + const int align_load_cost; + + /* Cost of an unaligned vector load. */ + const int unalign_load_cost; + + /* Cost of an unaligned vector store. */ + const int unalign_store_cost; + + /* Cost of a vector store. */ + const int store_cost; +}; + +typedef struct simd_vec_cost advsimd_vec_cost; + +/* SVE-specific extensions to the information provided by simd_vec_cost. */ +struct sve_vec_cost : simd_vec_cost +{ + constexpr sve_vec_cost (const simd_vec_cost &base, + unsigned int clast_cost, + unsigned int fadda_f16_cost, + unsigned int fadda_f32_cost, + unsigned int fadda_f64_cost, + unsigned int scatter_store_elt_cost) + : simd_vec_cost (base), + clast_cost (clast_cost), + fadda_f16_cost (fadda_f16_cost), + fadda_f32_cost (fadda_f32_cost), + fadda_f64_cost (fadda_f64_cost), + scatter_store_elt_cost (scatter_store_elt_cost) + {} + + /* The cost of a vector-to-scalar CLASTA or CLASTB instruction, + with the scalar being stored in FP registers. This cost is + assumed to be a cycle latency. */ + const int clast_cost; + + /* The costs of FADDA for the three data types that it supports. + These costs are assumed to be cycle latencies. */ + const int fadda_f16_cost; + const int fadda_f32_cost; + const int fadda_f64_cost; + + /* The per-element cost of a scatter store. */ + const int scatter_store_elt_cost; +}; + +/* Base information about how the CPU issues code, containing + information that is relevant to scalar, Advanced SIMD and SVE + operations. + + The structure uses the general term "operation" to refer to + whichever subdivision of an instruction makes sense for the CPU. + These operations would typically be micro operations or macro + operations. + + Note that this structure and the ones derived from it are only + as general as they need to be for the CPUs that currently use them. 
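The sve_vec_cost definition above follows a pattern worth noting: the derived record reuses the Advanced SIMD aggregate wholesale and appends its own constants through a constexpr constructor, so per-CPU tuning tables remain statically initialized. A stripped-down sketch of that idiom, with invented field names and numbers rather than the real cost tables:

```cpp
#include <cstdio>

struct base_cost
{
  const int stmt_cost;     // cost of an ordinary vector statement
  const int permute_cost;  // cost of a permute
};

// Derived record: the base aggregate plus one extra constant, wired up
// through a constexpr constructor in the same way as sve_vec_cost.
struct derived_cost : base_cost
{
  constexpr derived_cost (const base_cost &base, int extra_cost)
    : base_cost (base), extra_cost (extra_cost)
  {}
  const int extra_cost;
};

int main ()
{
  static constexpr base_cost generic_base = { 1, 2 };
  static constexpr derived_cost generic = { generic_base, 5 };
  std::printf ("%d %d %d\n", generic.stmt_cost, generic.permute_cost,
               generic.extra_cost);
  return 0;
}
```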
+ They will probably need to be extended or refined as more CPUs are + added. */ +struct aarch64_base_vec_issue_info +{ + /* How many loads and stores can be issued per cycle. */ + const unsigned int loads_stores_per_cycle; + + /* How many stores can be issued per cycle. */ + const unsigned int stores_per_cycle; + + /* How many integer or FP/SIMD operations can be issued per cycle. + + Currently we don't try to distinguish the two. For vector code, + we only really track FP/SIMD operations during vector costing; + we don't for example try to cost arithmetic operations like + address calculations, which are only decided later during ivopts. + + For scalar code, we effectively assume that code operates entirely + on integers or entirely on floating-point values. Again, we don't + try to take address calculations into account. + + This is not very precise, but it's only meant to be a heuristic. + We could certainly try to do better in future if there's an example + of something that would benefit. */ + const unsigned int general_ops_per_cycle; + + /* How many FP/SIMD operations to count for a floating-point or + vector load operation. + + When constructing an Advanced SIMD vector from elements that have + been loaded from memory, these values apply to each individual load. + When using an SVE gather load, the values apply to each element of + the gather. */ + const unsigned int fp_simd_load_general_ops; + + /* How many FP/SIMD operations to count for a floating-point or + vector store operation. + + When storing individual elements of an Advanced SIMD vector out to + memory, these values apply to each individual store. When using an + SVE scatter store, these values apply to each element of the scatter. */ + const unsigned int fp_simd_store_general_ops; +}; + +using aarch64_scalar_vec_issue_info = aarch64_base_vec_issue_info; + +/* Base information about the issue stage for vector operations. + This structure contains information that is relevant to both + Advanced SIMD and SVE. */ +struct aarch64_simd_vec_issue_info : aarch64_base_vec_issue_info +{ + constexpr aarch64_simd_vec_issue_info (aarch64_base_vec_issue_info base, + unsigned int ld2_st2_general_ops, + unsigned int ld3_st3_general_ops, + unsigned int ld4_st4_general_ops) + : aarch64_base_vec_issue_info (base), + ld2_st2_general_ops (ld2_st2_general_ops), + ld3_st3_general_ops (ld3_st3_general_ops), + ld4_st4_general_ops (ld4_st4_general_ops) + {} + + /* How many FP/SIMD operations to count for each vector loaded or + stored by an LD[234] or ST[234] operation, in addition to the + base costs given in the parent class. For example, the full + number of operations for an LD3 would be: + + load ops: 3 + general ops: 3 * (fp_simd_load_general_ops + ld3_st3_general_ops). */ + const unsigned int ld2_st2_general_ops; + const unsigned int ld3_st3_general_ops; + const unsigned int ld4_st4_general_ops; +}; + +using aarch64_advsimd_vec_issue_info = aarch64_simd_vec_issue_info; + +/* Information about the issue stage for SVE. The main thing this adds + is a concept of "predicate operations". 
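The LD3 example in the comment above is worth working through numerically. With hypothetical values for fp_simd_load_general_ops and ld3_st3_general_ops (the real numbers come from each CPU's issue-info tables, not from this sketch), the counts come out as follows:

```cpp
#include <cstdio>

int main ()
{
  // Hypothetical issue parameters; the real ones belong to the per-CPU
  // aarch64_simd_vec_issue_info tables.
  const unsigned int fp_simd_load_general_ops = 2;
  const unsigned int ld3_st3_general_ops = 3;

  // An LD3 loads three vectors, so it counts 3 load ops plus
  // 3 * (fp_simd_load_general_ops + ld3_st3_general_ops) general ops.
  unsigned int load_ops = 3;
  unsigned int general_ops
    = 3 * (fp_simd_load_general_ops + ld3_st3_general_ops);

  std::printf ("LD3: %u load ops, %u general ops\n", load_ops, general_ops);
  return 0;
}
```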
*/ +struct aarch64_sve_vec_issue_info : aarch64_simd_vec_issue_info +{ + constexpr aarch64_sve_vec_issue_info + (aarch64_simd_vec_issue_info base, + unsigned int pred_ops_per_cycle, + unsigned int while_pred_ops, + unsigned int int_cmp_pred_ops, + unsigned int fp_cmp_pred_ops, + unsigned int gather_scatter_pair_general_ops, + unsigned int gather_scatter_pair_pred_ops) + : aarch64_simd_vec_issue_info (base), + pred_ops_per_cycle (pred_ops_per_cycle), + while_pred_ops (while_pred_ops), + int_cmp_pred_ops (int_cmp_pred_ops), + fp_cmp_pred_ops (fp_cmp_pred_ops), + gather_scatter_pair_general_ops (gather_scatter_pair_general_ops), + gather_scatter_pair_pred_ops (gather_scatter_pair_pred_ops) + {} + + /* How many predicate operations can be issued per cycle. */ + const unsigned int pred_ops_per_cycle; + + /* How many predicate operations are generated by a WHILExx + instruction. */ + const unsigned int while_pred_ops; + + /* How many predicate operations are generated by an integer + comparison instruction. */ + const unsigned int int_cmp_pred_ops; + + /* How many predicate operations are generated by a floating-point + comparison instruction. */ + const unsigned int fp_cmp_pred_ops; + + /* How many general and predicate operations are generated by each pair + of elements in a gather load or scatter store. These values apply + on top of the per-element counts recorded in fp_simd_load_general_ops + and fp_simd_store_general_ops. + + The reason for using pairs is that that is the largest possible + granule size for 128-bit SVE, which can load and store 2 64-bit + elements or 4 32-bit elements. */ + const unsigned int gather_scatter_pair_general_ops; + const unsigned int gather_scatter_pair_pred_ops; +}; + +/* Information related to instruction issue for a particular CPU. */ +struct aarch64_vec_issue_info +{ + const aarch64_base_vec_issue_info *const scalar; + const aarch64_simd_vec_issue_info *const advsimd; + const aarch64_sve_vec_issue_info *const sve; +}; + /* Cost for vector insn classes. */ struct cpu_vector_cost { - const int scalar_int_stmt_cost; /* Cost of any int scalar operation, - excluding load and store. */ - const int scalar_fp_stmt_cost; /* Cost of any fp scalar operation, - excluding load and store. */ - const int scalar_load_cost; /* Cost of scalar load. */ - const int scalar_store_cost; /* Cost of scalar store. */ - const int vec_int_stmt_cost; /* Cost of any int vector operation, - excluding load, store, permute, - vector-to-scalar and - scalar-to-vector operation. */ - const int vec_fp_stmt_cost; /* Cost of any fp vector operation, - excluding load, store, permute, - vector-to-scalar and - scalar-to-vector operation. */ - const int vec_permute_cost; /* Cost of permute operation. */ - const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */ - const int scalar_to_vec_cost; /* Cost of scalar-to-vector - operation. */ - const int vec_align_load_cost; /* Cost of aligned vector load. */ - const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ - const int vec_unalign_store_cost; /* Cost of unaligned vector store. */ - const int vec_store_cost; /* Cost of vector store. */ - const int cond_taken_branch_cost; /* Cost of taken branch. */ - const int cond_not_taken_branch_cost; /* Cost of not taken branch. */ + /* Cost of any integer scalar operation, excluding load and store. */ + const int scalar_int_stmt_cost; + + /* Cost of any fp scalar operation, excluding load and store. */ + const int scalar_fp_stmt_cost; + + /* Cost of a scalar load. 
*/ + const int scalar_load_cost; + + /* Cost of a scalar store. */ + const int scalar_store_cost; + + /* Cost of a taken branch. */ + const int cond_taken_branch_cost; + + /* Cost of a not-taken branch. */ + const int cond_not_taken_branch_cost; + + /* Cost of an Advanced SIMD operations. */ + const advsimd_vec_cost *advsimd; + + /* Cost of an SVE operations, or null if SVE is not implemented. */ + const sve_vec_cost *sve; + + /* Issue information, or null if none is provided. */ + const aarch64_vec_issue_info *const issue_info; }; /* Branch costs. */ @@ -207,20 +466,20 @@ struct cpu_branch_cost /* Control approximate alternatives to certain FP operators. */ #define AARCH64_APPROX_MODE(MODE) \ ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \ - ? (1 << ((MODE) - MIN_MODE_FLOAT)) \ + ? ((uint64_t) 1 << ((MODE) - MIN_MODE_FLOAT)) \ : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \ - ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \ - + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \ + ? ((uint64_t) 1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \ + + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \ : (0)) -#define AARCH64_APPROX_NONE (0) -#define AARCH64_APPROX_ALL (-1) +#define AARCH64_APPROX_NONE ((uint64_t) 0) +#define AARCH64_APPROX_ALL (~(uint64_t) 0) /* Allowed modes for approximations. */ struct cpu_approx_modes { - const unsigned int division; /* Division. */ - const unsigned int sqrt; /* Square root. */ - const unsigned int recip_sqrt; /* Reciprocal square root. */ + const uint64_t division; /* Division. */ + const uint64_t sqrt; /* Square root. */ + const uint64_t recip_sqrt; /* Reciprocal square root. */ }; /* Cache prefetch settings for prefetch-loop-arrays. */ @@ -230,6 +489,12 @@ struct cpu_prefetch_tune const int l1_cache_size; const int l1_cache_line_size; const int l2_cache_size; + /* Whether software prefetch hints should be issued for non-constant + strides. */ + const bool prefetch_dynamic_strides; + /* The minimum constant stride beyond which we should use prefetch + hints for. */ + const int minimum_stride; const int default_opt_level; }; @@ -241,12 +506,16 @@ struct tune_params const struct cpu_vector_cost *vec_costs; const struct cpu_branch_cost *branch_costs; const struct cpu_approx_modes *approx_modes; + /* Width of the SVE registers or SVE_NOT_IMPLEMENTED if not applicable. + Only used for tuning decisions, does not disable VLA + vectorization. */ + enum aarch64_sve_vector_bits_enum sve_width; int memmov_cost; int issue_rate; unsigned int fusible_ops; - int function_align; - int jump_align; - int loop_align; + const char *function_align; + const char *jump_align; + const char *loop_align; int int_reassoc_width; int fp_reassoc_width; int vec_reassoc_width; @@ -277,6 +546,49 @@ struct tune_params const struct cpu_prefetch_tune *prefetch; }; +/* Classifies an address. + + ADDRESS_REG_IMM + A simple base register plus immediate offset. + + ADDRESS_REG_WB + A base register indexed by immediate offset with writeback. + + ADDRESS_REG_REG + A base register indexed by (optionally scaled) register. + + ADDRESS_REG_UXTW + A base register indexed by (optionally scaled) zero-extended register. + + ADDRESS_REG_SXTW + A base register indexed by (optionally scaled) sign-extended register. + + ADDRESS_LO_SUM + A LO_SUM rtx with a base register and "LO12" symbol relocation. + + ADDRESS_SYMBOLIC: + A constant symbolic address, in pc-relative literal pool. 
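The AARCH64_APPROX_MODE change above widens the shifted constant to uint64_t because the mode index (float modes plus vector float modes) can reach or exceed the width of a plain int, where shifting a 1 that far overflows or is undefined. A small demonstration of the widened form; the index 40 is an arbitrary illustrative value, not a real mode number:

```cpp
#include <cstdint>
#include <cstdio>

int main ()
{
  // With a plain int constant, 1 << 40 would be invalid; the widened
  // form keeps every bit position up to 63 well defined.
  const int idx = 40;
  const uint64_t mask = uint64_t (1) << idx;
  std::printf ("bit %d -> %#llx\n", idx, (unsigned long long) mask);
  return 0;
}
```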
*/ + +enum aarch64_address_type { + ADDRESS_REG_IMM, + ADDRESS_REG_WB, + ADDRESS_REG_REG, + ADDRESS_REG_UXTW, + ADDRESS_REG_SXTW, + ADDRESS_LO_SUM, + ADDRESS_SYMBOLIC +}; + +/* Address information. */ +struct aarch64_address_info { + enum aarch64_address_type type; + rtx base; + rtx offset; + poly_int64 const_offset; + int shift; + enum aarch64_symbol_type symbol_type; +}; + #define AARCH64_FUSION_PAIR(x, name) \ AARCH64_FUSE_##name##_index, /* Supported fusion operations. */ @@ -338,8 +650,81 @@ enum simd_immediate_check { AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC }; +/* The key type that -msign-return-address should use. */ +enum aarch64_key_type { + AARCH64_KEY_A, + AARCH64_KEY_B +}; + +extern enum aarch64_key_type aarch64_ra_sign_key; + extern struct tune_params aarch64_tune_params; +/* The available SVE predicate patterns, known in the ACLE as "svpattern". */ +#define AARCH64_FOR_SVPATTERN(T) \ + T (POW2, pow2, 0) \ + T (VL1, vl1, 1) \ + T (VL2, vl2, 2) \ + T (VL3, vl3, 3) \ + T (VL4, vl4, 4) \ + T (VL5, vl5, 5) \ + T (VL6, vl6, 6) \ + T (VL7, vl7, 7) \ + T (VL8, vl8, 8) \ + T (VL16, vl16, 9) \ + T (VL32, vl32, 10) \ + T (VL64, vl64, 11) \ + T (VL128, vl128, 12) \ + T (VL256, vl256, 13) \ + T (MUL4, mul4, 29) \ + T (MUL3, mul3, 30) \ + T (ALL, all, 31) + +/* The available SVE prefetch operations, known in the ACLE as "svprfop". */ +#define AARCH64_FOR_SVPRFOP(T) \ + T (PLDL1KEEP, pldl1keep, 0) \ + T (PLDL1STRM, pldl1strm, 1) \ + T (PLDL2KEEP, pldl2keep, 2) \ + T (PLDL2STRM, pldl2strm, 3) \ + T (PLDL3KEEP, pldl3keep, 4) \ + T (PLDL3STRM, pldl3strm, 5) \ + T (PSTL1KEEP, pstl1keep, 8) \ + T (PSTL1STRM, pstl1strm, 9) \ + T (PSTL2KEEP, pstl2keep, 10) \ + T (PSTL2STRM, pstl2strm, 11) \ + T (PSTL3KEEP, pstl3keep, 12) \ + T (PSTL3STRM, pstl3strm, 13) + +#define AARCH64_SVENUM(UPPER, LOWER, VALUE) AARCH64_SV_##UPPER = VALUE, +enum aarch64_svpattern { + AARCH64_FOR_SVPATTERN (AARCH64_SVENUM) + AARCH64_NUM_SVPATTERNS +}; + +enum aarch64_svprfop { + AARCH64_FOR_SVPRFOP (AARCH64_SVENUM) + AARCH64_NUM_SVPRFOPS +}; +#undef AARCH64_SVENUM + +/* It's convenient to divide the built-in function codes into groups, + rather than having everything in a single enum. This type enumerates + those groups. */ +enum aarch64_builtin_class +{ + AARCH64_BUILTIN_GENERAL, + AARCH64_BUILTIN_SVE +}; + +/* Built-in function codes are structured so that the low + AARCH64_BUILTIN_SHIFT bits contain the aarch64_builtin_class + and the upper bits contain a group-specific subcode. */ +const unsigned int AARCH64_BUILTIN_SHIFT = 1; + +/* Mask that selects the aarch64_builtin_class part of a function code. 
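The AARCH64_BUILTIN_SHIFT scheme described above is a tag-in-the-low-bits encoding. The encode helper below is purely illustrative (GCC performs the packing and dispatch inline where builtin codes are created), but it shows how the class and the group-specific subcode round-trip through the shift and mask:

```cpp
#include <cstdio>

enum aarch64_builtin_class
{
  AARCH64_BUILTIN_GENERAL,
  AARCH64_BUILTIN_SVE
};

const unsigned int AARCH64_BUILTIN_SHIFT = 1;
const unsigned int AARCH64_BUILTIN_CLASS = (1 << AARCH64_BUILTIN_SHIFT) - 1;

// Hypothetical helper: pack a group-specific subcode above the class bits.
static unsigned int
encode_builtin (aarch64_builtin_class klass, unsigned int subcode)
{
  return (subcode << AARCH64_BUILTIN_SHIFT) | klass;
}

int main ()
{
  unsigned int code = encode_builtin (AARCH64_BUILTIN_SVE, 42);
  std::printf ("class=%u subcode=%u\n",
               code & AARCH64_BUILTIN_CLASS,
               code >> AARCH64_BUILTIN_SHIFT);
  return 0;
}
```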
*/ +const unsigned int AARCH64_BUILTIN_CLASS = (1 << AARCH64_BUILTIN_SHIFT) - 1; + +void aarch64_post_cfi_startproc (void); poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); int aarch64_get_condition_code (rtx); bool aarch64_address_valid_for_prefetch_p (rtx, bool); @@ -349,6 +734,8 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); int aarch64_branch_cost (bool, bool); enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); +opt_machine_mode aarch64_vq_mode (scalar_mode); +opt_machine_mode aarch64_full_sve_mode (scalar_mode); bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, @@ -356,40 +743,55 @@ bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, bool aarch64_constant_address_p (rtx); bool aarch64_emit_approx_div (rtx, rtx, rtx); bool aarch64_emit_approx_sqrt (rtx, rtx, bool); -void aarch64_expand_call (rtx, rtx, bool); -bool aarch64_expand_movmem (rtx *); +void aarch64_expand_call (rtx, rtx, rtx, bool); +bool aarch64_expand_cpymem (rtx *); +bool aarch64_expand_setmem (rtx *); bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_float_const_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs); -bool aarch64_gen_movmemqi (rtx *); -bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); +bool aarch64_gen_cpymemqi (rtx *); bool aarch64_is_extend_from_extract (scalar_int_mode, rtx, rtx); bool aarch64_is_long_call_p (rtx); bool aarch64_is_noplt_call_p (rtx); bool aarch64_label_mentioned_p (rtx); void aarch64_declare_function_name (FILE *, const char*, tree); +void aarch64_asm_output_alias (FILE *, const tree, const tree); +void aarch64_asm_output_external (FILE *, tree, const char*); bool aarch64_legitimate_pic_operand_p (rtx); bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx); +bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); +machine_mode aarch64_sve_int_mode (machine_mode); opt_machine_mode aarch64_sve_pred_mode (unsigned int); +machine_mode aarch64_sve_pred_mode (machine_mode); +opt_machine_mode aarch64_sve_data_mode (scalar_mode, poly_uint64); +bool aarch64_sve_mode_p (machine_mode); +HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int); bool aarch64_sve_cnt_immediate_p (rtx); +bool aarch64_sve_scalar_inc_dec_immediate_p (rtx); bool aarch64_sve_addvl_addpl_immediate_p (rtx); -bool aarch64_sve_inc_dec_immediate_p (rtx); +bool aarch64_sve_vector_inc_dec_immediate_p (rtx); int aarch64_add_offset_temporaries (rtx); void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); bool aarch64_mov_operand_p (rtx, machine_mode); rtx aarch64_reverse_mask (machine_mode, unsigned int); bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); +bool aarch64_offset_9bit_signed_unscaled_p (machine_mode, poly_int64); +char *aarch64_output_sve_prefetch (const char *, rtx, const char *); char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx); -char *aarch64_output_sve_addvl_addpl (rtx, rtx, rtx); -char *aarch64_output_sve_inc_dec_immediate (const char *, rtx); +char 
*aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *); +char *aarch64_output_sve_scalar_inc_dec (rtx); +char *aarch64_output_sve_addvl_addpl (rtx); +char *aarch64_output_sve_vector_inc_dec (const char *, rtx); char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); char *aarch64_output_simd_mov_immediate (rtx, unsigned, enum simd_immediate_check w = AARCH64_CHECK_MOV); char *aarch64_output_sve_mov_immediate (rtx); -char *aarch64_output_ptrue (machine_mode, char); +char *aarch64_output_sve_ptrues (rtx); bool aarch64_pad_reg_upward (machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); bool aarch64_regno_ok_for_index_p (int, bool); @@ -398,11 +800,13 @@ bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high); bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode); bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); +bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *); bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *, enum simd_immediate_check w = AARCH64_CHECK_MOV); rtx aarch64_check_zero_based_sve_index_immediate (rtx); bool aarch64_sve_index_immediate_p (rtx); -bool aarch64_sve_arith_immediate_p (rtx, bool); +bool aarch64_sve_arith_immediate_p (machine_mode, rtx, bool); +bool aarch64_sve_sqadd_sqsub_immediate_p (machine_mode, rtx, bool); bool aarch64_sve_bitmask_immediate_p (rtx); bool aarch64_sve_dup_immediate_p (rtx); bool aarch64_sve_cmp_immediate_p (rtx, bool); @@ -411,15 +815,17 @@ bool aarch64_sve_float_mul_immediate_p (rtx); bool aarch64_split_dimode_const_store (rtx, rtx); bool aarch64_symbolic_address_p (rtx); bool aarch64_uimm12_shift (HOST_WIDE_INT); +int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &); bool aarch64_use_return_insn_p (void); -const char *aarch64_mangle_builtin_type (const_tree); const char *aarch64_output_casesi (rtx *); +unsigned int aarch64_tlsdesc_abi_id (); enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT); enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); enum reg_class aarch64_regno_regclass (unsigned); int aarch64_asm_preferred_eh_data_format (int, int); int aarch64_fpconst_pow_of_2 (rtx); +int aarch64_fpconst_pow2_recip (rtx); machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, machine_mode); int aarch64_uxt_size (int, HOST_WIDE_INT); @@ -427,32 +833,50 @@ int aarch64_vec_fpconst_pow_of_2 (rtx); rtx aarch64_eh_return_handler_rtx (void); rtx aarch64_mask_from_zextract_ops (rtx, rtx); const char *aarch64_output_move_struct (rtx *operands); +rtx aarch64_return_addr_rtx (void); rtx aarch64_return_addr (int, rtx); rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT); bool aarch64_simd_mem_operand_p (rtx); bool aarch64_sve_ld1r_operand_p (rtx); +bool aarch64_sve_ld1rq_operand_p (rtx); +bool aarch64_sve_ld1ro_operand_p (rtx, scalar_mode); +bool aarch64_sve_ldff1_operand_p (rtx); +bool aarch64_sve_ldnf1_operand_p (rtx); bool aarch64_sve_ldr_operand_p (rtx); +bool aarch64_sve_prefetch_operand_p (rtx, machine_mode); bool aarch64_sve_struct_memory_operand_p (rtx); rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); +rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int); +bool aarch64_stepped_int_parallel_p (rtx, int); rtx aarch64_tls_get_addr (void); -tree aarch64_fold_builtin (tree, int, tree *, bool); unsigned aarch64_dbx_register_number (unsigned); unsigned aarch64_trampoline_size (void); void aarch64_asm_output_labelref 
(FILE *, const char *); void aarch64_cpu_cpp_builtins (cpp_reader *); const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *); const char * aarch64_output_probe_stack_range (rtx, rtx); -void aarch64_err_no_fpadvsimd (machine_mode, const char *); +const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); +void aarch64_err_no_fpadvsimd (machine_mode); void aarch64_expand_epilogue (bool); -void aarch64_expand_mov_immediate (rtx, rtx, rtx (*) (rtx, rtx) = 0); +rtx aarch64_ptrue_all (unsigned int); +opt_machine_mode aarch64_ptrue_all_mode (rtx); +rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); +rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx); +void aarch64_expand_mov_immediate (rtx, rtx); +rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type); +rtx aarch64_ptrue_reg (machine_mode); +rtx aarch64_pfalse_reg (machine_mode); +bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); +rtx aarch64_replace_reg_mode (rtx, machine_mode); void aarch64_split_sve_subreg_move (rtx, rtx, rtx); void aarch64_expand_prologue (void); void aarch64_expand_vector_init (rtx, rtx); +void aarch64_sve_expand_vector_init (rtx, rtx); void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, - const_tree, unsigned); + const_tree, unsigned, bool = false); void aarch64_init_expanders (void); void aarch64_init_simd_builtins (void); void aarch64_emit_call_insn (rtx); @@ -460,7 +884,18 @@ void aarch64_register_pragmas (void); void aarch64_relayout_simd_types (void); void aarch64_reset_previous_fndecl (void); bool aarch64_return_address_signing_enabled (void); +bool aarch64_bti_enabled (void); void aarch64_save_restore_target_globals (tree); +void aarch64_addti_scratch_regs (rtx, rtx, rtx *, + rtx *, rtx *, + rtx *, rtx *, + rtx *); +void aarch64_subvti_scratch_regs (rtx, rtx, rtx *, + rtx *, rtx *, + rtx *, rtx *, rtx *); +void aarch64_expand_subvti (rtx, rtx, rtx, + rtx, rtx, rtx, rtx, bool); + /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); @@ -486,8 +921,11 @@ void aarch64_split_simd_move (rtx, rtx); /* Check for a legitimate floating point constant for FMOV. 
*/ bool aarch64_float_const_representable_p (rtx); -#if defined (RTX_CODE) +extern int aarch64_epilogue_uses (int); +#if defined (RTX_CODE) +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, + rtx label_ref); bool aarch64_legitimate_address_p (machine_mode, rtx, bool, aarch64_addr_query_type = ADDR_QUERY_M); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); @@ -496,33 +934,50 @@ rtx aarch64_load_tp (rtx); void aarch64_expand_compare_and_swap (rtx op[]); void aarch64_split_compare_and_swap (rtx op[]); -void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx); -bool aarch64_atomic_ldop_supported_p (enum rtx_code); -void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx); void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); -bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE); +bool aarch64_gen_adjusted_ldpstp (rtx *, bool, machine_mode, RTX_CODE); void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx); bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *); -#endif /* RTX_CODE */ -void aarch64_init_builtins (void); +bool aarch64_prepare_sve_int_fma (rtx *, rtx_code); +bool aarch64_prepare_sve_cond_int_fma (rtx *, rtx_code); +#endif /* RTX_CODE */ bool aarch64_process_target_attr (tree); void aarch64_override_options_internal (struct gcc_options *); -rtx aarch64_expand_builtin (tree exp, - rtx target, - rtx subtarget ATTRIBUTE_UNUSED, - machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED); -tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED); -tree aarch64_builtin_rsqrt (unsigned int); +const char *aarch64_general_mangle_builtin_type (const_tree); +void aarch64_general_init_builtins (void); +tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *); +gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *); +rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); +tree aarch64_general_builtin_decl (unsigned, bool); +tree aarch64_general_builtin_rsqrt (unsigned int); tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); +namespace aarch64_sve { + void init_builtins (); + void handle_arm_sve_h (); + tree builtin_decl (unsigned, bool); + bool builtin_type_p (const_tree); + bool builtin_type_p (const_tree, unsigned int *, unsigned int *); + const char *mangle_builtin_type (const_tree); + tree resolve_overloaded_builtin (location_t, unsigned int, + vec *); + bool check_builtin_call (location_t, vec, unsigned int, + tree, unsigned int, tree *); + gimple *gimple_fold_builtin (unsigned int, gimple_stmt_iterator *, gcall *); + rtx expand_builtin (unsigned int, tree, rtx); + tree handle_arm_sve_vector_bits_attribute (tree *, tree, tree, int, bool *); +#ifdef GCC_TARGET_H + bool verify_type_context (location_t, type_context_kind, const_tree, bool); +#endif +} + extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx, rtx, rtx, rtx, unsigned int); extern void aarch64_expand_sve_vec_perm (rtx, rtx, rtx, rtx); @@ -533,22 +988,59 @@ int aarch64_ccmp_mode_to_code (machine_mode mode); bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset); bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode); -bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, scalar_mode); +bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode); +void aarch64_swap_ldrstr_operands (rtx *, bool); extern void 
aarch64_asm_output_pool_epilogue (FILE *, const char *, tree, HOST_WIDE_INT); + +extern bool aarch64_classify_address (struct aarch64_address_info *, rtx, + machine_mode, bool, + aarch64_addr_query_type = ADDR_QUERY_M); + /* Defined in common/config/aarch64-common.c. */ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, const struct cl_decoded_option *, location_t); const char *aarch64_rewrite_selected_cpu (const char *name); enum aarch64_parse_opt_result aarch64_parse_extension (const char *, - unsigned long *); -std::string aarch64_get_extension_string_for_isa_flags (unsigned long, - unsigned long); + uint64_t *, + std::string *); +void aarch64_get_all_extension_candidates (auto_vec *candidates); +std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t); + +/* Defined in aarch64-d.c */ +extern void aarch64_d_target_versions (void); +extern void aarch64_d_register_target_info (void); -rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt); +rtl_opt_pass *make_pass_fma_steering (gcc::context *); +rtl_opt_pass *make_pass_track_speculation (gcc::context *); +rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *); +rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt); +rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt); poly_uint64 aarch64_regmode_natural_size (machine_mode); +bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT); + +struct atomic_ool_names +{ + const char *str[5][4]; +}; + +rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, + const atomic_ool_names *names); +extern const atomic_ool_names aarch64_ool_swp_names; +extern const atomic_ool_names aarch64_ool_ldadd_names; +extern const atomic_ool_names aarch64_ool_ldset_names; +extern const atomic_ool_names aarch64_ool_ldclr_names; +extern const atomic_ool_names aarch64_ool_ldeor_names; + +tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); + +const char *aarch64_sls_barrier (int); +const char *aarch64_indirect_call_asm (rtx); +extern bool aarch64_harden_sls_retbr_p (void); +extern bool aarch64_harden_sls_blr_p (void); + #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def index 478f179af7bdc..21145e8f10315 100644 --- a/gcc/config/aarch64/aarch64-simd-builtin-types.def +++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def @@ -1,5 +1,5 @@ /* Builtin AdvSIMD types. - Copyright (C) 2014-2018 Free Software Foundation, Inc. + Copyright (C) 2014-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -50,3 +50,5 @@ ENTRY (Float32x4_t, V4SF, none, 13) ENTRY (Float64x1_t, V1DF, none, 13) ENTRY (Float64x2_t, V2DF, none, 13) + ENTRY (Bfloat16x4_t, V4BF, none, 14) + ENTRY (Bfloat16x8_t, V8BF, none, 14) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b383f2485e5a2..b885bd5b38bf7 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2012-2018 Free Software Foundation, Inc. + Copyright (C) 2012-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -37,608 +37,857 @@ macro holding the RTL pattern for the intrinsic. This mapping is: 0 - CODE_FOR_aarch64_ 1-9 - CODE_FOR_<1-9> - 10 - CODE_FOR_. 
*/ - - BUILTIN_VDC (COMBINE, combine, 0) - VAR1 (COMBINEP, combine, 0, di) - BUILTIN_VB (BINOP, pmul, 0) - BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0) - BUILTIN_VHSDF_DF (UNOP, sqrt, 2) - BUILTIN_VD_BHSI (BINOP, addp, 0) - VAR1 (UNOP, addp, 0, di) - BUILTIN_VDQ_BHSI (UNOP, clrsb, 2) - BUILTIN_VDQ_BHSI (UNOP, clz, 2) - BUILTIN_VS (UNOP, ctz, 2) - BUILTIN_VB (UNOP, popcount, 2) + 10 - CODE_FOR_. + + Parameter 4 is the 'flag' of the intrinsic. This is used to + help describe the attributes (for example, pure) for the intrinsic + function. */ + + BUILTIN_VDC (COMBINE, combine, 0, AUTO_FP) + VAR1 (COMBINEP, combine, 0, NONE, di) + BUILTIN_VB (BINOP, pmul, 0, NONE) + BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0, FP) + BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP) + BUILTIN_VD_BHSI (BINOP, addp, 0, NONE) + VAR1 (UNOP, addp, 0, NONE, di) + BUILTIN_VDQ_BHSI (UNOP, clrsb, 2, NONE) + BUILTIN_VDQ_BHSI (UNOP, clz, 2, NONE) + BUILTIN_VS (UNOP, ctz, 2, NONE) + BUILTIN_VB (UNOP, popcount, 2, NONE) + + /* Implemented by aarch64_get_low. */ + BUILTIN_VQMOV (UNOP, get_low, 0, AUTO_FP) + /* Implemented by aarch64_get_high. */ + BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP) /* Implemented by aarch64_qshl. */ - BUILTIN_VSDQ_I (BINOP, sqshl, 0) - BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0) - BUILTIN_VSDQ_I (BINOP, sqrshl, 0) - BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0) + BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE) + BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE) + BUILTIN_VSDQ_I (BINOP, sqrshl, 0, NONE) + BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0, NONE) /* Implemented by aarch64_. */ - BUILTIN_VSDQ_I (BINOP, sqadd, 0) - BUILTIN_VSDQ_I (BINOPU, uqadd, 0) - BUILTIN_VSDQ_I (BINOP, sqsub, 0) - BUILTIN_VSDQ_I (BINOPU, uqsub, 0) + BUILTIN_VSDQ_I (BINOP, sqadd, 0, NONE) + BUILTIN_VSDQ_I (BINOPU, uqadd, 0, NONE) + BUILTIN_VSDQ_I (BINOP, sqsub, 0, NONE) + BUILTIN_VSDQ_I (BINOPU, uqsub, 0, NONE) /* Implemented by aarch64_qadd. */ - BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0) - BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0) + BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0, NONE) + BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0, NONE) /* Implemented by aarch64_get_dreg. */ - BUILTIN_VDC (GETREG, get_dregoi, 0) - BUILTIN_VDC (GETREG, get_dregci, 0) - BUILTIN_VDC (GETREG, get_dregxi, 0) - VAR1 (GETREGP, get_dregoi, 0, di) - VAR1 (GETREGP, get_dregci, 0, di) - VAR1 (GETREGP, get_dregxi, 0, di) + BUILTIN_VDC (GETREG, get_dregoi, 0, AUTO_FP) + BUILTIN_VDC (GETREG, get_dregci, 0, AUTO_FP) + BUILTIN_VDC (GETREG, get_dregxi, 0, AUTO_FP) + VAR1 (GETREGP, get_dregoi, 0, AUTO_FP, di) + VAR1 (GETREGP, get_dregci, 0, AUTO_FP, di) + VAR1 (GETREGP, get_dregxi, 0, AUTO_FP, di) /* Implemented by aarch64_get_qreg. */ - BUILTIN_VQ (GETREG, get_qregoi, 0) - BUILTIN_VQ (GETREG, get_qregci, 0) - BUILTIN_VQ (GETREG, get_qregxi, 0) - VAR1 (GETREGP, get_qregoi, 0, v2di) - VAR1 (GETREGP, get_qregci, 0, v2di) - VAR1 (GETREGP, get_qregxi, 0, v2di) + BUILTIN_VQ (GETREG, get_qregoi, 0, AUTO_FP) + BUILTIN_VQ (GETREG, get_qregci, 0, AUTO_FP) + BUILTIN_VQ (GETREG, get_qregxi, 0, AUTO_FP) + VAR1 (GETREGP, get_qregoi, 0, AUTO_FP, v2di) + VAR1 (GETREGP, get_qregci, 0, AUTO_FP, v2di) + VAR1 (GETREGP, get_qregxi, 0, AUTO_FP, v2di) /* Implemented by aarch64_set_qreg. 
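Because the .def entries above gain a fourth 'flag' argument, every consumer of these X-macros must accept (and may inspect) the extra column. A minimal sketch of how such a table is typically consumed, using a made-up three-entry table rather than the real builtin list:

```cpp
#include <cstdio>

// Illustrative stand-in for the .def file: name, pattern selector, flag.
#define HYPOTHETICAL_BUILTINS(X) \
  X (sqadd, 0, NONE)             \
  X (ld2,   0, LOAD)             \
  X (st2,   0, STORE)

// One consumer: print each entry, stringizing the name and flag columns.
#define PRINT_ENTRY(NAME, CODE, FLAG) \
  std::printf ("%-6s pattern=%d flag=%s\n", #NAME, CODE, #FLAG);

int main ()
{
  HYPOTHETICAL_BUILTINS (PRINT_ENTRY)
  return 0;
}
```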
*/ - BUILTIN_VQ (SETREG, set_qregoi, 0) - BUILTIN_VQ (SETREG, set_qregci, 0) - BUILTIN_VQ (SETREG, set_qregxi, 0) - VAR1 (SETREGP, set_qregoi, 0, v2di) - VAR1 (SETREGP, set_qregci, 0, v2di) - VAR1 (SETREGP, set_qregxi, 0, v2di) + BUILTIN_VQ (SETREG, set_qregoi, 0, AUTO_FP) + BUILTIN_VQ (SETREG, set_qregci, 0, AUTO_FP) + BUILTIN_VQ (SETREG, set_qregxi, 0, AUTO_FP) + VAR1 (SETREGP, set_qregoi, 0, AUTO_FP, v2di) + VAR1 (SETREGP, set_qregci, 0, AUTO_FP, v2di) + VAR1 (SETREGP, set_qregxi, 0, AUTO_FP, v2di) /* Implemented by aarch64_ld1x2. */ - BUILTIN_VQ (LOADSTRUCT, ld1x2, 0) + BUILTIN_VQ (LOADSTRUCT, ld1x2, 0, LOAD) /* Implemented by aarch64_ld1x2. */ - BUILTIN_VDC (LOADSTRUCT, ld1x2, 0) + BUILTIN_VDC (LOADSTRUCT, ld1x2, 0, LOAD) /* Implemented by aarch64_ld. */ - BUILTIN_VDC (LOADSTRUCT, ld2, 0) - BUILTIN_VDC (LOADSTRUCT, ld3, 0) - BUILTIN_VDC (LOADSTRUCT, ld4, 0) + BUILTIN_VDC (LOADSTRUCT, ld2, 0, LOAD) + BUILTIN_VDC (LOADSTRUCT, ld3, 0, LOAD) + BUILTIN_VDC (LOADSTRUCT, ld4, 0, LOAD) /* Implemented by aarch64_ld. */ - BUILTIN_VQ (LOADSTRUCT, ld2, 0) - BUILTIN_VQ (LOADSTRUCT, ld3, 0) - BUILTIN_VQ (LOADSTRUCT, ld4, 0) + BUILTIN_VQ (LOADSTRUCT, ld2, 0, LOAD) + BUILTIN_VQ (LOADSTRUCT, ld3, 0, LOAD) + BUILTIN_VQ (LOADSTRUCT, ld4, 0, LOAD) /* Implemented by aarch64_ldr. */ - BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0) - BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0) - BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0) + BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0, LOAD) + BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0, LOAD) + BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0, LOAD) /* Implemented by aarch64_ld_lane. */ - BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld2_lane, 0) - BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld3_lane, 0) - BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld4_lane, 0) + BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld2_lane, 0, ALL) + BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld3_lane, 0, ALL) + BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld4_lane, 0, ALL) /* Implemented by aarch64_st. */ - BUILTIN_VDC (STORESTRUCT, st2, 0) - BUILTIN_VDC (STORESTRUCT, st3, 0) - BUILTIN_VDC (STORESTRUCT, st4, 0) + BUILTIN_VDC (STORESTRUCT, st2, 0, STORE) + BUILTIN_VDC (STORESTRUCT, st3, 0, STORE) + BUILTIN_VDC (STORESTRUCT, st4, 0, STORE) /* Implemented by aarch64_st. */ - BUILTIN_VQ (STORESTRUCT, st2, 0) - BUILTIN_VQ (STORESTRUCT, st3, 0) - BUILTIN_VQ (STORESTRUCT, st4, 0) - - BUILTIN_VALLDIF (STORESTRUCT_LANE, st2_lane, 0) - BUILTIN_VALLDIF (STORESTRUCT_LANE, st3_lane, 0) - BUILTIN_VALLDIF (STORESTRUCT_LANE, st4_lane, 0) - - BUILTIN_VQW (BINOP, saddl2, 0) - BUILTIN_VQW (BINOP, uaddl2, 0) - BUILTIN_VQW (BINOP, ssubl2, 0) - BUILTIN_VQW (BINOP, usubl2, 0) - BUILTIN_VQW (BINOP, saddw2, 0) - BUILTIN_VQW (BINOP, uaddw2, 0) - BUILTIN_VQW (BINOP, ssubw2, 0) - BUILTIN_VQW (BINOP, usubw2, 0) + BUILTIN_VQ (STORESTRUCT, st2, 0, STORE) + BUILTIN_VQ (STORESTRUCT, st3, 0, STORE) + BUILTIN_VQ (STORESTRUCT, st4, 0, STORE) + + BUILTIN_VALLDIF (STORESTRUCT_LANE, st2_lane, 0, ALL) + BUILTIN_VALLDIF (STORESTRUCT_LANE, st3_lane, 0, ALL) + BUILTIN_VALLDIF (STORESTRUCT_LANE, st4_lane, 0, ALL) + + BUILTIN_VQW (BINOP, saddl2, 0, NONE) + BUILTIN_VQW (BINOP, uaddl2, 0, NONE) + BUILTIN_VQW (BINOP, ssubl2, 0, NONE) + BUILTIN_VQW (BINOP, usubl2, 0, NONE) + BUILTIN_VQW (BINOP, saddw2, 0, NONE) + BUILTIN_VQW (BINOP, uaddw2, 0, NONE) + BUILTIN_VQW (BINOP, ssubw2, 0, NONE) + BUILTIN_VQW (BINOP, usubw2, 0, NONE) /* Implemented by aarch64_l. 
*/ - BUILTIN_VD_BHSI (BINOP, saddl, 0) - BUILTIN_VD_BHSI (BINOP, uaddl, 0) - BUILTIN_VD_BHSI (BINOP, ssubl, 0) - BUILTIN_VD_BHSI (BINOP, usubl, 0) + BUILTIN_VD_BHSI (BINOP, saddl, 0, NONE) + BUILTIN_VD_BHSI (BINOP, uaddl, 0, NONE) + BUILTIN_VD_BHSI (BINOP, ssubl, 0, NONE) + BUILTIN_VD_BHSI (BINOP, usubl, 0, NONE) /* Implemented by aarch64_w. */ - BUILTIN_VD_BHSI (BINOP, saddw, 0) - BUILTIN_VD_BHSI (BINOP, uaddw, 0) - BUILTIN_VD_BHSI (BINOP, ssubw, 0) - BUILTIN_VD_BHSI (BINOP, usubw, 0) + BUILTIN_VD_BHSI (BINOP, saddw, 0, NONE) + BUILTIN_VD_BHSI (BINOP, uaddw, 0, NONE) + BUILTIN_VD_BHSI (BINOP, ssubw, 0, NONE) + BUILTIN_VD_BHSI (BINOP, usubw, 0, NONE) /* Implemented by aarch64_h. */ - BUILTIN_VDQ_BHSI (BINOP, shadd, 0) - BUILTIN_VDQ_BHSI (BINOP, shsub, 0) - BUILTIN_VDQ_BHSI (BINOP, uhadd, 0) - BUILTIN_VDQ_BHSI (BINOP, uhsub, 0) - BUILTIN_VDQ_BHSI (BINOP, srhadd, 0) - BUILTIN_VDQ_BHSI (BINOP, urhadd, 0) + BUILTIN_VDQ_BHSI (BINOP, shadd, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, shsub, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, uhadd, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, uhsub, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, srhadd, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, urhadd, 0, NONE) + + /* Implemented by aarch64_addlv. */ + BUILTIN_VDQV_L (UNOP, saddlv, 0, NONE) + BUILTIN_VDQV_L (UNOPU, uaddlv, 0, NONE) + + /* Implemented by aarch64_abd. */ + BUILTIN_VDQ_BHSI (BINOP, sabd, 0, NONE) + BUILTIN_VDQ_BHSI (BINOPU, uabd, 0, NONE) + + /* Implemented by aarch64_aba. */ + BUILTIN_VDQ_BHSI (TERNOP, saba, 0, NONE) + BUILTIN_VDQ_BHSI (TERNOPU, uaba, 0, NONE) + + BUILTIN_VDQV_S (BINOP, sadalp, 0, NONE) + BUILTIN_VDQV_S (BINOPU, uadalp, 0, NONE) + + /* Implemented by aarch64_abal. */ + BUILTIN_VD_BHSI (TERNOP, sabal, 0, NONE) + BUILTIN_VD_BHSI (TERNOPU, uabal, 0, NONE) + + /* Implemented by aarch64_abal2. */ + BUILTIN_VQW (TERNOP, sabal2, 0, NONE) + BUILTIN_VQW (TERNOPU, uabal2, 0, NONE) + + /* Implemented by aarch64_abdl. */ + BUILTIN_VD_BHSI (BINOP, sabdl, 0, NONE) + BUILTIN_VD_BHSI (BINOPU, uabdl, 0, NONE) + + /* Implemented by aarch64_abdl2. */ + BUILTIN_VQW (BINOP, sabdl2, 0, NONE) + BUILTIN_VQW (BINOPU, uabdl2, 0, NONE) + /* Implemented by aarch64_hn. */ - BUILTIN_VQN (BINOP, addhn, 0) - BUILTIN_VQN (BINOP, subhn, 0) - BUILTIN_VQN (BINOP, raddhn, 0) - BUILTIN_VQN (BINOP, rsubhn, 0) + BUILTIN_VQN (BINOP, addhn, 0, NONE) + BUILTIN_VQN (BINOP, subhn, 0, NONE) + BUILTIN_VQN (BINOP, raddhn, 0, NONE) + BUILTIN_VQN (BINOP, rsubhn, 0, NONE) /* Implemented by aarch64_hn2. */ - BUILTIN_VQN (TERNOP, addhn2, 0) - BUILTIN_VQN (TERNOP, subhn2, 0) - BUILTIN_VQN (TERNOP, raddhn2, 0) - BUILTIN_VQN (TERNOP, rsubhn2, 0) + BUILTIN_VQN (TERNOP, addhn2, 0, NONE) + BUILTIN_VQN (TERNOP, subhn2, 0, NONE) + BUILTIN_VQN (TERNOP, raddhn2, 0, NONE) + BUILTIN_VQN (TERNOP, rsubhn2, 0, NONE) + + /* Implemented by aarch64_xtl. */ + BUILTIN_VQN (UNOP, sxtl, 0, NONE) + BUILTIN_VQN (UNOPU, uxtl, 0, NONE) + + /* Implemented by aarch64_xtn. */ + BUILTIN_VQN (UNOP, xtn, 0, NONE) + + /* Implemented by aarch64_mla. */ + BUILTIN_VDQ_BHSI (TERNOP, mla, 0, NONE) + /* Implemented by aarch64_mla_n. */ + BUILTIN_VDQHS (TERNOP, mla_n, 0, NONE) + + /* Implemented by aarch64_mls. */ + BUILTIN_VDQ_BHSI (TERNOP, mls, 0, NONE) + /* Implemented by aarch64_mls_n. */ + BUILTIN_VDQHS (TERNOP, mls_n, 0, NONE) + + /* Implemented by aarch64_shrn". */ + BUILTIN_VQN (SHIFTIMM, shrn, 0, NONE) + + /* Implemented by aarch64_shrn2. */ + BUILTIN_VQN (SHIFTACC, shrn2, 0, NONE) + + /* Implemented by aarch64_rshrn". 
*/ + BUILTIN_VQN (SHIFTIMM, rshrn, 0, NONE) + + /* Implemented by aarch64_rshrn2. */ + BUILTIN_VQN (SHIFTACC, rshrn2, 0, NONE) + + /* Implemented by aarch64_mlsl. */ + BUILTIN_VD_BHSI (TERNOP, smlsl, 0, NONE) + BUILTIN_VD_BHSI (TERNOPU, umlsl, 0, NONE) + + /* Implemented by aarch64_mlsl_n. */ + BUILTIN_VD_HSI (TERNOP, smlsl_n, 0, NONE) + BUILTIN_VD_HSI (TERNOPU, umlsl_n, 0, NONE) + + /* Implemented by aarch64_mlal. */ + BUILTIN_VD_BHSI (TERNOP, smlal, 0, NONE) + BUILTIN_VD_BHSI (TERNOPU, umlal, 0, NONE) + + /* Implemented by aarch64_mlal_n. */ + BUILTIN_VD_HSI (TERNOP, smlal_n, 0, NONE) + BUILTIN_VD_HSI (TERNOPU, umlal_n, 0, NONE) + + /* Implemented by aarch64_mlsl_hi. */ + BUILTIN_VQW (TERNOP, smlsl_hi, 0, NONE) + BUILTIN_VQW (TERNOPU, umlsl_hi, 0, NONE) + + /* Implemented by aarch64_mlsl_hi_n. */ + BUILTIN_VQ_HSI (TERNOP, smlsl_hi_n, 0, NONE) + BUILTIN_VQ_HSI (TERNOPU, umlsl_hi_n, 0, NONE) + + /* Implemented by aarch64_mlal_hi. */ + BUILTIN_VQW (TERNOP, smlal_hi, 0, NONE) + BUILTIN_VQW (TERNOPU, umlal_hi, 0, NONE) + + /* Implemented by aarch64_mlal_hi_n. */ + BUILTIN_VQ_HSI (TERNOP, smlal_hi_n, 0, NONE) + BUILTIN_VQ_HSI (TERNOPU, umlal_hi_n, 0, NONE) + + BUILTIN_VSQN_HSDI (UNOPUS, sqmovun, 0, NONE) + + /* Implemented by aarch64_sqxtun2. */ + BUILTIN_VQN (BINOP_UUS, sqxtun2, 0, NONE) - BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0) /* Implemented by aarch64_qmovn. */ - BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) - BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) + BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, NONE) + BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, NONE) + + /* Implemented by aarch64_qxtn2. */ + BUILTIN_VQN (BINOP, sqxtn2, 0, NONE) + BUILTIN_VQN (BINOPU, uqxtn2, 0, NONE) + /* Implemented by aarch64_s. */ - BUILTIN_VSDQ_I (UNOP, sqabs, 0) - BUILTIN_VSDQ_I (UNOP, sqneg, 0) + BUILTIN_VSDQ_I (UNOP, sqabs, 0, NONE) + BUILTIN_VSDQ_I (UNOP, sqneg, 0, NONE) /* Implemented by aarch64_sqdmll. */ - BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0) - BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0, NONE) + BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0, NONE) /* Implemented by aarch64_sqdmll_lane. */ - BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_lane, 0) - BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_lane, 0) + BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_lane, 0, NONE) + BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_lane, 0, NONE) /* Implemented by aarch64_sqdmll_laneq. */ - BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_laneq, 0) - BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_laneq, 0) + BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_laneq, 0, NONE) + BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_laneq, 0, NONE) /* Implemented by aarch64_sqdmll_n. 
*/ - BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0) - BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0) - - BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) - BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_lane, 0) - BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_lane, 0) - BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_laneq, 0) - BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_laneq, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) - - BUILTIN_VSD_HSI (BINOP, sqdmull, 0) - BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_lane, 0) - BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_laneq, 0) - BUILTIN_VD_HSI (BINOP, sqdmull_n, 0) - BUILTIN_VQ_HSI (BINOP, sqdmull2, 0) - BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_lane, 0) - BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_laneq, 0) - BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0) + BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0, NONE) + BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0, NONE) + + BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0, NONE) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0, NONE) + BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_lane, 0, NONE) + BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_lane, 0, NONE) + BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_laneq, 0, NONE) + BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_laneq, 0, NONE) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0, NONE) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0, NONE) + + BUILTIN_VD_BHSI (BINOP, intrinsic_vec_smult_lo_, 0, NONE) + BUILTIN_VD_BHSI (BINOPU, intrinsic_vec_umult_lo_, 0, NONE) + + BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10, NONE) + BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10, NONE) + + BUILTIN_VD_HSI (BINOP, smull_n, 0, NONE) + BUILTIN_VD_HSI (BINOPU, umull_n, 0, NONE) + + BUILTIN_VQ_HSI (BINOP, smull_hi_n, 0, NONE) + BUILTIN_VQ_HSI (BINOPU, umull_hi_n, 0, NONE) + + BUILTIN_VQ_HSI (TERNOP_LANE, smull_hi_lane, 0, NONE) + BUILTIN_VQ_HSI (TERNOP_LANE, smull_hi_laneq, 0, NONE) + BUILTIN_VQ_HSI (TERNOPU_LANE, umull_hi_lane, 0, NONE) + BUILTIN_VQ_HSI (TERNOPU_LANE, umull_hi_laneq, 0, NONE) + + BUILTIN_VD_HSI (TERNOP_LANE, vec_smult_lane_, 0, NONE) + BUILTIN_VD_HSI (QUADOP_LANE, vec_smlal_lane_, 0, NONE) + BUILTIN_VD_HSI (TERNOP_LANE, vec_smult_laneq_, 0, NONE) + BUILTIN_VD_HSI (QUADOP_LANE, vec_smlal_laneq_, 0, NONE) + BUILTIN_VD_HSI (TERNOPU_LANE, vec_umult_lane_, 0, NONE) + BUILTIN_VD_HSI (QUADOPU_LANE, vec_umlal_lane_, 0, NONE) + BUILTIN_VD_HSI (TERNOPU_LANE, vec_umult_laneq_, 0, NONE) + BUILTIN_VD_HSI (QUADOPU_LANE, vec_umlal_laneq_, 0, NONE) + + BUILTIN_VD_HSI (QUADOP_LANE, vec_smlsl_lane_, 0, NONE) + BUILTIN_VD_HSI (QUADOP_LANE, vec_smlsl_laneq_, 0, NONE) + BUILTIN_VD_HSI (QUADOPU_LANE, vec_umlsl_lane_, 0, NONE) + BUILTIN_VD_HSI (QUADOPU_LANE, vec_umlsl_laneq_, 0, NONE) + + BUILTIN_VQ_HSI (QUADOP_LANE, smlal_hi_lane, 0, NONE) + BUILTIN_VQ_HSI (QUADOP_LANE, smlal_hi_laneq, 0, NONE) + BUILTIN_VQ_HSI (QUADOPU_LANE, umlal_hi_lane, 0, NONE) + BUILTIN_VQ_HSI (QUADOPU_LANE, umlal_hi_laneq, 0, NONE) + + BUILTIN_VQ_HSI (QUADOP_LANE, smlsl_hi_lane, 0, NONE) + BUILTIN_VQ_HSI (QUADOP_LANE, smlsl_hi_laneq, 0, NONE) + BUILTIN_VQ_HSI (QUADOPU_LANE, umlsl_hi_lane, 0, NONE) + BUILTIN_VQ_HSI (QUADOPU_LANE, umlsl_hi_laneq, 0, NONE) + + BUILTIN_VSD_HSI (BINOP, sqdmull, 0, NONE) + BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_lane, 0, NONE) + BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_laneq, 0, NONE) + BUILTIN_VD_HSI (BINOP, sqdmull_n, 0, NONE) + BUILTIN_VQ_HSI (BINOP, sqdmull2, 0, NONE) + BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_lane, 0, NONE) + BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_laneq, 0, NONE) + BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0, NONE) /* Implemented by aarch64_sqdmulh. 
*/ - BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0) - BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0) + BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0, NONE) + BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0, NONE) /* Implemented by aarch64_sqdmulh_lane. */ - BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_lane, 0) - BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_laneq, 0) - BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_lane, 0) - BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_laneq, 0) + BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_lane, 0, NONE) + BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_laneq, 0, NONE) + BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_lane, 0, NONE) + BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_laneq, 0, NONE) - BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE) /* Implemented by aarch64_shl. */ - BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) - BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0) - BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) - BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE) + BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE) + BUILTIN_VSDQ_I_DI (BINOP, srshl, 0, NONE) + BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE) /* Implemented by aarch64_{_lane}{q}. */ - BUILTIN_VB (TERNOP, sdot, 0) - BUILTIN_VB (TERNOPU, udot, 0) - BUILTIN_VB (QUADOP_LANE, sdot_lane, 0) - BUILTIN_VB (QUADOPU_LANE, udot_lane, 0) - BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0) - BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0) - - BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) - VAR1 (SHIFTIMM, ashr_simd, 0, di) - BUILTIN_VDQ_I (SHIFTIMM, lshr, 3) - VAR1 (USHIFTIMM, lshr_simd, 0, di) + BUILTIN_VB (TERNOP, sdot, 0, NONE) + BUILTIN_VB (TERNOPU, udot, 0, NONE) + BUILTIN_VB (TERNOP_SSUS, usdot, 0, NONE) + BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE) + BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE) + BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0, NONE) + BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0, NONE) + BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_lane, 0, NONE) + BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_laneq, 0, NONE) + BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_lane, 0, NONE) + BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_laneq, 0, NONE) + + /* Implemented by aarch64_fcadd. */ + BUILTIN_VHSDF (BINOP, fcadd90, 0, FP) + BUILTIN_VHSDF (BINOP, fcadd270, 0, FP) + + /* Implemented by aarch64_fcmla{_lane}{q}. */ + BUILTIN_VHSDF (TERNOP, fcmla0, 0, FP) + BUILTIN_VHSDF (TERNOP, fcmla90, 0, FP) + BUILTIN_VHSDF (TERNOP, fcmla180, 0, FP) + BUILTIN_VHSDF (TERNOP, fcmla270, 0, FP) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane0, 0, FP) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane90, 0, FP) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane180, 0, FP) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane270, 0, FP) + + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane0, 0, FP) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane90, 0, FP) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane180, 0, FP) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane270, 0, FP) + + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE) + VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di) + BUILTIN_VDQ_I (SHIFTIMM, lshr, 3, NONE) + VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di) /* Implemented by aarch64_shr_n. */ - BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) - BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0, NONE) + BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0, NONE) /* Implemented by aarch64_sra_n. 
*/ - BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) - BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) - BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0, NONE) + BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0, NONE) + BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0, NONE) + BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0, NONE) /* Implemented by aarch64_shll_n. */ - BUILTIN_VD_BHSI (SHIFTIMM, sshll_n, 0) - BUILTIN_VD_BHSI (USHIFTIMM, ushll_n, 0) + BUILTIN_VD_BHSI (SHIFTIMM, sshll_n, 0, NONE) + BUILTIN_VD_BHSI (USHIFTIMM, ushll_n, 0, NONE) /* Implemented by aarch64_shll2_n. */ - BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) - BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) + BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE) + BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE) /* Implemented by aarch64_qshrn_n. */ - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) - BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) - BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0, NONE) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0, NONE) + BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, NONE) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE) + BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE) + /* Implemented by aarch64_qshrn2_n. */ + BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE) + BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE) + BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, NONE) + BUILTIN_VQN (USHIFT2IMM, uqshrn2_n, 0, NONE) + BUILTIN_VQN (SHIFT2IMM, sqrshrn2_n, 0, NONE) + BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, NONE) /* Implemented by aarch64_si_n. */ - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) - BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) - VAR2 (SHIFTINSERTP, ssli_n, 0, di, v2di) - BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0, NONE) + BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0, NONE) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0, NONE) + VAR2 (SHIFTINSERTP, ssli_n, 0, NONE, di, v2di) + BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0, NONE) /* Implemented by aarch64_qshl_n. */ - BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0) - BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) - BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0, NONE) + BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0, NONE) + BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0, NONE) + + /* Implemented by aarch64_xtn2. */ + BUILTIN_VQN (UNOP, xtn2, 0, NONE) + + /* Implemented by vec_unpack_hi_. */ + BUILTIN_VQW (UNOP, vec_unpacks_hi_, 10, NONE) + BUILTIN_VQW (UNOPU, vec_unpacku_hi_, 10, NONE) /* Implemented by aarch64_reduc_plus_. */ - BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) + BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, NONE) /* Implemented by reduc__scal_ (producing scalar). 
*/ - BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10) - BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10) - BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) - BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) - BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10) - BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10) + BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10, NONE) + BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10, NONE) + BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, NONE) + BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, NONE) + BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10, NONE) + BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10, NONE) /* Implemented by 3. smax variants map to fmaxnm, smax_nan variants map to fmax. */ - BUILTIN_VDQ_BHSI (BINOP, smax, 3) - BUILTIN_VDQ_BHSI (BINOP, smin, 3) - BUILTIN_VDQ_BHSI (BINOP, umax, 3) - BUILTIN_VDQ_BHSI (BINOP, umin, 3) - BUILTIN_VHSDF_DF (BINOP, smax_nan, 3) - BUILTIN_VHSDF_DF (BINOP, smin_nan, 3) + BUILTIN_VDQ_BHSI (BINOP, smax, 3, NONE) + BUILTIN_VDQ_BHSI (BINOP, smin, 3, NONE) + BUILTIN_VDQ_BHSI (BINOP, umax, 3, NONE) + BUILTIN_VDQ_BHSI (BINOP, umin, 3, NONE) + BUILTIN_VHSDF_DF (BINOP, smax_nan, 3, NONE) + BUILTIN_VHSDF_DF (BINOP, smin_nan, 3, NONE) /* Implemented by 3. */ - BUILTIN_VHSDF_HSDF (BINOP, fmax, 3) - BUILTIN_VHSDF_HSDF (BINOP, fmin, 3) + BUILTIN_VHSDF_HSDF (BINOP, fmax, 3, FP) + BUILTIN_VHSDF_HSDF (BINOP, fmin, 3, FP) /* Implemented by aarch64_p. */ - BUILTIN_VDQ_BHSI (BINOP, smaxp, 0) - BUILTIN_VDQ_BHSI (BINOP, sminp, 0) - BUILTIN_VDQ_BHSI (BINOP, umaxp, 0) - BUILTIN_VDQ_BHSI (BINOP, uminp, 0) - BUILTIN_VHSDF (BINOP, smaxp, 0) - BUILTIN_VHSDF (BINOP, sminp, 0) - BUILTIN_VHSDF (BINOP, smax_nanp, 0) - BUILTIN_VHSDF (BINOP, smin_nanp, 0) + BUILTIN_VDQ_BHSI (BINOP, smaxp, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, sminp, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, umaxp, 0, NONE) + BUILTIN_VDQ_BHSI (BINOP, uminp, 0, NONE) + BUILTIN_VHSDF (BINOP, smaxp, 0, NONE) + BUILTIN_VHSDF (BINOP, sminp, 0, NONE) + BUILTIN_VHSDF (BINOP, smax_nanp, 0, NONE) + BUILTIN_VHSDF (BINOP, smin_nanp, 0, NONE) /* Implemented by 2. */ - BUILTIN_VHSDF (UNOP, btrunc, 2) - BUILTIN_VHSDF (UNOP, ceil, 2) - BUILTIN_VHSDF (UNOP, floor, 2) - BUILTIN_VHSDF (UNOP, nearbyint, 2) - BUILTIN_VHSDF (UNOP, rint, 2) - BUILTIN_VHSDF (UNOP, round, 2) - BUILTIN_VHSDF_DF (UNOP, frintn, 2) - - VAR1 (UNOP, btrunc, 2, hf) - VAR1 (UNOP, ceil, 2, hf) - VAR1 (UNOP, floor, 2, hf) - VAR1 (UNOP, frintn, 2, hf) - VAR1 (UNOP, nearbyint, 2, hf) - VAR1 (UNOP, rint, 2, hf) - VAR1 (UNOP, round, 2, hf) + BUILTIN_VHSDF (UNOP, btrunc, 2, FP) + BUILTIN_VHSDF (UNOP, ceil, 2, FP) + BUILTIN_VHSDF (UNOP, floor, 2, FP) + BUILTIN_VHSDF (UNOP, nearbyint, 2, FP) + BUILTIN_VHSDF (UNOP, rint, 2, FP) + BUILTIN_VHSDF (UNOP, round, 2, FP) + BUILTIN_VHSDF_HSDF (UNOP, frintn, 2, FP) + + VAR1 (UNOP, btrunc, 2, FP, hf) + VAR1 (UNOP, ceil, 2, FP, hf) + VAR1 (UNOP, floor, 2, FP, hf) + VAR1 (UNOP, nearbyint, 2, FP, hf) + VAR1 (UNOP, rint, 2, FP, hf) + VAR1 (UNOP, round, 2, FP, hf) /* Implemented by l2. 
*/ - VAR1 (UNOP, lbtruncv4hf, 2, v4hi) - VAR1 (UNOP, lbtruncv8hf, 2, v8hi) - VAR1 (UNOP, lbtruncv2sf, 2, v2si) - VAR1 (UNOP, lbtruncv4sf, 2, v4si) - VAR1 (UNOP, lbtruncv2df, 2, v2di) - - VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi) - VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi) - VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si) - VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si) - VAR1 (UNOPUS, lbtruncuv2df, 2, v2di) - - VAR1 (UNOP, lroundv4hf, 2, v4hi) - VAR1 (UNOP, lroundv8hf, 2, v8hi) - VAR1 (UNOP, lroundv2sf, 2, v2si) - VAR1 (UNOP, lroundv4sf, 2, v4si) - VAR1 (UNOP, lroundv2df, 2, v2di) + VAR1 (UNOP, lbtruncv4hf, 2, FP, v4hi) + VAR1 (UNOP, lbtruncv8hf, 2, FP, v8hi) + VAR1 (UNOP, lbtruncv2sf, 2, FP, v2si) + VAR1 (UNOP, lbtruncv4sf, 2, FP, v4si) + VAR1 (UNOP, lbtruncv2df, 2, FP, v2di) + + VAR1 (UNOPUS, lbtruncuv4hf, 2, FP, v4hi) + VAR1 (UNOPUS, lbtruncuv8hf, 2, FP, v8hi) + VAR1 (UNOPUS, lbtruncuv2sf, 2, FP, v2si) + VAR1 (UNOPUS, lbtruncuv4sf, 2, FP, v4si) + VAR1 (UNOPUS, lbtruncuv2df, 2, FP, v2di) + + VAR1 (UNOP, lroundv4hf, 2, FP, v4hi) + VAR1 (UNOP, lroundv8hf, 2, FP, v8hi) + VAR1 (UNOP, lroundv2sf, 2, FP, v2si) + VAR1 (UNOP, lroundv4sf, 2, FP, v4si) + VAR1 (UNOP, lroundv2df, 2, FP, v2di) /* Implemented by l2. */ - BUILTIN_GPI_I16 (UNOP, lroundhf, 2) - VAR1 (UNOP, lroundsf, 2, si) - VAR1 (UNOP, lrounddf, 2, di) - - VAR1 (UNOPUS, lrounduv4hf, 2, v4hi) - VAR1 (UNOPUS, lrounduv8hf, 2, v8hi) - VAR1 (UNOPUS, lrounduv2sf, 2, v2si) - VAR1 (UNOPUS, lrounduv4sf, 2, v4si) - VAR1 (UNOPUS, lrounduv2df, 2, v2di) - BUILTIN_GPI_I16 (UNOPUS, lrounduhf, 2) - VAR1 (UNOPUS, lroundusf, 2, si) - VAR1 (UNOPUS, lroundudf, 2, di) - - VAR1 (UNOP, lceilv4hf, 2, v4hi) - VAR1 (UNOP, lceilv8hf, 2, v8hi) - VAR1 (UNOP, lceilv2sf, 2, v2si) - VAR1 (UNOP, lceilv4sf, 2, v4si) - VAR1 (UNOP, lceilv2df, 2, v2di) - BUILTIN_GPI_I16 (UNOP, lceilhf, 2) - - VAR1 (UNOPUS, lceiluv4hf, 2, v4hi) - VAR1 (UNOPUS, lceiluv8hf, 2, v8hi) - VAR1 (UNOPUS, lceiluv2sf, 2, v2si) - VAR1 (UNOPUS, lceiluv4sf, 2, v4si) - VAR1 (UNOPUS, lceiluv2df, 2, v2di) - BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2) - VAR1 (UNOPUS, lceilusf, 2, si) - VAR1 (UNOPUS, lceiludf, 2, di) - - VAR1 (UNOP, lfloorv4hf, 2, v4hi) - VAR1 (UNOP, lfloorv8hf, 2, v8hi) - VAR1 (UNOP, lfloorv2sf, 2, v2si) - VAR1 (UNOP, lfloorv4sf, 2, v4si) - VAR1 (UNOP, lfloorv2df, 2, v2di) - BUILTIN_GPI_I16 (UNOP, lfloorhf, 2) - - VAR1 (UNOPUS, lflooruv4hf, 2, v4hi) - VAR1 (UNOPUS, lflooruv8hf, 2, v8hi) - VAR1 (UNOPUS, lflooruv2sf, 2, v2si) - VAR1 (UNOPUS, lflooruv4sf, 2, v4si) - VAR1 (UNOPUS, lflooruv2df, 2, v2di) - BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2) - VAR1 (UNOPUS, lfloorusf, 2, si) - VAR1 (UNOPUS, lfloorudf, 2, di) - - VAR1 (UNOP, lfrintnv4hf, 2, v4hi) - VAR1 (UNOP, lfrintnv8hf, 2, v8hi) - VAR1 (UNOP, lfrintnv2sf, 2, v2si) - VAR1 (UNOP, lfrintnv4sf, 2, v4si) - VAR1 (UNOP, lfrintnv2df, 2, v2di) - BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2) - VAR1 (UNOP, lfrintnsf, 2, si) - VAR1 (UNOP, lfrintndf, 2, di) - - VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi) - VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi) - VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si) - VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si) - VAR1 (UNOPUS, lfrintnuv2df, 2, v2di) - BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2) - VAR1 (UNOPUS, lfrintnusf, 2, si) - VAR1 (UNOPUS, lfrintnudf, 2, di) + BUILTIN_GPI_I16 (UNOP, lroundhf, 2, FP) + VAR1 (UNOP, lroundsf, 2, FP, si) + VAR1 (UNOP, lrounddf, 2, FP, di) + + VAR1 (UNOPUS, lrounduv4hf, 2, FP, v4hi) + VAR1 (UNOPUS, lrounduv8hf, 2, FP, v8hi) + VAR1 (UNOPUS, lrounduv2sf, 2, FP, v2si) + VAR1 (UNOPUS, lrounduv4sf, 2, FP, v4si) + VAR1 (UNOPUS, lrounduv2df, 2, FP, v2di) + BUILTIN_GPI_I16 
(UNOPUS, lrounduhf, 2, FP) + VAR1 (UNOPUS, lroundusf, 2, FP, si) + VAR1 (UNOPUS, lroundudf, 2, FP, di) + + VAR1 (UNOP, lceilv4hf, 2, FP, v4hi) + VAR1 (UNOP, lceilv8hf, 2, FP, v8hi) + VAR1 (UNOP, lceilv2sf, 2, FP, v2si) + VAR1 (UNOP, lceilv4sf, 2, FP, v4si) + VAR1 (UNOP, lceilv2df, 2, FP, v2di) + BUILTIN_GPI_I16 (UNOP, lceilhf, 2, FP) + + VAR1 (UNOPUS, lceiluv4hf, 2, FP, v4hi) + VAR1 (UNOPUS, lceiluv8hf, 2, FP, v8hi) + VAR1 (UNOPUS, lceiluv2sf, 2, FP, v2si) + VAR1 (UNOPUS, lceiluv4sf, 2, FP, v4si) + VAR1 (UNOPUS, lceiluv2df, 2, FP, v2di) + BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2, FP) + VAR1 (UNOPUS, lceilusf, 2, FP, si) + VAR1 (UNOPUS, lceiludf, 2, FP, di) + + VAR1 (UNOP, lfloorv4hf, 2, FP, v4hi) + VAR1 (UNOP, lfloorv8hf, 2, FP, v8hi) + VAR1 (UNOP, lfloorv2sf, 2, FP, v2si) + VAR1 (UNOP, lfloorv4sf, 2, FP, v4si) + VAR1 (UNOP, lfloorv2df, 2, FP, v2di) + BUILTIN_GPI_I16 (UNOP, lfloorhf, 2, FP) + + VAR1 (UNOPUS, lflooruv4hf, 2, FP, v4hi) + VAR1 (UNOPUS, lflooruv8hf, 2, FP, v8hi) + VAR1 (UNOPUS, lflooruv2sf, 2, FP, v2si) + VAR1 (UNOPUS, lflooruv4sf, 2, FP, v4si) + VAR1 (UNOPUS, lflooruv2df, 2, FP, v2di) + BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2, FP) + VAR1 (UNOPUS, lfloorusf, 2, FP, si) + VAR1 (UNOPUS, lfloorudf, 2, FP, di) + + VAR1 (UNOP, lfrintnv4hf, 2, FP, v4hi) + VAR1 (UNOP, lfrintnv8hf, 2, FP, v8hi) + VAR1 (UNOP, lfrintnv2sf, 2, FP, v2si) + VAR1 (UNOP, lfrintnv4sf, 2, FP, v4si) + VAR1 (UNOP, lfrintnv2df, 2, FP, v2di) + BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2, FP) + VAR1 (UNOP, lfrintnsf, 2, FP, si) + VAR1 (UNOP, lfrintndf, 2, FP, di) + + VAR1 (UNOPUS, lfrintnuv4hf, 2, FP, v4hi) + VAR1 (UNOPUS, lfrintnuv8hf, 2, FP, v8hi) + VAR1 (UNOPUS, lfrintnuv2sf, 2, FP, v2si) + VAR1 (UNOPUS, lfrintnuv4sf, 2, FP, v4si) + VAR1 (UNOPUS, lfrintnuv2df, 2, FP, v2di) + BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2, FP) + VAR1 (UNOPUS, lfrintnusf, 2, FP, si) + VAR1 (UNOPUS, lfrintnudf, 2, FP, di) /* Implemented by 2. */ - VAR1 (UNOP, floatv4hi, 2, v4hf) - VAR1 (UNOP, floatv8hi, 2, v8hf) - VAR1 (UNOP, floatv2si, 2, v2sf) - VAR1 (UNOP, floatv4si, 2, v4sf) - VAR1 (UNOP, floatv2di, 2, v2df) + VAR1 (UNOP, floatv4hi, 2, FP, v4hf) + VAR1 (UNOP, floatv8hi, 2, FP, v8hf) + VAR1 (UNOP, floatv2si, 2, FP, v2sf) + VAR1 (UNOP, floatv4si, 2, FP, v4sf) + VAR1 (UNOP, floatv2di, 2, FP, v2df) - VAR1 (UNOP, floatunsv4hi, 2, v4hf) - VAR1 (UNOP, floatunsv8hi, 2, v8hf) - VAR1 (UNOP, floatunsv2si, 2, v2sf) - VAR1 (UNOP, floatunsv4si, 2, v4sf) - VAR1 (UNOP, floatunsv2di, 2, v2df) + VAR1 (UNOP, floatunsv4hi, 2, FP, v4hf) + VAR1 (UNOP, floatunsv8hi, 2, FP, v8hf) + VAR1 (UNOP, floatunsv2si, 2, FP, v2sf) + VAR1 (UNOP, floatunsv4si, 2, FP, v4sf) + VAR1 (UNOP, floatunsv2di, 2, FP, v2df) - VAR5 (UNOPU, bswap, 2, v4hi, v8hi, v2si, v4si, v2di) + VAR5 (UNOPU, bswap, 2, NONE, v4hi, v8hi, v2si, v4si, v2di) - BUILTIN_VB (UNOP, rbit, 0) + BUILTIN_VB (UNOP, rbit, 0, NONE) /* Implemented by - aarch64_. */ - BUILTIN_VALL (BINOP, zip1, 0) - BUILTIN_VALL (BINOP, zip2, 0) - BUILTIN_VALL (BINOP, uzp1, 0) - BUILTIN_VALL (BINOP, uzp2, 0) - BUILTIN_VALL (BINOP, trn1, 0) - BUILTIN_VALL (BINOP, trn2, 0) + aarch64_. */ + BUILTIN_VALL (BINOP, zip1, 0, AUTO_FP) + BUILTIN_VALL (BINOP, zip2, 0, AUTO_FP) + BUILTIN_VALL (BINOP, uzp1, 0, AUTO_FP) + BUILTIN_VALL (BINOP, uzp2, 0, AUTO_FP) + BUILTIN_VALL (BINOP, trn1, 0, AUTO_FP) + BUILTIN_VALL (BINOP, trn2, 0, AUTO_FP) - /* Implemented by - aarch64_frecp. 
*/ - BUILTIN_GPF_F16 (UNOP, frecpe, 0) - BUILTIN_GPF_F16 (UNOP, frecpx, 0) + BUILTIN_GPF_F16 (UNOP, frecpe, 0, FP) + BUILTIN_GPF_F16 (UNOP, frecpx, 0, FP) - BUILTIN_VDQ_SI (UNOP, urecpe, 0) + BUILTIN_VDQ_SI (UNOP, urecpe, 0, NONE) - BUILTIN_VHSDF (UNOP, frecpe, 0) - BUILTIN_VHSDF_HSDF (BINOP, frecps, 0) + BUILTIN_VHSDF (UNOP, frecpe, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP, frecps, 0, FP) /* Implemented by a mixture of abs2 patterns. Note the DImode builtin is only ever used for the int64x1_t intrinsic, there is no scalar version. */ - BUILTIN_VSDQ_I_DI (UNOP, abs, 0) - BUILTIN_VHSDF (UNOP, abs, 2) - VAR1 (UNOP, abs, 2, hf) + BUILTIN_VSDQ_I_DI (UNOP, abs, 0, AUTO_FP) + BUILTIN_VHSDF (UNOP, abs, 2, AUTO_FP) + VAR1 (UNOP, abs, 2, AUTO_FP, hf) - BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10) - VAR1 (BINOP, float_truncate_hi_, 0, v4sf) - VAR1 (BINOP, float_truncate_hi_, 0, v8hf) + BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10, FP) + VAR1 (BINOP, float_truncate_hi_, 0, FP, v4sf) + VAR1 (BINOP, float_truncate_hi_, 0, FP, v8hf) - VAR1 (UNOP, float_extend_lo_, 0, v2df) - VAR1 (UNOP, float_extend_lo_, 0, v4sf) - BUILTIN_VDF (UNOP, float_truncate_lo_, 0) + VAR1 (UNOP, float_extend_lo_, 0, FP, v2df) + VAR1 (UNOP, float_extend_lo_, 0, FP, v4sf) + BUILTIN_VDF (UNOP, float_truncate_lo_, 0, FP) /* Implemented by aarch64_ld1. */ - BUILTIN_VALL_F16 (LOAD1, ld1, 0) - VAR1(STORE1P, ld1, 0, v2di) + BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD) + VAR1(STORE1P, ld1, 0, ALL, v2di) /* Implemented by aarch64_st1. */ - BUILTIN_VALL_F16 (STORE1, st1, 0) - VAR1(STORE1P, st1, 0, v2di) + BUILTIN_VALL_F16 (STORE1, st1, 0, STORE) + VAR1 (STORE1P, st1, 0, STORE, v2di) + + /* Implemented by aarch64_ld1x3. */ + BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0, LOAD) + + /* Implemented by aarch64_ld1x4. */ + BUILTIN_VALLDIF (LOADSTRUCT, ld1x4, 0, LOAD) + + /* Implemented by aarch64_st1x2. */ + BUILTIN_VALLDIF (STORESTRUCT, st1x2, 0, STORE) + + /* Implemented by aarch64_st1x3. */ + BUILTIN_VALLDIF (STORESTRUCT, st1x3, 0, STORE) + + /* Implemented by aarch64_st1x4. */ + BUILTIN_VALLDIF (STORESTRUCT, st1x4, 0, STORE) /* Implemented by fma4. */ - BUILTIN_VHSDF (TERNOP, fma, 4) - VAR1 (TERNOP, fma, 4, hf) + BUILTIN_VHSDF (TERNOP, fma, 4, FP) + VAR1 (TERNOP, fma, 4, FP, hf) /* Implemented by fnma4. */ - BUILTIN_VHSDF (TERNOP, fnma, 4) - VAR1 (TERNOP, fnma, 4, hf) + BUILTIN_VHSDF (TERNOP, fnma, 4, FP) + VAR1 (TERNOP, fnma, 4, FP, hf) /* Implemented by aarch64_simd_bsl. */ - BUILTIN_VDQQH (BSL_P, simd_bsl, 0) - VAR2 (BSL_P, simd_bsl,0, di, v2di) - BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0) - BUILTIN_VALLDIF (BSL_S, simd_bsl, 0) + BUILTIN_VDQQH (BSL_P, simd_bsl, 0, NONE) + VAR2 (BSL_P, simd_bsl,0, NONE, di, v2di) + BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0, NONE) + BUILTIN_VALLDIF (BSL_S, simd_bsl, 0, AUTO_FP) /* Implemented by aarch64_crypto_aes. */ - VAR1 (BINOPU, crypto_aese, 0, v16qi) - VAR1 (BINOPU, crypto_aesd, 0, v16qi) - VAR1 (UNOPU, crypto_aesmc, 0, v16qi) - VAR1 (UNOPU, crypto_aesimc, 0, v16qi) + VAR1 (BINOPU, crypto_aese, 0, NONE, v16qi) + VAR1 (BINOPU, crypto_aesd, 0, NONE, v16qi) + VAR1 (UNOPU, crypto_aesmc, 0, NONE, v16qi) + VAR1 (UNOPU, crypto_aesimc, 0, NONE, v16qi) /* Implemented by aarch64_crypto_sha1. 
*/ - VAR1 (UNOPU, crypto_sha1h, 0, si) - VAR1 (BINOPU, crypto_sha1su1, 0, v4si) - VAR1 (TERNOPU, crypto_sha1c, 0, v4si) - VAR1 (TERNOPU, crypto_sha1m, 0, v4si) - VAR1 (TERNOPU, crypto_sha1p, 0, v4si) - VAR1 (TERNOPU, crypto_sha1su0, 0, v4si) + VAR1 (UNOPU, crypto_sha1h, 0, NONE, si) + VAR1 (BINOPU, crypto_sha1su1, 0, NONE, v4si) + VAR1 (TERNOPU, crypto_sha1c, 0, NONE, v4si) + VAR1 (TERNOPU, crypto_sha1m, 0, NONE, v4si) + VAR1 (TERNOPU, crypto_sha1p, 0, NONE, v4si) + VAR1 (TERNOPU, crypto_sha1su0, 0, NONE, v4si) /* Implemented by aarch64_crypto_sha256. */ - VAR1 (TERNOPU, crypto_sha256h, 0, v4si) - VAR1 (TERNOPU, crypto_sha256h2, 0, v4si) - VAR1 (BINOPU, crypto_sha256su0, 0, v4si) - VAR1 (TERNOPU, crypto_sha256su1, 0, v4si) + VAR1 (TERNOPU, crypto_sha256h, 0, NONE, v4si) + VAR1 (TERNOPU, crypto_sha256h2, 0, NONE, v4si) + VAR1 (BINOPU, crypto_sha256su0, 0, NONE, v4si) + VAR1 (TERNOPU, crypto_sha256su1, 0, NONE, v4si) /* Implemented by aarch64_crypto_pmull. */ - VAR1 (BINOPP, crypto_pmull, 0, di) - VAR1 (BINOPP, crypto_pmull, 0, v2di) + VAR1 (BINOPP, crypto_pmull, 0, NONE, di) + VAR1 (BINOPP, crypto_pmull, 0, NONE, v2di) /* Implemented by aarch64_tbl3. */ - VAR1 (BINOP, tbl3, 0, v8qi) - VAR1 (BINOP, tbl3, 0, v16qi) + VAR1 (BINOP, tbl3, 0, NONE, v8qi) + VAR1 (BINOP, tbl3, 0, NONE, v16qi) /* Implemented by aarch64_qtbl3. */ - VAR1 (BINOP, qtbl3, 0, v8qi) - VAR1 (BINOP, qtbl3, 0, v16qi) + VAR1 (BINOP, qtbl3, 0, NONE, v8qi) + VAR1 (BINOP, qtbl3, 0, NONE, v16qi) /* Implemented by aarch64_qtbl4. */ - VAR1 (BINOP, qtbl4, 0, v8qi) - VAR1 (BINOP, qtbl4, 0, v16qi) + VAR1 (BINOP, qtbl4, 0, NONE, v8qi) + VAR1 (BINOP, qtbl4, 0, NONE, v16qi) /* Implemented by aarch64_tbx4. */ - VAR1 (TERNOP, tbx4, 0, v8qi) - VAR1 (TERNOP, tbx4, 0, v16qi) + VAR1 (TERNOP, tbx4, 0, NONE, v8qi) + VAR1 (TERNOP, tbx4, 0, NONE, v16qi) /* Implemented by aarch64_qtbx3. */ - VAR1 (TERNOP, qtbx3, 0, v8qi) - VAR1 (TERNOP, qtbx3, 0, v16qi) + VAR1 (TERNOP, qtbx3, 0, NONE, v8qi) + VAR1 (TERNOP, qtbx3, 0, NONE, v16qi) /* Implemented by aarch64_qtbx4. */ - VAR1 (TERNOP, qtbx4, 0, v8qi) - VAR1 (TERNOP, qtbx4, 0, v16qi) + VAR1 (TERNOP, qtbx4, 0, NONE, v8qi) + VAR1 (TERNOP, qtbx4, 0, NONE, v16qi) /* Builtins for ARMv8.1-A Adv.SIMD instructions. */ /* Implemented by aarch64_sqrdmlh. */ - BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0) - BUILTIN_VSDQ_HSI (TERNOP, sqrdmlsh, 0) + BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0, NONE) + BUILTIN_VSDQ_HSI (TERNOP, sqrdmlsh, 0, NONE) /* Implemented by aarch64_sqrdmlh_lane. */ - BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_lane, 0) - BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_lane, 0) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_lane, 0, NONE) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_lane, 0, NONE) /* Implemented by aarch64_sqrdmlh_laneq. */ - BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0) - BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0, NONE) + BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0, NONE) /* Implemented by <*><*>3. 
*/ - BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3) - BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3) - BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3) - BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3) - VAR1 (SHIFTIMM, scvtfsi, 3, hf) - VAR1 (SHIFTIMM, scvtfdi, 3, hf) - VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf) - VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf) - BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3) - BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3) + BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3, FP) + BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3, FP) + BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3, FP) + BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3, FP) + VAR1 (SHIFTIMM, scvtfsi, 3, FP, hf) + VAR1 (SHIFTIMM, scvtfdi, 3, FP, hf) + VAR1 (FCVTIMM_SUS, ucvtfsi, 3, FP, hf) + VAR1 (FCVTIMM_SUS, ucvtfdi, 3, FP, hf) + BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3, FP) + BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3, FP) /* Implemented by aarch64_rsqrte. */ - BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0) + BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0, FP) /* Implemented by aarch64_rsqrts. */ - BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0) + BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0, FP) + + /* Implemented by aarch64_ursqrte. */ + BUILTIN_VDQ_SI (UNOPU, ursqrte, 0, NONE) /* Implemented by fabd3. */ - BUILTIN_VHSDF_HSDF (BINOP, fabd, 3) + BUILTIN_VHSDF_HSDF (BINOP, fabd, 3, FP) /* Implemented by aarch64_faddp. */ - BUILTIN_VHSDF (BINOP, faddp, 0) + BUILTIN_VHSDF (BINOP, faddp, 0, FP) /* Implemented by aarch64_cm. */ - BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0) - BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0) - BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0) - BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0) - BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0, FP) /* Implemented by neg2. */ - BUILTIN_VHSDF_HSDF (UNOP, neg, 2) + BUILTIN_VHSDF_HSDF (UNOP, neg, 2, ALL) /* Implemented by aarch64_fac. */ - BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0) - BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0) - BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0) - BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0, FP) + BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0, FP) /* Implemented by sqrt2. */ - VAR1 (UNOP, sqrt, 2, hf) + VAR1 (UNOP, sqrt, 2, FP, hf) /* Implemented by hf2. */ - VAR1 (UNOP, floatdi, 2, hf) - VAR1 (UNOP, floatsi, 2, hf) - VAR1 (UNOP, floathi, 2, hf) - VAR1 (UNOPUS, floatunsdi, 2, hf) - VAR1 (UNOPUS, floatunssi, 2, hf) - VAR1 (UNOPUS, floatunshi, 2, hf) - BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2) - BUILTIN_GPI (UNOP, fix_truncsf, 2) - BUILTIN_GPI (UNOP, fix_truncdf, 2) - BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) - BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2) - BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) + VAR1 (UNOP, floatdi, 2, FP, hf) + VAR1 (UNOP, floatsi, 2, FP, hf) + VAR1 (UNOP, floathi, 2, FP, hf) + VAR1 (UNOPUS, floatunsdi, 2, FP, hf) + VAR1 (UNOPUS, floatunssi, 2, FP, hf) + VAR1 (UNOPUS, floatunshi, 2, FP, hf) + BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2, FP) + BUILTIN_GPI (UNOP, fix_truncsf, 2, FP) + BUILTIN_GPI (UNOP, fix_truncdf, 2, FP) + BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2, FP) + BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2, FP) + BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2, FP) /* Implemented by aarch64_sm3ss1qv4si. */ - VAR1 (TERNOPU, sm3ss1q, 0, v4si) + VAR1 (TERNOPU, sm3ss1q, 0, NONE, v4si) /* Implemented by aarch64_sm3ttqv4si. 
*/ - VAR1 (QUADOPUI, sm3tt1aq, 0, v4si) - VAR1 (QUADOPUI, sm3tt1bq, 0, v4si) - VAR1 (QUADOPUI, sm3tt2aq, 0, v4si) - VAR1 (QUADOPUI, sm3tt2bq, 0, v4si) + VAR1 (QUADOPUI, sm3tt1aq, 0, NONE, v4si) + VAR1 (QUADOPUI, sm3tt1bq, 0, NONE, v4si) + VAR1 (QUADOPUI, sm3tt2aq, 0, NONE, v4si) + VAR1 (QUADOPUI, sm3tt2bq, 0, NONE, v4si) /* Implemented by aarch64_sm3partwqv4si. */ - VAR1 (TERNOPU, sm3partw1q, 0, v4si) - VAR1 (TERNOPU, sm3partw2q, 0, v4si) + VAR1 (TERNOPU, sm3partw1q, 0, NONE, v4si) + VAR1 (TERNOPU, sm3partw2q, 0, NONE, v4si) /* Implemented by aarch64_sm4eqv4si. */ - VAR1 (BINOPU, sm4eq, 0, v4si) + VAR1 (BINOPU, sm4eq, 0, NONE, v4si) /* Implemented by aarch64_sm4ekeyqv4si. */ - VAR1 (BINOPU, sm4ekeyq, 0, v4si) + VAR1 (BINOPU, sm4ekeyq, 0, NONE, v4si) /* Implemented by aarch64_crypto_sha512hqv2di. */ - VAR1 (TERNOPU, crypto_sha512hq, 0, v2di) + VAR1 (TERNOPU, crypto_sha512hq, 0, NONE, v2di) /* Implemented by aarch64_sha512h2qv2di. */ - VAR1 (TERNOPU, crypto_sha512h2q, 0, v2di) + VAR1 (TERNOPU, crypto_sha512h2q, 0, NONE, v2di) /* Implemented by aarch64_crypto_sha512su0qv2di. */ - VAR1 (BINOPU, crypto_sha512su0q, 0, v2di) + VAR1 (BINOPU, crypto_sha512su0q, 0, NONE, v2di) /* Implemented by aarch64_crypto_sha512su1qv2di. */ - VAR1 (TERNOPU, crypto_sha512su1q, 0, v2di) - /* Implemented by aarch64_eor3qv8hi. */ - VAR1 (TERNOPU, eor3q, 0, v8hi) + VAR1 (TERNOPU, crypto_sha512su1q, 0, NONE, v2di) + /* Implemented by eor3q4. */ + BUILTIN_VQ_I (TERNOPU, eor3q, 4, NONE) + BUILTIN_VQ_I (TERNOP, eor3q, 4, NONE) /* Implemented by aarch64_rax1qv2di. */ - VAR1 (BINOPU, rax1q, 0, v2di) + VAR1 (BINOPU, rax1q, 0, NONE, v2di) /* Implemented by aarch64_xarqv2di. */ - VAR1 (TERNOPUI, xarq, 0, v2di) - /* Implemented by aarch64_bcaxqv8hi. */ - VAR1 (TERNOPU, bcaxq, 0, v8hi) + VAR1 (TERNOPUI, xarq, 0, NONE, v2di) + /* Implemented by bcaxq4. */ + BUILTIN_VQ_I (TERNOPU, bcaxq, 4, NONE) + BUILTIN_VQ_I (TERNOP, bcaxq, 4, NONE) /* Implemented by aarch64_fmll_low. */ - VAR1 (TERNOP, fmlal_low, 0, v2sf) - VAR1 (TERNOP, fmlsl_low, 0, v2sf) - VAR1 (TERNOP, fmlalq_low, 0, v4sf) - VAR1 (TERNOP, fmlslq_low, 0, v4sf) + VAR1 (TERNOP, fmlal_low, 0, FP, v2sf) + VAR1 (TERNOP, fmlsl_low, 0, FP, v2sf) + VAR1 (TERNOP, fmlalq_low, 0, FP, v4sf) + VAR1 (TERNOP, fmlslq_low, 0, FP, v4sf) /* Implemented by aarch64_fmll_high. */ - VAR1 (TERNOP, fmlal_high, 0, v2sf) - VAR1 (TERNOP, fmlsl_high, 0, v2sf) - VAR1 (TERNOP, fmlalq_high, 0, v4sf) - VAR1 (TERNOP, fmlslq_high, 0, v4sf) + VAR1 (TERNOP, fmlal_high, 0, FP, v2sf) + VAR1 (TERNOP, fmlsl_high, 0, FP, v2sf) + VAR1 (TERNOP, fmlalq_high, 0, FP, v4sf) + VAR1 (TERNOP, fmlslq_high, 0, FP, v4sf) /* Implemented by aarch64_fmll_lane_lowv2sf. */ - VAR1 (QUADOP_LANE, fmlal_lane_low, 0, v2sf) - VAR1 (QUADOP_LANE, fmlsl_lane_low, 0, v2sf) + VAR1 (QUADOP_LANE, fmlal_lane_low, 0, FP, v2sf) + VAR1 (QUADOP_LANE, fmlsl_lane_low, 0, FP, v2sf) /* Implemented by aarch64_fmll_laneq_lowv2sf. */ - VAR1 (QUADOP_LANE, fmlal_laneq_low, 0, v2sf) - VAR1 (QUADOP_LANE, fmlsl_laneq_low, 0, v2sf) + VAR1 (QUADOP_LANE, fmlal_laneq_low, 0, FP, v2sf) + VAR1 (QUADOP_LANE, fmlsl_laneq_low, 0, FP, v2sf) /* Implemented by aarch64_fmllq_lane_lowv4sf. */ - VAR1 (QUADOP_LANE, fmlalq_lane_low, 0, v4sf) - VAR1 (QUADOP_LANE, fmlslq_lane_low, 0, v4sf) + VAR1 (QUADOP_LANE, fmlalq_lane_low, 0, FP, v4sf) + VAR1 (QUADOP_LANE, fmlslq_lane_low, 0, FP, v4sf) /* Implemented by aarch64_fmllq_laneq_lowv4sf. 
*/ - VAR1 (QUADOP_LANE, fmlalq_laneq_low, 0, v4sf) - VAR1 (QUADOP_LANE, fmlslq_laneq_low, 0, v4sf) + VAR1 (QUADOP_LANE, fmlalq_laneq_low, 0, FP, v4sf) + VAR1 (QUADOP_LANE, fmlslq_laneq_low, 0, FP, v4sf) /* Implemented by aarch64_fmll_lane_highv2sf. */ - VAR1 (QUADOP_LANE, fmlal_lane_high, 0, v2sf) - VAR1 (QUADOP_LANE, fmlsl_lane_high, 0, v2sf) + VAR1 (QUADOP_LANE, fmlal_lane_high, 0, FP, v2sf) + VAR1 (QUADOP_LANE, fmlsl_lane_high, 0, FP, v2sf) /* Implemented by aarch64_fmll_laneq_highv2sf. */ - VAR1 (QUADOP_LANE, fmlal_laneq_high, 0, v2sf) - VAR1 (QUADOP_LANE, fmlsl_laneq_high, 0, v2sf) + VAR1 (QUADOP_LANE, fmlal_laneq_high, 0, FP, v2sf) + VAR1 (QUADOP_LANE, fmlsl_laneq_high, 0, FP, v2sf) /* Implemented by aarch64_fmllq_lane_highv4sf. */ - VAR1 (QUADOP_LANE, fmlalq_lane_high, 0, v4sf) - VAR1 (QUADOP_LANE, fmlslq_lane_high, 0, v4sf) + VAR1 (QUADOP_LANE, fmlalq_lane_high, 0, FP, v4sf) + VAR1 (QUADOP_LANE, fmlslq_lane_high, 0, FP, v4sf) /* Implemented by aarch64_fmllq_laneq_highv4sf. */ - VAR1 (QUADOP_LANE, fmlalq_laneq_high, 0, v4sf) - VAR1 (QUADOP_LANE, fmlslq_laneq_high, 0, v4sf) + VAR1 (QUADOP_LANE, fmlalq_laneq_high, 0, FP, v4sf) + VAR1 (QUADOP_LANE, fmlslq_laneq_high, 0, FP, v4sf) + + /* Implemented by aarch64_. */ + BUILTIN_VSFDF (UNOP, frint32z, 0, FP) + BUILTIN_VSFDF (UNOP, frint32x, 0, FP) + BUILTIN_VSFDF (UNOP, frint64z, 0, FP) + BUILTIN_VSFDF (UNOP, frint64x, 0, FP) + + /* Implemented by aarch64_bfdot{_lane}{q}. */ + VAR2 (TERNOP, bfdot, 0, AUTO_FP, v2sf, v4sf) + VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, AUTO_FP, v2sf, v4sf) + VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, AUTO_FP, v2sf, v4sf) + + /* Implemented by aarch64_bfmmlaqv4sf */ + VAR1 (TERNOP, bfmmlaq, 0, AUTO_FP, v4sf) + + /* Implemented by aarch64_bfmlal{_lane{q}}v4sf */ + VAR1 (TERNOP, bfmlalb, 0, FP, v4sf) + VAR1 (TERNOP, bfmlalt, 0, FP, v4sf) + VAR1 (QUADOP_LANE, bfmlalb_lane, 0, FP, v4sf) + VAR1 (QUADOP_LANE, bfmlalt_lane, 0, FP, v4sf) + VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, FP, v4sf) + VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, FP, v4sf) + + /* Implemented by aarch64_vget_lo/hi_halfv8bf. */ + VAR1 (UNOP, vget_lo_half, 0, AUTO_FP, v8bf) + VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf) + + /* Implemented by aarch64_simd_mmlav16qi. */ + VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi) + VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi) + VAR1 (TERNOP_SSUS, simd_usmmla, 0, NONE, v16qi) + + /* Implemented by aarch64_bfcvtn{q}{2} */ + VAR1 (UNOP, bfcvtn, 0, FP, v4bf) + VAR1 (UNOP, bfcvtn_q, 0, FP, v8bf) + VAR1 (BINOP, bfcvtn2, 0, FP, v8bf) + VAR1 (UNOP, bfcvt, 0, FP, bf) + + /* Implemented by aarch64_{v}bfcvt{_high}. */ + VAR2 (UNOP, vbfcvt, 0, AUTO_FP, v4bf, v8bf) + VAR1 (UNOP, vbfcvt_high, 0, AUTO_FP, v8bf) + VAR1 (UNOP, bfcvt, 0, AUTO_FP, sf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 1154fc3d58dea..4edee99051c4e 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1,5 +1,5 @@ ;; Machine description for AArch64 AdvSIMD architecture. -;; Copyright (C) 2011-2018 Free Software Foundation, Inc. +;; Copyright (C) 2011-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -19,8 +19,8 @@ ;; . 
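Throughout the aarch64-simd-builtins.def hunks above, every BUILTIN_*/VARn entry gains a trailing flags column (NONE, FP, AUTO_FP, LOAD, STORE, ALL), so the macros that consume the file have to accept one extra argument. The following is a minimal, self-contained sketch of how such a column is typically threaded through an X-macro table; the names here (SKETCH_TABLE, EXPAND_ENTRY, enum sketch_flag) are invented for illustration and are not GCC's real macros.

#include <stdio.h>

/* Invented for illustration; not GCC's real definitions.  */
enum sketch_flag { FLAG_NONE, FLAG_FP, FLAG_LOAD, FLAG_STORE };

/* The table: each ENTRY carries (kind, name, map, FLAG).  Adding the FLAG
   column here is the analogue of the ", NONE" / ", FP" additions in the
   .def hunks above.  */
#define SKETCH_TABLE(ENTRY)             \
  ENTRY (UNOP,  sqabs, 0, NONE)         \
  ENTRY (BINOP, fmax,  3, FP)           \
  ENTRY (LOAD1, ld1,   0, LOAD)

struct sketch_builtin
{
  const char *name;
  int map;
  enum sketch_flag flag;
};

/* One expansion of the table: build an initializer per entry.  */
#define EXPAND_ENTRY(T, N, MAP, FLAG) { #T "_" #N, MAP, FLAG_##FLAG },

static const struct sketch_builtin sketch_builtins[] =
{
  SKETCH_TABLE (EXPAND_ENTRY)
};

int
main (void)
{
  size_t n = sizeof sketch_builtins / sizeof sketch_builtins[0];
  for (size_t i = 0; i < n; i++)
    printf ("%s: map=%d flag=%d\n", sketch_builtins[i].name,
            sketch_builtins[i].map, sketch_builtins[i].flag);
  return 0;
}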
(define_expand "mov" - [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "") - (match_operand:VALL_F16 1 "general_operand" ""))] + [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand") + (match_operand:VALL_F16MOV 1 "general_operand"))] "TARGET_SIMD" " /* Force the operand into a register if it is not an @@ -35,13 +35,24 @@ && aarch64_mem_pair_operand (operands[0], DImode)) || known_eq (GET_MODE_SIZE (mode), 8)))) operands[1] = force_reg (mode, operands[1]); + + /* If a constant is too complex to force to memory (e.g. because it + contains CONST_POLY_INTs), build it up from individual elements instead. + We should only need to do this before RA; aarch64_legitimate_constant_p + should ensure that we don't try to rematerialize the constant later. */ + if (GET_CODE (operands[1]) == CONST_VECTOR + && targetm.cannot_force_const_mem (mode, operands[1])) + { + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; + } " ) (define_expand "movmisalign" - [(set (match_operand:VALL 0 "nonimmediate_operand" "") - (match_operand:VALL 1 "general_operand" ""))] - "TARGET_SIMD" + [(set (match_operand:VALL 0 "nonimmediate_operand") + (match_operand:VALL 1 "general_operand"))] + "TARGET_SIMD && !STRICT_ALIGNMENT" { /* This pattern is not permitted to fail during expansion: if both arguments are non-registers (e.g. memory := constant, which can be created by the @@ -101,10 +112,10 @@ [(set_attr "type" "neon_dup")] ) -(define_insn "*aarch64_simd_mov" - [(set (match_operand:VD 0 "nonimmediate_operand" +(define_insn "*aarch64_simd_mov" + [(set (match_operand:VDMOV 0 "nonimmediate_operand" "=w, m, m, w, ?r, ?w, ?r, w") - (match_operand:VD 1 "general_operand" + (match_operand:VDMOV 1 "general_operand" "m, Dz, w, w, w, r, r, Dn"))] "TARGET_SIMD && (register_operand (operands[0], mode) @@ -129,10 +140,10 @@ mov_reg, neon_move")] ) -(define_insn "*aarch64_simd_mov" - [(set (match_operand:VQ 0 "nonimmediate_operand" - "=w, Umq, m, w, ?r, ?w, ?r, w") - (match_operand:VQ 1 "general_operand" +(define_insn "*aarch64_simd_mov" + [(set (match_operand:VQMOV 0 "nonimmediate_operand" + "=w, Umn, m, w, ?r, ?w, ?r, w") + (match_operand:VQMOV 1 "general_operand" "m, Dz, w, w, w, r, r, Dn"))] "TARGET_SIMD && (register_operand (operands[0], mode) @@ -177,37 +188,65 @@ [(set_attr "type" "neon_store1_1reg")] ) -(define_insn "load_pair" - [(set (match_operand:VD 0 "register_operand" "=w") - (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) - (set (match_operand:VD 2 "register_operand" "=w") - (match_operand:VD 3 "memory_operand" "m"))] +(define_insn "load_pair" + [(set (match_operand:DREG 0 "register_operand" "=w") + (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:DREG2 2 "register_operand" "=w") + (match_operand:DREG2 3 "memory_operand" "m"))] "TARGET_SIMD && rtx_equal_p (XEXP (operands[3], 0), plus_constant (Pmode, XEXP (operands[1], 0), - GET_MODE_SIZE (mode)))" - "ldp\\t%d0, %d2, %1" + GET_MODE_SIZE (mode)))" + "ldp\\t%d0, %d2, %z1" [(set_attr "type" "neon_ldp")] ) -(define_insn "store_pair" - [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump") - (match_operand:VD 1 "register_operand" "w")) - (set (match_operand:VD 2 "memory_operand" "=m") - (match_operand:VD 3 "register_operand" "w"))] +(define_insn "vec_store_pair" + [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:DREG 1 "register_operand" "w")) + (set (match_operand:DREG2 2 "memory_operand" "=m") + (match_operand:DREG2 3 "register_operand" "w"))] "TARGET_SIMD && rtx_equal_p (XEXP 
(operands[2], 0), plus_constant (Pmode, XEXP (operands[0], 0), - GET_MODE_SIZE (mode)))" - "stp\\t%d1, %d3, %0" + GET_MODE_SIZE (mode)))" + "stp\\t%d1, %d3, %z0" [(set_attr "type" "neon_stp")] ) +(define_insn "load_pair" + [(set (match_operand:VQ 0 "register_operand" "=w") + (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:VQ2 2 "register_operand" "=w") + (match_operand:VQ2 3 "memory_operand" "m"))] + "TARGET_SIMD + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (mode)))" + "ldp\\t%q0, %q2, %z1" + [(set_attr "type" "neon_ldp_q")] +) + +(define_insn "vec_store_pair" + [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:VQ 1 "register_operand" "w")) + (set (match_operand:VQ2 2 "memory_operand" "=m") + (match_operand:VQ2 3 "register_operand" "w"))] + "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (mode)))" + "stp\\t%q1, %q3, %z0" + [(set_attr "type" "neon_stp_q")] +) + + (define_split - [(set (match_operand:VQ 0 "register_operand" "") - (match_operand:VQ 1 "register_operand" ""))] + [(set (match_operand:VQMOV 0 "register_operand" "") + (match_operand:VQMOV 1 "register_operand" ""))] "TARGET_SIMD && reload_completed && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" @@ -218,8 +257,8 @@ }) (define_split - [(set (match_operand:VQ 0 "register_operand" "") - (match_operand:VQ 1 "register_operand" ""))] + [(set (match_operand:VQMOV 0 "register_operand" "") + (match_operand:VQMOV 1 "register_operand" ""))] "TARGET_SIMD && reload_completed && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" @@ -229,9 +268,9 @@ DONE; }) -(define_expand "aarch64_split_simd_mov" - [(set (match_operand:VQ 0) - (match_operand:VQ 1))] +(define_expand "@aarch64_split_simd_mov" + [(set (match_operand:VQMOV 0) + (match_operand:VQMOV 1))] "TARGET_SIMD" { rtx dst = operands[0]; @@ -254,37 +293,73 @@ rtx dst_high_part = gen_highpart (mode, dst); rtx lo = aarch64_simd_vect_par_cnst_half (mode, , false); rtx hi = aarch64_simd_vect_par_cnst_half (mode, , true); - - emit_insn - (gen_aarch64_simd_mov_from_low (dst_low_part, src, lo)); - emit_insn - (gen_aarch64_simd_mov_from_high (dst_high_part, src, hi)); + emit_insn (gen_aarch64_get_half (dst_low_part, src, lo)); + emit_insn (gen_aarch64_get_half (dst_high_part, src, hi)); } DONE; } ) -(define_insn "aarch64_simd_mov_from_low" - [(set (match_operand: 0 "register_operand" "=r") +(define_expand "aarch64_get_half" + [(set (match_operand: 0 "register_operand") (vec_select: - (match_operand:VQ 1 "register_operand" "w") - (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] - "TARGET_SIMD && reload_completed" - "umov\t%0, %1.d[0]" - [(set_attr "type" "neon_to_gp") - (set_attr "length" "4") - ]) + (match_operand:VQMOV 1 "register_operand") + (match_operand 2 "ascending_int_parallel")))] + "TARGET_SIMD" +) + +(define_expand "aarch64_get_low" + [(match_operand: 0 "register_operand") + (match_operand:VQMOV 1 "register_operand")] + "TARGET_SIMD" + { + rtx lo = aarch64_simd_vect_par_cnst_half (mode, , false); + emit_insn (gen_aarch64_get_half (operands[0], operands[1], lo)); + DONE; + } +) + +(define_expand "aarch64_get_high" + [(match_operand: 0 "register_operand") + (match_operand:VQMOV 1 "register_operand")] + "TARGET_SIMD" + { + rtx hi = aarch64_simd_vect_par_cnst_half (mode, , true); + 
emit_insn (gen_aarch64_get_half (operands[0], operands[1], hi)); + DONE; + } +) + +(define_insn_and_split "aarch64_simd_mov_from_low" + [(set (match_operand: 0 "register_operand" "=w,?r") + (vec_select: + (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))] + "TARGET_SIMD" + "@ + # + umov\t%0, %1.d[0]" + "&& reload_completed && aarch64_simd_register (operands[0], mode)" + [(set (match_dup 0) (match_dup 1))] + { + operands[1] = aarch64_replace_reg_mode (operands[1], mode); + } + [(set_attr "type" "mov_reg,neon_to_gp") + (set_attr "length" "4")] +) (define_insn "aarch64_simd_mov_from_high" - [(set (match_operand: 0 "register_operand" "=r") + [(set (match_operand: 0 "register_operand" "=w,?r") (vec_select: - (match_operand:VQ 1 "register_operand" "w") - (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] - "TARGET_SIMD && reload_completed" - "umov\t%0, %1.d[1]" - [(set_attr "type" "neon_to_gp") - (set_attr "length" "4") - ]) + (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))] + "TARGET_SIMD" + "@ + dup\\t%d0, %1.d[1] + umov\t%0, %1.d[1]" + [(set_attr "type" "neon_dup,neon_to_gp") + (set_attr "length" "4")] +) (define_insn "orn3" [(set (match_operand:VDQ_I 0 "register_operand" "=w") @@ -391,6 +466,127 @@ } ) +;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the +;; fact that their usage need to guarantee that the source vectors are +;; contiguous. It would be wrong to describe the operation without being able +;; to describe the permute that is also required, but even if that is done +;; the permute would have been created as a LOAD_LANES which means the values +;; in the registers are in the wrong order. 
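As a reading aid for the FCADD pattern that follows: with each even/odd element pair treated as (real, imag), the two supported rotations amount to adding the second operand multiplied by +i or by -i. A scalar sketch of that arithmetic, assuming the usual Armv8.3-A rotation convention; the authoritative definition is the Arm ARM pseudocode, not this snippet.

#include <stdio.h>

/* Illustrative scalar model of one complex element pair.  */
struct cpx { float re, im; };

/* FCADD #90 ~ a + i*b, since i*(br + i*bi) = -bi + i*br.  */
static struct cpx
fcadd_rot90 (struct cpx a, struct cpx b)
{
  struct cpx r = { a.re - b.im, a.im + b.re };
  return r;
}

/* FCADD #270 ~ a - i*b.  */
static struct cpx
fcadd_rot270 (struct cpx a, struct cpx b)
{
  struct cpx r = { a.re + b.im, a.im - b.re };
  return r;
}

int
main (void)
{
  struct cpx a = { 1.0f, 2.0f }, b = { 3.0f, 4.0f };
  struct cpx r90 = fcadd_rot90 (a, b);
  struct cpx r270 = fcadd_rot270 (a, b);
  printf ("rot90:  (%g, %g)\n", r90.re, r90.im);    /* (-3, 5) */
  printf ("rot270: (%g, %g)\n", r270.re, r270.im);  /* (5, -1) */
  return 0;
}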
+(define_insn "aarch64_fcadd" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") + (match_operand:VHSDF 2 "register_operand" "w")] + FCADD))] + "TARGET_COMPLEX" + "fcadd\t%0., %1., %2., #" + [(set_attr "type" "neon_fcadd")] +) + +(define_expand "cadd3" + [(set (match_operand:VHSDF 0 "register_operand") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCADD))] + "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" +) + +(define_insn "aarch64_fcmla" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0") + (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w") + (match_operand:VHSDF 3 "register_operand" "w")] + FCMLA)))] + "TARGET_COMPLEX" + "fcmla\t%0., %2., %3., #" + [(set_attr "type" "neon_fcmla")] +) + + +(define_insn "aarch64_fcmla_lane" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0") + (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w") + (match_operand:VHSDF 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" +{ + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return "fcmla\t%0., %2., %3., #"; +} + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "aarch64_fcmla_laneqv4hf" + [(set (match_operand:V4HF 0 "register_operand" "=w") + (plus:V4HF (match_operand:V4HF 1 "register_operand" "0") + (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w") + (match_operand:V8HF 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" +{ + operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); + return "fcmla\t%0.4h, %2.4h, %3.h[%4], #"; +} + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "aarch64_fcmlaq_lane" + [(set (match_operand:VQ_HSF 0 "register_operand" "=w") + (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0") + (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" +{ + int nunits = GET_MODE_NUNITS (mode).to_constant (); + operands[4] + = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode); + return "fcmla\t%0., %2., %3., #"; +} + [(set_attr "type" "neon_fcmla")] +) + +;; The complex mla/mls operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. Because of this, expand early. +(define_expand "cml4" + [(set (match_operand:VHSDF 0 "register_operand") + (plus:VHSDF (match_operand:VHSDF 1 "register_operand") + (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") + (match_operand:VHSDF 3 "register_operand")] + FCMLA_OP)))] + "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_aarch64_fcmla (tmp, operands[1], + operands[3], operands[2])); + emit_insn (gen_aarch64_fcmla (operands[0], tmp, + operands[3], operands[2])); + DONE; +}) + +;; The complex mul operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. Because of this, expand early. 
+(define_expand "cmul3" + [(set (match_operand:VHSDF 0 "register_operand") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCMUL_OP))] + "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" +{ + rtx tmp = force_reg (mode, CONST0_RTX (mode)); + rtx res1 = gen_reg_rtx (mode); + emit_insn (gen_aarch64_fcmla (res1, tmp, + operands[2], operands[1])); + emit_insn (gen_aarch64_fcmla (operands[0], res1, + operands[2], operands[1])); + DONE; +}) + ;; These instructions map to the __builtins for the Dot Product operations. (define_insn "aarch64_dot" [(set (match_operand:VS 0 "register_operand" "=w") @@ -400,7 +596,21 @@ DOTPROD)))] "TARGET_DOTPROD" "dot\\t%0., %2., %3." - [(set_attr "type" "neon_dot")] + [(set_attr "type" "neon_dot")] +) + +;; These instructions map to the __builtins for the armv8.6a I8MM usdot +;; (vector) Dot Product operation. +(define_insn "aarch64_usdot" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + UNSPEC_USDOT) + (match_operand:VS 1 "register_operand" "0")))] + "TARGET_I8MM" + "usdot\\t%0., %2., %3." + [(set_attr "type" "neon_dot")] ) ;; These expands map to the Dot Product optab the vectorizer checks for. @@ -452,7 +662,7 @@ operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4])); return "dot\\t%0., %2., %3.4b[%4]"; } - [(set_attr "type" "neon_dot")] + [(set_attr "type" "neon_dot")] ) (define_insn "aarch64_dot_laneq" @@ -467,7 +677,27 @@ operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4])); return "dot\\t%0., %2., %3.4b[%4]"; } - [(set_attr "type" "neon_dot")] + [(set_attr "type" "neon_dot")] +) + +;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot +;; (by element) Dot Product operations. 
+(define_insn "aarch64_dot_lane" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand:VB 3 "register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD_I8MM) + (match_operand:VS 1 "register_operand" "0")))] + "TARGET_I8MM" + { + int nunits = GET_MODE_NUNITS (mode).to_constant (); + int lane = INTVAL (operands[4]); + operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode); + return "dot\\t%0., %2., %3.4b[%4]"; + } + [(set_attr "type" "neon_dot")] ) (define_expand "copysign3" @@ -531,7 +761,7 @@ [(set_attr "type" "neon_mul__scalar")] ) -(define_insn "aarch64_rsqrte" +(define_insn "@aarch64_rsqrte" [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] UNSPEC_RSQRTE))] @@ -539,7 +769,7 @@ "frsqrte\\t%0, %1" [(set_attr "type" "neon_fp_rsqrte_")]) -(define_insn "aarch64_rsqrts" +(define_insn "@aarch64_rsqrts" [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w") (match_operand:VHSDF_HSDF 2 "register_operand" "w")] @@ -549,8 +779,8 @@ [(set_attr "type" "neon_fp_rsqrts_")]) (define_expand "rsqrt2" - [(set (match_operand:VALLF 0 "register_operand" "=w") - (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] + [(set (match_operand:VALLF 0 "register_operand") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand")] UNSPEC_RSQRT))] "TARGET_SIMD" { @@ -558,6 +788,14 @@ DONE; }) +(define_insn "aarch64_ursqrte" +[(set (match_operand:VDQ_SI 0 "register_operand" "=w") + (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")] + UNSPEC_RSQRTE))] +"TARGET_SIMD" +"ursqrte\\t%0, %1" +[(set_attr "type" "neon_fp_rsqrte_")]) + (define_insn "*aarch64_mul3_elt_to_64v2df" [(set (match_operand:DF 0 "register_operand" "=w") (mult:DF @@ -602,24 +840,139 @@ [(set_attr "type" "neon_abs")] ) -(define_insn "abd_3" +;; It's tempting to represent SABD as ABS (MINUS op1 op2). +;; This isn't accurate as ABS treats always its input as a signed value. +;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64. +;; Whereas SABD would return 192 (-64 signed) on the above example. +;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead. +(define_insn "aarch64_abd" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") - (abs:VDQ_BHSI (minus:VDQ_BHSI - (match_operand:VDQ_BHSI 1 "register_operand" "w") - (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] + (minus:VDQ_BHSI + (USMAX:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w")) + (:VDQ_BHSI + (match_dup 1) + (match_dup 2))))] + "TARGET_SIMD" + "abd\t%0., %1., %2." + [(set_attr "type" "neon_abd")] +) + + +(define_insn "aarch64_abdl" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VD_BHSI 1 "register_operand" "w") + (match_operand:VD_BHSI 2 "register_operand" "w")] + ABDL))] "TARGET_SIMD" - "sabd\t%0., %1., %2." + "abdl\t%0., %1., %2." 
[(set_attr "type" "neon_abd")] ) -(define_insn "aba_3" - [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") - (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI - (match_operand:VDQ_BHSI 1 "register_operand" "w") - (match_operand:VDQ_BHSI 2 "register_operand" "w"))) - (match_operand:VDQ_BHSI 3 "register_operand" "0")))] +(define_insn "aarch64_abdl2" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + ABDL2))] + "TARGET_SIMD" + "abdl2\t%0., %1., %2." + [(set_attr "type" "neon_abd")] +) + +(define_insn "aarch64_abal" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VD_BHSI 2 "register_operand" "w") + (match_operand:VD_BHSI 3 "register_operand" "w") + (match_operand: 1 "register_operand" "0")] + ABAL))] + "TARGET_SIMD" + "abal\t%0., %2., %3." + [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "aarch64_abal2" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "register_operand" "w") + (match_operand: 1 "register_operand" "0")] + ABAL2))] + "TARGET_SIMD" + "abal2\t%0., %2., %3." + [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "aarch64_adalp" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VDQV_S 2 "register_operand" "w") + (match_operand: 1 "register_operand" "0")] + ADALP))] + "TARGET_SIMD" + "adalp\t%0., %2." + [(set_attr "type" "neon_reduc_add")] +) + +;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI +;; inputs in operands 1 and 2. The sequence also has to perform a widening +;; reduction of the difference into a V4SI vector and accumulate that into +;; operand 3 before copying that into the result operand 0. +;; Perform that with a sequence of: +;; UABDL2 tmp.8h, op1.16b, op2.16b +;; UABAL tmp.8h, op1.8b, op2.8b +;; UADALP op3.4s, tmp.8h +;; MOV op0, op3 // should be eliminated in later passes. +;; +;; For TARGET_DOTPROD we do: +;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops. +;; UABD tmp2.16b, op1.16b, op2.16b +;; UDOT op3.4s, tmp2.16b, tmp1.16b +;; MOV op0, op3 // RA will tie the operands of UDOT appropriately. +;; +;; The signed version just uses the signed variants of the above instructions +;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is +;; unsigned. + +(define_expand "sadv16qi" + [(use (match_operand:V4SI 0 "register_operand")) + (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand")) + (use (match_operand:V16QI 2 "register_operand"))] ABAL) + (use (match_operand:V4SI 3 "register_operand"))] "TARGET_SIMD" - "saba\t%0., %1., %2." 
+ { + if (TARGET_DOTPROD) + { + rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode)); + rtx abd = gen_reg_rtx (V16QImode); + emit_insn (gen_aarch64_abdv16qi (abd, operands[1], operands[2])); + emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3], + abd, ones)); + DONE; + } + rtx reduc = gen_reg_rtx (V8HImode); + emit_insn (gen_aarch64_abdl2v16qi (reduc, operands[1], + operands[2])); + emit_insn (gen_aarch64_abalv8qi (reduc, reduc, + gen_lowpart (V8QImode, operands[1]), + gen_lowpart (V8QImode, + operands[2]))); + emit_insn (gen_aarch64_adalpv8hi (operands[3], operands[3], reduc)); + emit_move_insn (operands[0], operands[3]); + DONE; + } +) + +(define_insn "aarch64_aba" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (plus:VDQ_BHSI (minus:VDQ_BHSI + (USMAX:VDQ_BHSI + (match_operand:VDQ_BHSI 2 "register_operand" "w") + (match_operand:VDQ_BHSI 3 "register_operand" "w")) + (:VDQ_BHSI + (match_dup 2) + (match_dup 3))) + (match_operand:VDQ_BHSI 1 "register_operand" "0")))] + "TARGET_SIMD" + "aba\t%0., %2., %3." [(set_attr "type" "neon_arith_acc")] ) @@ -694,11 +1047,11 @@ ) (define_insn "aarch64_simd_vec_set" - [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w") - (vec_merge:VDQ_BHSI - (vec_duplicate:VDQ_BHSI - (match_operand: 1 "aarch64_simd_general_operand" "r,w,Utv")) - (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0") + [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w") + (vec_merge:VALL_F16 + (vec_duplicate:VALL_F16 + (match_operand: 1 "aarch64_simd_general_operand" "w,?r,Utv")) + (match_operand:VALL_F16 3 "register_operand" "0,0,0") (match_operand:SI 2 "immediate_operand" "i,i,i")))] "TARGET_SIMD" { @@ -707,19 +1060,19 @@ switch (which_alternative) { case 0: - return "ins\\t%0.[%p2], %w1"; - case 1: return "ins\\t%0.[%p2], %1.[0]"; + case 1: + return "ins\\t%0.[%p2], %1"; case 2: return "ld1\\t{%0.}[%p2], %1"; default: gcc_unreachable (); } } - [(set_attr "type" "neon_from_gp, neon_ins, neon_load1_one_lane")] + [(set_attr "type" "neon_ins, neon_from_gp, neon_load1_one_lane")] ) -(define_insn "*aarch64_simd_vec_copy_lane" +(define_insn "@aarch64_simd_vec_copy_lane" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_merge:VALL_F16 (vec_duplicate:VALL_F16 @@ -762,6 +1115,21 @@ [(set_attr "type" "neon_ins")] ) +(define_expand "signbit2" + [(use (match_operand: 0 "register_operand")) + (use (match_operand:VDQSF 1 "register_operand"))] + "TARGET_SIMD" +{ + int shift_amount = GET_MODE_UNIT_BITSIZE (mode) - 1; + rtx shift_vector = aarch64_simd_gen_const_vector_dup (mode, + shift_amount); + operands[1] = lowpart_subreg (mode, operands[1], mode); + + emit_insn (gen_aarch64_simd_lshr (operands[0], operands[1], + shift_vector)); + DONE; +}) + (define_insn "aarch64_simd_lshr" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") @@ -780,6 +1148,18 @@ [(set_attr "type" "neon_shift_imm")] ) +(define_insn "*aarch64_simd_sra" + [(set (match_operand:VDQ_I 0 "register_operand" "=w") + (plus:VDQ_I + (SHIFTRT:VDQ_I + (match_operand:VDQ_I 1 "register_operand" "w") + (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")) + (match_operand:VDQ_I 3 "register_operand" "0")))] + "TARGET_SIMD" + "sra\t%0., %1., %2" + [(set_attr "type" "neon_shift_acc")] +) + (define_insn "aarch64_simd_imm_shl" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") @@ -819,9 +1199,9 @@ ) (define_expand "ashl3" - [(match_operand:VDQ_I 0 "register_operand" "") 
- (match_operand:VDQ_I 1 "register_operand" "") - (match_operand:SI 2 "general_operand" "")] + [(match_operand:VDQ_I 0 "register_operand") + (match_operand:VDQ_I 1 "register_operand") + (match_operand:SI 2 "general_operand")] "TARGET_SIMD" { int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; @@ -839,36 +1219,22 @@ tmp)); DONE; } - else - { - operands[2] = force_reg (SImode, operands[2]); - } - } - else if (MEM_P (operands[2])) - { - operands[2] = force_reg (SImode, operands[2]); } - if (REG_P (operands[2])) - { - rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_aarch64_simd_dup (tmp, - convert_to_mode (mode, - operands[2], - 0))); - emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], - tmp)); - DONE; - } - else - FAIL; -} -) + operands[2] = force_reg (SImode, operands[2]); + + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_aarch64_simd_dup (tmp, convert_to_mode (mode, + operands[2], + 0))); + emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], tmp)); + DONE; +}) (define_expand "lshr3" - [(match_operand:VDQ_I 0 "register_operand" "") - (match_operand:VDQ_I 1 "register_operand" "") - (match_operand:SI 2 "general_operand" "")] + [(match_operand:VDQ_I 0 "register_operand") + (match_operand:VDQ_I 1 "register_operand") + (match_operand:SI 2 "general_operand")] "TARGET_SIMD" { int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; @@ -886,36 +1252,24 @@ tmp)); DONE; } - else - operands[2] = force_reg (SImode, operands[2]); - } - else if (MEM_P (operands[2])) - { - operands[2] = force_reg (SImode, operands[2]); } - if (REG_P (operands[2])) - { - rtx tmp = gen_reg_rtx (SImode); - rtx tmp1 = gen_reg_rtx (mode); - emit_insn (gen_negsi2 (tmp, operands[2])); - emit_insn (gen_aarch64_simd_dup (tmp1, - convert_to_mode (mode, - tmp, 0))); - emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], - operands[1], - tmp1)); - DONE; - } - else - FAIL; -} -) + operands[2] = force_reg (SImode, operands[2]); + + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + emit_insn (gen_aarch64_simd_dup (tmp1, + convert_to_mode (mode, tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], operands[1], + tmp1)); + DONE; +}) (define_expand "ashr3" - [(match_operand:VDQ_I 0 "register_operand" "") - (match_operand:VDQ_I 1 "register_operand" "") - (match_operand:SI 2 "general_operand" "")] + [(match_operand:VDQ_I 0 "register_operand") + (match_operand:VDQ_I 1 "register_operand") + (match_operand:SI 2 "general_operand")] "TARGET_SIMD" { int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; @@ -933,36 +1287,24 @@ tmp)); DONE; } - else - operands[2] = force_reg (SImode, operands[2]); - } - else if (MEM_P (operands[2])) - { - operands[2] = force_reg (SImode, operands[2]); } - if (REG_P (operands[2])) - { - rtx tmp = gen_reg_rtx (SImode); - rtx tmp1 = gen_reg_rtx (mode); - emit_insn (gen_negsi2 (tmp, operands[2])); - emit_insn (gen_aarch64_simd_dup (tmp1, - convert_to_mode (mode, - tmp, 0))); - emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], - operands[1], - tmp1)); - DONE; - } - else - FAIL; -} -) + operands[2] = force_reg (SImode, operands[2]); + + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + emit_insn (gen_aarch64_simd_dup (tmp1, convert_to_mode (mode, + tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], operands[1], + tmp1)); + DONE; +}) (define_expand "vashl3" - [(match_operand:VDQ_I 0 "register_operand" "") - 
(match_operand:VDQ_I 1 "register_operand" "") - (match_operand:VDQ_I 2 "register_operand" "")] + [(match_operand:VDQ_I 0 "register_operand") + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand")] "TARGET_SIMD" { emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], @@ -974,9 +1316,9 @@ ;; Negating individual lanes most certainly offsets the ;; gain from vectorization. (define_expand "vashr3" - [(match_operand:VDQ_BHSI 0 "register_operand" "") - (match_operand:VDQ_BHSI 1 "register_operand" "") - (match_operand:VDQ_BHSI 2 "register_operand" "")] + [(match_operand:VDQ_BHSI 0 "register_operand") + (match_operand:VDQ_BHSI 1 "register_operand") + (match_operand:VDQ_BHSI 2 "register_operand")] "TARGET_SIMD" { rtx neg = gen_reg_rtx (mode); @@ -988,9 +1330,9 @@ ;; DI vector shift (define_expand "aarch64_ashr_simddi" - [(match_operand:DI 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "w") - (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "aarch64_shift_imm64_di")] "TARGET_SIMD" { /* An arithmetic shift right by 64 fills the result with copies of the sign @@ -1004,9 +1346,9 @@ ) (define_expand "vlshr3" - [(match_operand:VDQ_BHSI 0 "register_operand" "") - (match_operand:VDQ_BHSI 1 "register_operand" "") - (match_operand:VDQ_BHSI 2 "register_operand" "")] + [(match_operand:VDQ_BHSI 0 "register_operand") + (match_operand:VDQ_BHSI 1 "register_operand") + (match_operand:VDQ_BHSI 2 "register_operand")] "TARGET_SIMD" { rtx neg = gen_reg_rtx (mode); @@ -1017,9 +1359,9 @@ }) (define_expand "aarch64_lshr_simddi" - [(match_operand:DI 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "w") - (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "aarch64_shift_imm64_di")] "TARGET_SIMD" { if (INTVAL (operands[2]) == 64) @@ -1030,19 +1372,6 @@ } ) -(define_expand "vec_set" - [(match_operand:VDQ_BHSI 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:SI 2 "immediate_operand")] - "TARGET_SIMD" - { - HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); - emit_insn (gen_aarch64_simd_vec_set (operands[0], operands[1], - GEN_INT (elem), operands[0])); - DONE; - } -) - ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. 
(define_insn "vec_shr_" [(set (match_operand:VD 0 "register_operand" "=w") @@ -1059,64 +1388,10 @@ [(set_attr "type" "neon_shift_imm")] ) -(define_insn "aarch64_simd_vec_setv2di" - [(set (match_operand:V2DI 0 "register_operand" "=w,w") - (vec_merge:V2DI - (vec_duplicate:V2DI - (match_operand:DI 1 "register_operand" "r,w")) - (match_operand:V2DI 3 "register_operand" "0,0") - (match_operand:SI 2 "immediate_operand" "i,i")))] - "TARGET_SIMD" - { - int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2]))); - operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); - switch (which_alternative) - { - case 0: - return "ins\\t%0.d[%p2], %1"; - case 1: - return "ins\\t%0.d[%p2], %1.d[0]"; - default: - gcc_unreachable (); - } - } - [(set_attr "type" "neon_from_gp, neon_ins_q")] -) - -(define_expand "vec_setv2di" - [(match_operand:V2DI 0 "register_operand") - (match_operand:DI 1 "register_operand") - (match_operand:SI 2 "immediate_operand")] - "TARGET_SIMD" - { - HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); - emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1], - GEN_INT (elem), operands[0])); - DONE; - } -) - -(define_insn "aarch64_simd_vec_set" - [(set (match_operand:VDQF_F16 0 "register_operand" "=w") - (vec_merge:VDQF_F16 - (vec_duplicate:VDQF_F16 - (match_operand: 1 "register_operand" "w")) - (match_operand:VDQF_F16 3 "register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")))] - "TARGET_SIMD" - { - int elt = ENDIAN_LANE_N (, exact_log2 (INTVAL (operands[2]))); - - operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt); - return "ins\t%0.[%p2], %1.[0]"; - } - [(set_attr "type" "neon_ins")] -) - (define_expand "vec_set" - [(match_operand:VDQF_F16 0 "register_operand" "+w") - (match_operand: 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "")] + [(match_operand:VALL_F16 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); @@ -1151,7 +1426,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "mla\t%0., %3., %1.[%2]"; + return "mla\t%0., %3., %1.[%2]"; } [(set_attr "type" "neon_mla__scalar")] ) @@ -1169,20 +1444,21 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "mla\t%0., %3., %1.[%2]"; + return "mla\t%0., %3., %1.[%2]"; } [(set_attr "type" "neon_mla__scalar")] ) -(define_insn "*aarch64_mla_elt_merge" - [(set (match_operand:VDQHS 0 "register_operand" "=w") +(define_insn "aarch64_mla_n" + [(set (match_operand:VDQHS 0 "register_operand" "=w") (plus:VDQHS - (mult:VDQHS (vec_duplicate:VDQHS - (match_operand: 1 "register_operand" "")) - (match_operand:VDQHS 2 "register_operand" "w")) - (match_operand:VDQHS 3 "register_operand" "0")))] + (mult:VDQHS + (vec_duplicate:VDQHS + (match_operand: 3 "register_operand" "")) + (match_operand:VDQHS 2 "register_operand" "w")) + (match_operand:VDQHS 1 "register_operand" "0")))] "TARGET_SIMD" - "mla\t%0., %2., %1.[0]" + "mla\t%0., %2., %3.[0]" [(set_attr "type" "neon_mla__scalar")] ) @@ -1209,7 +1485,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "mls\t%0., %3., %1.[%2]"; + return "mls\t%0., %3., %1.[%2]"; } [(set_attr "type" "neon_mla__scalar")] ) @@ -1227,20 +1503,21 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "mls\t%0., %3., %1.[%2]"; + return "mls\t%0., %3., %1.[%2]"; } [(set_attr 
"type" "neon_mla__scalar")] ) -(define_insn "*aarch64_mls_elt_merge" +(define_insn "aarch64_mls_n" [(set (match_operand:VDQHS 0 "register_operand" "=w") (minus:VDQHS (match_operand:VDQHS 1 "register_operand" "0") - (mult:VDQHS (vec_duplicate:VDQHS - (match_operand: 2 "register_operand" "")) - (match_operand:VDQHS 3 "register_operand" "w"))))] + (mult:VDQHS + (vec_duplicate:VDQHS + (match_operand: 3 "register_operand" "")) + (match_operand:VDQHS 2 "register_operand" "w"))))] "TARGET_SIMD" - "mls\t%0., %3., %2.[0]" + "mls\t%0., %2., %3.[0]" [(set_attr "type" "neon_mla__scalar")] ) @@ -1255,9 +1532,9 @@ ) (define_expand "v2di3" - [(set (match_operand:V2DI 0 "register_operand" "") - (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")))] + [(set (match_operand:V2DI 0 "register_operand") + (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand") + (match_operand:V2DI 2 "register_operand")))] "TARGET_SIMD" { enum rtx_code cmp_operator; @@ -1320,41 +1597,24 @@ ;; On big-endian this is { zeroes, operand } (define_insn "move_lo_quad_internal_" - [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") - (vec_concat:VQ_NO2E - (match_operand: 1 "register_operand" "w,r,r") - (vec_duplicate: (const_int 0))))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "@ - dup\\t%d0, %1.d[0] - fmov\\t%d0, %1 - dup\\t%d0, %1" - [(set_attr "type" "neon_dup,f_mcr,neon_dup") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*") - (set_attr "length" "4")] -) - -(define_insn "move_lo_quad_internal_" - [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") - (vec_concat:VQ_2E + [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w") + (vec_concat:VQMOV (match_operand: 1 "register_operand" "w,r,r") - (const_int 0)))] + (match_operand: 2 "aarch64_simd_or_scalar_imm_zero")))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 dup\\t%d0, %1" [(set_attr "type" "neon_dup,f_mcr,neon_dup") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "arch" "simd,fp,simd")] ) (define_insn "move_lo_quad_internal_be_" - [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") - (vec_concat:VQ_NO2E - (vec_duplicate: (const_int 0)) + [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w") + (vec_concat:VQMOV + (match_operand: 2 "aarch64_simd_or_scalar_imm_zero") (match_operand: 1 "register_operand" "w,r,r")))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "@ @@ -1362,36 +1622,20 @@ fmov\\t%d0, %1 dup\\t%d0, %1" [(set_attr "type" "neon_dup,f_mcr,neon_dup") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*") - (set_attr "length" "4")] -) - -(define_insn "move_lo_quad_internal_be_" - [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") - (vec_concat:VQ_2E - (const_int 0) - (match_operand: 1 "register_operand" "w,r,r")))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "@ - dup\\t%d0, %1.d[0] - fmov\\t%d0, %1 - dup\\t%d0, %1" - [(set_attr "type" "neon_dup,f_mcr,neon_dup") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*") - (set_attr "length" "4")] + (set_attr "length" "4") + (set_attr "arch" "simd,fp,simd")] ) (define_expand "move_lo_quad_" - [(match_operand:VQ 0 "register_operand") - (match_operand:VQ 1 "register_operand")] + [(match_operand:VQMOV 0 "register_operand") + (match_operand: 1 "register_operand")] "TARGET_SIMD" { + rtx zs = CONST0_RTX (mode); if (BYTES_BIG_ENDIAN) - emit_insn (gen_move_lo_quad_internal_be_ (operands[0], operands[1])); + emit_insn (gen_move_lo_quad_internal_be_ 
(operands[0], operands[1], zs)); else - emit_insn (gen_move_lo_quad_internal_ (operands[0], operands[1])); + emit_insn (gen_move_lo_quad_internal_ (operands[0], operands[1], zs)); DONE; } ) @@ -1402,11 +1646,11 @@ ;; For big-endian this is { operand1, operand2 } (define_insn "aarch64_simd_move_hi_quad_" - [(set (match_operand:VQ 0 "register_operand" "+w,w") - (vec_concat:VQ + [(set (match_operand:VQMOV 0 "register_operand" "+w,w") + (vec_concat:VQMOV (vec_select: (match_dup 0) - (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) + (match_operand:VQMOV 2 "vect_par_cnst_lo_half" "")) (match_operand: 1 "register_operand" "w,r")))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ @@ -1416,12 +1660,12 @@ ) (define_insn "aarch64_simd_move_hi_quad_be_" - [(set (match_operand:VQ 0 "register_operand" "+w,w") - (vec_concat:VQ + [(set (match_operand:VQMOV 0 "register_operand" "+w,w") + (vec_concat:VQMOV (match_operand: 1 "register_operand" "w,r") (vec_select: (match_dup 0) - (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))] + (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "@ ins\\t%0.d[1], %1.d[0] @@ -1430,8 +1674,8 @@ ) (define_expand "move_hi_quad_" - [(match_operand:VQ 0 "register_operand" "") - (match_operand: 1 "register_operand" "")] + [(match_operand:VQMOV 0 "register_operand") + (match_operand: 1 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); @@ -1456,9 +1700,9 @@ ) (define_expand "vec_pack_trunc_" - [(match_operand: 0 "register_operand" "") - (match_operand:VDN 1 "register_operand" "") - (match_operand:VDN 2 "register_operand" "")] + [(match_operand: 0 "register_operand") + (match_operand:VDN 1 "register_operand") + (match_operand:VDN 2 "register_operand")] "TARGET_SIMD" { rtx tempreg = gen_reg_rtx (mode); @@ -1471,6 +1715,190 @@ DONE; }) +(define_insn "aarch64_shrn_insn_le" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (truncate: + (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_"))) + (match_operand: 3 "aarch64_simd_or_scalar_imm_zero")))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "shrn\\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "aarch64_shrn_insn_be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (match_operand: 3 "aarch64_simd_or_scalar_imm_zero") + (truncate: + (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_")))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "shrn\\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "aarch64_shrn" + [(set (match_operand: 0 "register_operand") + (truncate: + (lshiftrt:VQN (match_operand:VQN 1 "register_operand") + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_"))))] + "TARGET_SIMD" + { + operands[2] = aarch64_simd_gen_const_vector_dup (mode, + INTVAL (operands[2])); + rtx tmp = gen_reg_rtx (mode); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_shrn_insn_be (tmp, operands[1], + operands[2], CONST0_RTX (mode))); + else + emit_insn (gen_aarch64_shrn_insn_le (tmp, operands[1], + operands[2], CONST0_RTX (mode))); + + /* The intrinsic expects a narrow result, so emit a subreg that will get + optimized away as appropriate. 
*/ + emit_move_insn (operands[0], lowpart_subreg (mode, tmp, + mode)); + DONE; + } +) + +(define_insn "aarch64_rshrn_insn_le" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (unspec: [(match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 + "aarch64_simd_shift_imm_vec_")] UNSPEC_RSHRN) + (match_operand: 3 "aarch64_simd_or_scalar_imm_zero")))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "rshrn\\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "aarch64_rshrn_insn_be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (match_operand: 3 "aarch64_simd_or_scalar_imm_zero") + (unspec: [(match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_")] + UNSPEC_RSHRN)))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "rshrn\\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "aarch64_rshrn" + [(match_operand: 0 "register_operand") + (match_operand:VQN 1 "register_operand") + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_")] + "TARGET_SIMD" + { + operands[2] = aarch64_simd_gen_const_vector_dup (mode, + INTVAL (operands[2])); + rtx tmp = gen_reg_rtx (mode); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_rshrn_insn_be (tmp, operands[1], + operands[2], CONST0_RTX (mode))); + else + emit_insn (gen_aarch64_rshrn_insn_le (tmp, operands[1], + operands[2], CONST0_RTX (mode))); + + /* The intrinsic expects a narrow result, so emit a subreg that will get + optimized away as appropriate. */ + emit_move_insn (operands[0], lowpart_subreg (mode, tmp, + mode)); + DONE; + } +) + +(define_insn "aarch64_shrn2_insn_le" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (match_operand: 1 "register_operand" "0") + (truncate: + (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w") + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_")))))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "shrn2\\t%0., %2., %3" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "aarch64_shrn2_insn_be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (truncate: + (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w") + (match_operand:VQN 3 + "aarch64_simd_shift_imm_vec_"))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "shrn2\\t%0., %2., %3" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "aarch64_shrn2" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQN 2 "register_operand") + (match_operand:SI 3 "aarch64_simd_shift_imm_offset_")] + "TARGET_SIMD" + { + operands[3] = aarch64_simd_gen_const_vector_dup (mode, + INTVAL (operands[3])); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_shrn2_insn_be (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_aarch64_shrn2_insn_le (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +) + +(define_insn "aarch64_rshrn2_insn_le" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (match_operand: 1 "register_operand" "0") + (unspec: [(match_operand:VQN 2 "register_operand" "w") + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_")] + UNSPEC_RSHRN)))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "rshrn2\\t%0., %2., %3" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "aarch64_rshrn2_insn_be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (unspec: [(match_operand:VQN 2 "register_operand" "w") + (match_operand:VQN 3 
"aarch64_simd_shift_imm_vec_")] + UNSPEC_RSHRN) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "rshrn2\\t%0., %2., %3" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "aarch64_rshrn2" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQN 2 "register_operand") + (match_operand:SI 3 "aarch64_simd_shift_imm_offset_")] + "TARGET_SIMD" + { + operands[3] = aarch64_simd_gen_const_vector_dup (mode, + INTVAL (operands[3])); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_rshrn2_insn_be (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_aarch64_rshrn2_insn_le (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +) + ;; For quads. (define_insn "vec_pack_trunc_" @@ -1498,7 +1926,7 @@ (match_operand:VQW 2 "vect_par_cnst_lo_half" "") )))] "TARGET_SIMD" - "shll\t%0., %1., 0" + "xtl\t%0., %1." [(set_attr "type" "neon_shift_imm_long")] ) @@ -1509,12 +1937,12 @@ (match_operand:VQW 2 "vect_par_cnst_hi_half" "") )))] "TARGET_SIMD" - "shll2\t%0., %1., 0" + "xtl2\t%0., %1." [(set_attr "type" "neon_shift_imm_long")] ) (define_expand "vec_unpack_hi_" - [(match_operand: 0 "register_operand" "") + [(match_operand: 0 "register_operand") (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] "TARGET_SIMD" { @@ -1526,8 +1954,8 @@ ) (define_expand "vec_unpack_lo_" - [(match_operand: 0 "register_operand" "") - (ANY_EXTEND: (match_operand:VQW 1 "register_operand" ""))] + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); @@ -1555,7 +1983,7 @@ [(set_attr "type" "neon_mla__long")] ) -(define_insn "*aarch64_mlal_hi" +(define_insn "aarch64_mlal_hi_insn" [(set (match_operand: 0 "register_operand" "=w") (plus: (mult: @@ -1571,6 +1999,49 @@ [(set_attr "type" "neon_mla__long")] ) +(define_expand "aarch64_mlal_hi" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQW 2 "register_operand")) + (match_operand:VQW 3 "register_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlal_hi_insn (operands[0], operands[1], + operands[2], p, operands[3])); + DONE; +} +) + +(define_insn "aarch64_mlal_hi_n_insn" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (match_operand: 4 "register_operand" "")))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal2\t%0., %2., %4.[0]" + [(set_attr "type" "neon_mla__long")] +) + +(define_expand "aarch64_mlal_hi_n" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 2 "register_operand")) + (match_operand: 3 "register_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlal_hi_n_insn (operands[0], + operands[1], operands[2], p, operands[3])); + DONE; +} +) + (define_insn "*aarch64_mlsl_lo" [(set (match_operand: 0 "register_operand" "=w") (minus: @@ -1587,7 +2058,7 @@ [(set_attr "type" "neon_mla__long")] ) -(define_insn "*aarch64_mlsl_hi" +(define_insn "aarch64_mlsl_hi_insn" [(set (match_operand: 0 "register_operand" "=w") (minus: (match_operand: 1 "register_operand" "0") @@ -1603,21 +2074,79 
@@ [(set_attr "type" "neon_mla__long")] ) -(define_insn "*aarch64_mlal" +(define_expand "aarch64_mlsl_hi" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQW 2 "register_operand")) + (match_operand:VQW 3 "register_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlsl_hi_insn (operands[0], operands[1], + operands[2], p, operands[3])); + DONE; +} +) + +(define_insn "aarch64_mlsl_hi_n_insn" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (match_operand: 4 "register_operand" ""))))))] + "TARGET_SIMD" + "mlsl2\t%0., %2., %4.[0]" + [(set_attr "type" "neon_mla__long")] +) + +(define_expand "aarch64_mlsl_hi_n" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 2 "register_operand")) + (match_operand: 3 "register_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlsl_hi_n_insn (operands[0], + operands[1], operands[2], p, operands[3])); + DONE; +} +) + +(define_insn "aarch64_mlal" [(set (match_operand: 0 "register_operand" "=w") (plus: (mult: (ANY_EXTEND: - (match_operand:VD_BHSI 1 "register_operand" "w")) + (match_operand:VD_BHSI 2 "register_operand" "w")) (ANY_EXTEND: - (match_operand:VD_BHSI 2 "register_operand" "w"))) - (match_operand: 3 "register_operand" "0")))] + (match_operand:VD_BHSI 3 "register_operand" "w"))) + (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" - "mlal\t%0., %1., %2." + "mlal\t%0., %2., %3." [(set_attr "type" "neon_mla__long")] ) -(define_insn "*aarch64_mlsl" +(define_insn "aarch64_mlal_n" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: + (match_operand:VD_HSI 2 "register_operand" "w")) + (ANY_EXTEND: + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" "")))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal\t%0., %2., %3.[0]" + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "aarch64_mlsl" [(set (match_operand: 0 "register_operand" "=w") (minus: (match_operand: 1 "register_operand" "0") @@ -1631,6 +2160,21 @@ [(set_attr "type" "neon_mla__long")] ) +(define_insn "aarch64_mlsl_n" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: + (match_operand:VD_HSI 2 "register_operand" "w")) + (ANY_EXTEND: + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" ""))))))] + "TARGET_SIMD" + "mlsl\t%0., %2., %3.[0]" + [(set_attr "type" "neon_mla__long")] +) + (define_insn "aarch64_simd_vec_mult_lo_" [(set (match_operand: 0 "register_operand" "=w") (mult: (ANY_EXTEND: (vec_select: @@ -1644,10 +2188,21 @@ [(set_attr "type" "neon_mul__long")] ) +(define_insn "aarch64_intrinsic_vec_mult_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (ANY_EXTEND: + (match_operand:VD_BHSI 1 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VD_BHSI 2 "register_operand" "w"))))] + "TARGET_SIMD" + "mull\\t%0., %1., %2." 
+ [(set_attr "type" "neon_mul__long")] +) + (define_expand "vec_widen_mult_lo_" - [(match_operand: 0 "register_operand" "") - (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) - (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); @@ -1658,32 +2213,338 @@ } ) -(define_insn "aarch64_simd_vec_mult_hi_" - [(set (match_operand: 0 "register_operand" "=w") - (mult: (ANY_EXTEND: (vec_select: - (match_operand:VQW 1 "register_operand" "w") - (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_select: - (match_operand:VQW 2 "register_operand" "w") - (match_dup 3)))))] +(define_insn "aarch64_simd_vec_mult_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "mull2\\t%0., %1., %2." + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_simd_vec_mult_hi_ (operands[0], + operands[1], + operands[2], p)); + DONE; + + } +) + +;; vmull_lane_s16 intrinsics +(define_insn "aarch64_vec_mult_lane" + [(set (match_operand: 0 "register_operand" "=w") + (mult: + (ANY_EXTEND: + (match_operand: 1 "register_operand" "w")) + (ANY_EXTEND: + (vec_duplicate: + (vec_select: + (match_operand:VDQHS 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] + "TARGET_SIMD" + { + operands[3] = aarch64_endian_lane_rtx (mode, INTVAL (operands[3])); + return "mull\\t%0., %1., %2.[%3]"; + } + [(set_attr "type" "neon_mul__scalar_long")] +) + +(define_insn "aarch64_mull_hi_lane_insn" + [(set (match_operand: 0 "register_operand" "=w") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return "mull2\\t%0., %1., %3.[%4]"; + } + [(set_attr "type" "neon_mul__scalar_long")] +) + +(define_expand "aarch64_mull_hi_lane" + [(match_operand: 0 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 1 "register_operand")) + (match_operand: 2 "register_operand") + (match_operand:SI 3 "immediate_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mull_hi_lane_insn (operands[0], + operands[1], p, operands[2], operands[3])); + DONE; +} +) + +(define_insn "aarch64_mull_hi_laneq_insn" + [(set (match_operand: 0 "register_operand" "=w") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] + "TARGET_SIMD" + 
{ + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return "mull2\\t%0., %1., %3.[%4]"; + } + [(set_attr "type" "neon_mul__scalar_long")] +) + +(define_expand "aarch64_mull_hi_laneq" + [(match_operand: 0 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 1 "register_operand")) + (match_operand: 2 "register_operand") + (match_operand:SI 3 "immediate_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mull_hi_laneq_insn (operands[0], + operands[1], p, operands[2], operands[3])); + DONE; +} +) + +(define_insn "aarch64_mull_n" + [(set (match_operand: 0 "register_operand" "=w") + (mult: + (ANY_EXTEND: + (match_operand:VD_HSI 1 "register_operand" "w")) + (ANY_EXTEND: + (vec_duplicate: + (match_operand: 2 "register_operand" "")))))] + "TARGET_SIMD" + "mull\t%0., %1., %2.[0]" + [(set_attr "type" "neon_mul__scalar_long")] +) + +(define_insn "aarch64_mull_hi_n_insn" + [(set (match_operand: 0 "register_operand" "=w") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: + (vec_duplicate: + (match_operand: 2 "register_operand" "")))))] + "TARGET_SIMD" + "mull2\\t%0., %1., %2.[0]" + [(set_attr "type" "neon_mul__scalar_long")] +) + +(define_expand "aarch64_mull_hi_n" + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQ_HSI 1 "register_operand")) + (match_operand: 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mull_hi_n_insn (operands[0], operands[1], + operands[2], p)); + DONE; + } +) + +;; vmlal_lane_s16 intrinsics +(define_insn "aarch64_vec_mlal_lane" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: + (match_operand: 2 "register_operand" "w")) + (ANY_EXTEND: + (vec_duplicate: + (vec_select: + (match_operand:VDQHS 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return "mlal\\t%0., %2., %3.[%4]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_insn "aarch64_mlal_hi_lane_insn" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[5] = aarch64_endian_lane_rtx (mode, INTVAL (operands[5])); + return "mlal2\\t%0., %2., %4.[%5]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_expand "aarch64_mlal_hi_lane" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 2 "register_operand")) + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlal_hi_lane_insn (operands[0], + operands[1], operands[2], p, operands[3], operands[4])); + DONE; +} +) + +(define_insn "aarch64_mlal_hi_laneq_insn" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 2 
"register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[5] = aarch64_endian_lane_rtx (mode, INTVAL (operands[5])); + return "mlal2\\t%0., %2., %4.[%5]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_expand "aarch64_mlal_hi_laneq" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 2 "register_operand")) + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlal_hi_laneq_insn (operands[0], + operands[1], operands[2], p, operands[3], operands[4])); + DONE; +} +) + +(define_insn "aarch64_vec_mlsl_lane" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: + (match_operand: 2 "register_operand" "w")) + (ANY_EXTEND: + (vec_duplicate: + (vec_select: + (match_operand:VDQHS 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return "mlsl\\t%0., %2., %3.[%4]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_insn "aarch64_mlsl_hi_lane_insn" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) + )))] + "TARGET_SIMD" + { + operands[5] = aarch64_endian_lane_rtx (mode, INTVAL (operands[5])); + return "mlsl2\\t%0., %2., %4.[%5]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_expand "aarch64_mlsl_hi_lane" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 2 "register_operand")) + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] "TARGET_SIMD" - "mull2\\t%0., %1., %2." 
- [(set_attr "type" "neon_mul__long")] +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlsl_hi_lane_insn (operands[0], + operands[1], operands[2], p, operands[3], operands[4])); + DONE; +} ) -(define_expand "vec_widen_mult_hi_" - [(match_operand: 0 "register_operand" "") - (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) - (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); - emit_insn (gen_aarch64_simd_vec_mult_hi_ (operands[0], - operands[1], - operands[2], p)); - DONE; +(define_insn "aarch64_mlsl_hi_laneq_insn" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_duplicate: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) + )))] + "TARGET_SIMD" + { + operands[5] = aarch64_endian_lane_rtx (mode, INTVAL (operands[5])); + return "mlsl2\\t%0., %2., %4.[%5]"; + } + [(set_attr "type" "neon_mla__scalar_long")] +) - } +(define_expand "aarch64_mlsl_hi_laneq" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (ANY_EXTEND:(match_operand:VQ_HSI 2 "register_operand")) + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_mlsl_hi_laneq_insn (operands[0], + operands[1], operands[2], p, operands[3], operands[4])); + DONE; +} ) ;; FP vector operations. @@ -1739,9 +2600,9 @@ ) (define_expand "div3" - [(set (match_operand:VHSDF 0 "register_operand" "=w") - (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w") - (match_operand:VHSDF 2 "register_operand" "w")))] + [(set (match_operand:VHSDF 0 "register_operand") + (div:VHSDF (match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")))] "TARGET_SIMD" { if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) @@ -1797,7 +2658,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "fmla\\t%0., %3., %1.[%2]"; + return "fmla\\t%0., %3., %1.[%2]"; } [(set_attr "type" "neon_fp_mla__scalar")] ) @@ -1814,7 +2675,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "fmla\\t%0., %3., %1.[%2]"; + return "fmla\\t%0., %3., %1.[%2]"; } [(set_attr "type" "neon_fp_mla__scalar")] ) @@ -1842,7 +2703,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); - return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; + return "fmla\\t%0.2d, %3.2d, %1.d[%2]"; } [(set_attr "type" "neon_fp_mla_d_scalar_q")] ) @@ -1871,7 +2732,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "fmls\\t%0., %3., %1.[%2]"; + return "fmls\\t%0., %3., %1.[%2]"; } [(set_attr "type" "neon_fp_mla__scalar")] ) @@ -1889,7 +2750,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "fmls\\t%0., %3., %1.[%2]"; + return "fmls\\t%0., %3., %1.[%2]"; } [(set_attr "type" "neon_fp_mla__scalar")] ) @@ -1919,7 +2780,7 @@ "TARGET_SIMD" { operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); - return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; + return "fmls\\t%0.2d, %3.2d, %1.d[%2]"; } [(set_attr 
"type" "neon_fp_mla_d_scalar_q")] ) @@ -2076,8 +2937,8 @@ ;; other big-endian patterns their behavior is as required. (define_expand "vec_unpacks_lo_" - [(match_operand: 0 "register_operand" "") - (match_operand:VQ_HSF 1 "register_operand" "")] + [(match_operand: 0 "register_operand") + (match_operand:VQ_HSF 1 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); @@ -2099,8 +2960,8 @@ ) (define_expand "vec_unpacks_hi_" - [(match_operand: 0 "register_operand" "") - (match_operand:VQ_HSF 1 "register_operand" "")] + [(match_operand: 0 "register_operand") + (match_operand:VQ_HSF 1 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -2152,9 +3013,9 @@ ) (define_expand "aarch64_float_truncate_hi_" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VDF 1 "register_operand" "0") - (match_operand: 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand:VDF 1 "register_operand") + (match_operand: 2 "register_operand")] "TARGET_SIMD" { rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN @@ -2211,8 +3072,9 @@ ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An ;; expression like: ;; a = (b < c) ? b : c; -;; is idiom-matched as MIN_EXPR only if -ffinite-math-only is enabled -;; either explicitly or indirectly via -ffast-math. +;; is idiom-matched as MIN_EXPR only if -ffinite-math-only and +;; -fno-signed-zeros are enabled either explicitly or indirectly via +;; -ffast-math. ;; ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. ;; The 'smax' and 'smin' RTL standard pattern names do not specify which @@ -2246,8 +3108,8 @@ ;; 'across lanes' add. (define_expand "reduc_plus_scal_" - [(match_operand: 0 "register_operand" "=w") - (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")] UNSPEC_ADDV)] "TARGET_SIMD" { @@ -2278,6 +3140,26 @@ [(set_attr "type" "neon_reduc_add")] ) +(define_insn "aarch64_addlv" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VDQV_L 1 "register_operand" "w")] + USADDLV))] + "TARGET_SIMD" + "addl\\t%0, %1." + [(set_attr "type" "neon_reduc_add")] +) + +;; ADDV with result zero-extended to SI/DImode (for popcount). +(define_insn "aarch64_zero_extend_reduc_plus_" + [(set (match_operand:GPI 0 "register_operand" "=w") + (zero_extend:GPI + (unspec: [(match_operand:VDQV_E 1 "register_operand" "w")] + UNSPEC_ADDV)))] + "TARGET_SIMD" + "add\\t%0, %1." 
+ [(set_attr "type" "neon_reduc_add")] +) + (define_insn "aarch64_reduc_plus_internalv2si" [(set (match_operand:V2SI 0 "register_operand" "=w") (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] @@ -2999,36 +3881,40 @@ (define_insn "*aarch64_get_lane_extend" [(set (match_operand:GPI 0 "register_operand" "=r") (sign_extend:GPI - (vec_select: + (vec_select: (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" { - operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); + operands[2] = aarch64_endian_lane_rtx (mode, + INTVAL (operands[2])); return "smov\\t%0, %1.[%2]"; } - [(set_attr "type" "neon_to_gp")] + [(set_attr "type" "neon_to_gp")] ) -(define_insn "*aarch64_get_lane_zero_extendsi" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI - (vec_select: +(define_insn "*aarch64_get_lane_zero_extend" + [(set (match_operand:GPI 0 "register_operand" "=r") + (zero_extend:GPI + (vec_select: (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" { - operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); - return "umov\\t%w0, %1.[%2]"; + operands[2] = aarch64_endian_lane_rtx (mode, + INTVAL (operands[2])); + return "umov\\t%w0, %1.[%2]"; } - [(set_attr "type" "neon_to_gp")] + [(set_attr "type" "neon_to_gp")] ) ;; Lane extraction of a value, neither sign nor zero extension ;; is guaranteed so upper bits should be considered undefined. ;; RTL uses GCC vector extension indices throughout so flip only for assembly. -(define_insn "aarch64_get_lane" - [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") +;; Extracting lane zero is split into a simple move when it is between SIMD +;; registers or a store. +(define_insn_and_split "aarch64_get_lane" + [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv") (vec_select: (match_operand:VALL_F16 1 "register_operand" "w, w, w") (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] @@ -3047,6 +3933,12 @@ gcc_unreachable (); } } + "&& reload_completed + && ENDIAN_LANE_N (, INTVAL (operands[2])) == 0" + [(set (match_dup 0) (match_dup 1))] + { + operands[1] = aarch64_replace_reg_mode (operands[1], mode); + } [(set_attr "type" "neon_to_gp, neon_dup, neon_store1_one_lane")] ) @@ -3065,7 +3957,7 @@ ) (define_insn "store_pair_lanes" - [(set (match_operand: 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml") + [(set (match_operand: 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn") (vec_concat: (match_operand:VDC 1 "register_operand" "w, r") (match_operand:VDC 2 "register_operand" "w, r")))] @@ -3079,7 +3971,7 @@ ;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector. 
-(define_insn "*aarch64_combinez" +(define_insn "@aarch64_combinez" [(set (match_operand: 0 "register_operand" "=w,w,w") (vec_concat: (match_operand:VDC 1 "general_operand" "w,?r,m") @@ -3090,11 +3982,10 @@ fmov\t%d0, %1 ldr\\t%d0, %1" [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*")] + (set_attr "arch" "simd,fp,simd")] ) -(define_insn "*aarch64_combinez_be" +(define_insn "@aarch64_combinez_be" [(set (match_operand: 0 "register_operand" "=w,w,w") (vec_concat: (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero") @@ -3105,23 +3996,31 @@ fmov\t%d0, %1 ldr\\t%d0, %1" [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*")] + (set_attr "arch" "simd,fp,simd")] ) (define_expand "aarch64_combine" [(match_operand: 0 "register_operand") (match_operand:VDC 1 "register_operand") - (match_operand:VDC 2 "register_operand")] + (match_operand:VDC 2 "aarch64_simd_reg_or_zero")] "TARGET_SIMD" { - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); - + if (operands[2] == CONST0_RTX (mode)) + { + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_combinez_be (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_aarch64_combinez (operands[0], operands[1], + operands[2])); + } + else + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } ) -(define_expand "aarch64_simd_combine" +(define_expand "@aarch64_simd_combine" [(match_operand: 0 "register_operand") (match_operand:VDC 1 "register_operand") (match_operand:VDC 2 "register_operand")] @@ -3162,11 +4061,58 @@ [(set_attr "type" "neon__long")] ) +(define_expand "vec_widen_addl_lo_" + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); + emit_insn (gen_aarch64_addl_lo_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "vec_widen_addl_hi_" + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_addl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "vec_widen_subl_lo_" + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); + emit_insn (gen_aarch64_subl_lo_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "vec_widen_subl_hi_" + [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_subl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) (define_expand "aarch64_saddl2" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQW 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand:VQW 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); 
@@ -3176,9 +4122,9 @@ }) (define_expand "aarch64_uaddl2" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQW 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand:VQW 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3188,9 +4134,9 @@ }) (define_expand "aarch64_ssubl2" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQW 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand:VQW 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3200,9 +4146,9 @@ }) (define_expand "aarch64_usubl2" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQW 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand:VQW 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3225,10 +4171,10 @@ ;; w. (define_expand "widen_ssum3" - [(set (match_operand: 0 "register_operand" "") + [(set (match_operand: 0 "register_operand") (plus: (sign_extend: - (match_operand:VQW 1 "register_operand" "")) - (match_operand: 2 "register_operand" "")))] + (match_operand:VQW 1 "register_operand")) + (match_operand: 2 "register_operand")))] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); @@ -3242,10 +4188,10 @@ ) (define_expand "widen_ssum3" - [(set (match_operand: 0 "register_operand" "") + [(set (match_operand: 0 "register_operand") (plus: (sign_extend: - (match_operand:VD_BHSI 1 "register_operand" "")) - (match_operand: 2 "register_operand" "")))] + (match_operand:VD_BHSI 1 "register_operand")) + (match_operand: 2 "register_operand")))] "TARGET_SIMD" { emit_insn (gen_aarch64_saddw (operands[0], operands[2], operands[1])); @@ -3253,10 +4199,10 @@ }) (define_expand "widen_usum3" - [(set (match_operand: 0 "register_operand" "") + [(set (match_operand: 0 "register_operand") (plus: (zero_extend: - (match_operand:VQW 1 "register_operand" "")) - (match_operand: 2 "register_operand" "")))] + (match_operand:VQW 1 "register_operand")) + (match_operand: 2 "register_operand")))] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); @@ -3270,54 +4216,90 @@ ) (define_expand "widen_usum3" - [(set (match_operand: 0 "register_operand" "") + [(set (match_operand: 0 "register_operand") (plus: (zero_extend: - (match_operand:VD_BHSI 1 "register_operand" "")) - (match_operand: 2 "register_operand" "")))] + (match_operand:VD_BHSI 1 "register_operand")) + (match_operand: 2 "register_operand")))] "TARGET_SIMD" { emit_insn (gen_aarch64_uaddw (operands[0], operands[2], operands[1])); DONE; }) -(define_insn "aarch64_w" +(define_insn "aarch64_subw" [(set (match_operand: 0 "register_operand" "=w") - (ADDSUB: (match_operand: 1 "register_operand" "w") - (ANY_EXTEND: - (match_operand:VD_BHSI 2 "register_operand" "w"))))] + (minus: (match_operand: 1 "register_operand" "w") + (ANY_EXTEND: + (match_operand:VD_BHSI 2 "register_operand" "w"))))] "TARGET_SIMD" - "w\\t%0., %1., %2." - [(set_attr "type" "neon__widen")] + "subw\\t%0., %1., %2." 
+ [(set_attr "type" "neon_sub_widen")] ) -(define_insn "aarch64_w_internal" +(define_insn "aarch64_subw_internal" [(set (match_operand: 0 "register_operand" "=w") - (ADDSUB: (match_operand: 1 "register_operand" "w") - (ANY_EXTEND: - (vec_select: - (match_operand:VQW 2 "register_operand" "w") - (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))] + (minus: (match_operand: 1 "register_operand" "w") + (ANY_EXTEND: + (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))] "TARGET_SIMD" - "w\\t%0., %1., %2." - [(set_attr "type" "neon__widen")] + "subw\\t%0., %1., %2." + [(set_attr "type" "neon_sub_widen")] ) -(define_insn "aarch64_w2_internal" +(define_insn "aarch64_subw2_internal" [(set (match_operand: 0 "register_operand" "=w") - (ADDSUB: (match_operand: 1 "register_operand" "w") - (ANY_EXTEND: - (vec_select: - (match_operand:VQW 2 "register_operand" "w") - (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] + (minus: (match_operand: 1 "register_operand" "w") + (ANY_EXTEND: + (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] + "TARGET_SIMD" + "subw2\\t%0., %1., %2." + [(set_attr "type" "neon_sub_widen")] +) + +(define_insn "aarch64_addw" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (ANY_EXTEND: (match_operand:VD_BHSI 2 "register_operand" "w")) + (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "addw\\t%0., %1., %2." + [(set_attr "type" "neon_add_widen")] +) + +(define_insn "aarch64_addw_internal" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (ANY_EXTEND: + (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "addw\\t%0., %1., %2." + [(set_attr "type" "neon_add_widen")] +) + +(define_insn "aarch64_addw2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (ANY_EXTEND: + (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (match_operand: 1 "register_operand" "w")))] "TARGET_SIMD" - "w2\\t%0., %1., %2." - [(set_attr "type" "neon__widen")] + "addw2\\t%0., %1., %2." 
+ [(set_attr "type" "neon_add_widen")] ) (define_expand "aarch64_saddw2" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3327,9 +4309,9 @@ }) (define_expand "aarch64_uaddw2" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3340,9 +4322,9 @@ (define_expand "aarch64_ssubw2" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3352,9 +4334,9 @@ }) (define_expand "aarch64_usubw2" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQW 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQW 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3365,6 +4347,22 @@ ;; h. +(define_expand "avg3_floor" + [(set (match_operand:VDQ_BHSI 0 "register_operand") + (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand") + (match_operand:VDQ_BHSI 2 "register_operand")] + HADD))] + "TARGET_SIMD" +) + +(define_expand "avg3_ceil" + [(set (match_operand:VDQ_BHSI 0 "register_operand") + (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand") + (match_operand:VDQ_BHSI 2 "register_operand")] + RHADD))] + "TARGET_SIMD" +) + (define_insn "aarch64_h" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") @@ -3496,13 +4494,13 @@ ) ;; q -(define_insn "aarch64_" +(define_insn "aarch64_q" [(set (match_operand:VSDQ_I 0 "register_operand" "=w") (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") - (match_operand:VSDQ_I 2 "register_operand" "w")))] + (match_operand:VSDQ_I 2 "register_operand" "w")))] "TARGET_SIMD" - "\\t%0, %1, %2" - [(set_attr "type" "neon_")] + "q\\t%0, %1, %2" + [(set_attr "type" "neon_q")] ) ;; suqadd and usqadd @@ -3539,6 +4537,84 @@ [(set_attr "type" "neon_sat_shift_imm_narrow_q")] ) +(define_insn "aarch64_qxtn2_le" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (match_operand: 1 "register_operand" "0") + (SAT_TRUNC: + (match_operand:VQN 2 "register_operand" "w"))))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "qxtn2\\t%0., %2." + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "aarch64_qxtn2_be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (SAT_TRUNC: + (match_operand:VQN 2 "register_operand" "w")) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "qxtn2\\t%0., %2." 
+ [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_expand "aarch64_qxtn2" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (SAT_TRUNC: + (match_operand:VQN 2 "register_operand"))] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_qxtn2_be (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_aarch64_qxtn2_le (operands[0], operands[1], + operands[2])); + DONE; + } +) + +(define_insn "aarch64_sqxtun2_le" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (match_operand: 1 "register_operand" "0") + (unspec: + [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN2)))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "sqxtun2\\t%0., %2." + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "aarch64_sqxtun2_be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (unspec: + [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN2) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "sqxtun2\\t%0., %2." + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_expand "aarch64_sqxtun2" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (unspec: + [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN2)] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_sqxtun2_be (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_aarch64_sqxtun2_le (operands[0], operands[1], + operands[2])); + DONE; + } +) + ;; q (define_insn "aarch64_s" @@ -3717,9 +4793,25 @@ ;; vqdml[sa]l -(define_insn "aarch64_sqdmll" +(define_insn "aarch64_sqdmlal" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VSD_HSI 2 "register_operand" "w")) + (sign_extend: + (match_operand:VSD_HSI 3 "register_operand" "w"))) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal\\t%0, %2, %3" + [(set_attr "type" "neon_sat_mla__long")] +) + +(define_insn "aarch64_sqdmlsl" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -3729,15 +4821,39 @@ (match_operand:VSD_HSI 3 "register_operand" "w"))) (const_int 1))))] "TARGET_SIMD" - "sqdmll\\t%0, %2, %3" + "sqdmlsl\\t%0, %2, %3" [(set_attr "type" "neon_sat_mla__long")] ) ;; vqdml[sa]l_lane -(define_insn "aarch64_sqdmll_lane" +(define_insn "aarch64_sqdmlal_lane" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_lane" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -3754,14 +4870,15 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) -(define_insn "aarch64_sqdmll_laneq" + +(define_insn "aarch64_sqdmlsl_laneq" 
[(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -3778,14 +4895,62 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) -(define_insn "aarch64_sqdmll_lane" +(define_insn "aarch64_sqdmlal_laneq" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + + +(define_insn "aarch64_sqdmlal_lane" + [(set (match_operand: 0 "register_operand" "=w") + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_lane" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -3801,14 +4966,38 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) -(define_insn "aarch64_sqdmll_laneq" + +(define_insn "aarch64_sqdmlal_laneq" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_laneq" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -3824,16 +5013,16 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) ;; vqdml[sa]l_n -(define_insn "aarch64_sqdmll_n" +(define_insn "aarch64_sqdmlsl_n" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -3844,15 +5033,53 @@ (match_operand: 3 "register_operand" "")))) (const_int 1))))] "TARGET_SIMD" - "sqdmll\\t%0, %2, %3.[0]" + "sqdmlsl\\t%0, %2, %3.[0]" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlal_n" + [(set (match_operand: 0 "register_operand" "=w") + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + 
(match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" "")))) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal\\t%0, %2, %3.[0]" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + + +;; sqdml[as]l2 + +(define_insn "aarch64_sqdmlal2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 3 "register_operand" "w") + (match_dup 4)))) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal2\\t%0, %2, %3" [(set_attr "type" "neon_sat_mla__scalar_long")] ) -;; sqdml[as]l2 - -(define_insn "aarch64_sqdmll2_internal" +(define_insn "aarch64_sqdmlsl2_internal" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -3866,15 +5093,15 @@ (match_dup 4)))) (const_int 1))))] "TARGET_SIMD" - "sqdmll2\\t%0, %2, %3" + "sqdmlsl2\\t%0, %2, %3" [(set_attr "type" "neon_sat_mla__scalar_long")] ) (define_expand "aarch64_sqdmlal2" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand:VQ_HSI 3 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3884,10 +5111,10 @@ }) (define_expand "aarch64_sqdmlsl2" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand:VQ_HSI 3 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3951,11 +5178,11 @@ ) (define_expand "aarch64_sqdmlal2_lane" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") - (match_operand:SI 4 "immediate_operand" "i")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3966,11 +5193,11 @@ }) (define_expand "aarch64_sqdmlal2_laneq" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") - (match_operand:SI 4 "immediate_operand" "i")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3981,11 +5208,11 @@ }) (define_expand "aarch64_sqdmlsl2_lane" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - 
(match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") - (match_operand:SI 4 "immediate_operand" "i")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -3996,11 +5223,11 @@ }) (define_expand "aarch64_sqdmlsl2_laneq" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") - (match_operand:SI 4 "immediate_operand" "i")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand: 3 "register_operand") + (match_operand:SI 4 "immediate_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -4030,10 +5257,10 @@ ) (define_expand "aarch64_sqdmlal2_n" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand: 3 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -4044,10 +5271,10 @@ }) (define_expand "aarch64_sqdmlsl2_n" - [(match_operand: 0 "register_operand" "=w") - (match_operand: 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand") + (match_operand: 3 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -4199,9 +5426,9 @@ ) (define_expand "aarch64_sqdmull2" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand:VQ_HSI 1 "register_operand") + (match_operand:VQ_HSI 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -4259,10 +5486,10 @@ ) (define_expand "aarch64_sqdmull2_lane" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") - (match_operand:SI 3 "immediate_operand" "i")] + [(match_operand: 0 "register_operand") + (match_operand:VQ_HSI 1 "register_operand") + (match_operand: 2 "register_operand") + (match_operand:SI 3 "immediate_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -4273,10 +5500,10 @@ }) (define_expand "aarch64_sqdmull2_laneq" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") - (match_operand:SI 3 "immediate_operand" "i")] + [(match_operand: 0 "register_operand") + (match_operand:VQ_HSI 1 "register_operand") + (match_operand: 2 "register_operand") + (match_operand:SI 3 "immediate_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -4307,9 +5534,9 @@ ) (define_expand "aarch64_sqdmull2_n" - [(match_operand: 0 "register_operand" "=w") - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 
2 "register_operand" "w")] + [(match_operand: 0 "register_operand") + (match_operand:VQ_HSI 1 "register_operand") + (match_operand: 2 "register_operand")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); @@ -4345,8 +5572,74 @@ [(set_attr "type" "neon_sat_shift_reg")] ) +(define_expand "vec_widen_shiftl_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VQW 1 "register_operand" "w") + (match_operand:SI 2 + "aarch64_simd_shift_imm_bitsize_" "i")] + VSHLL))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); + emit_insn (gen_aarch64_shll_internal (operands[0], operands[1], + p, operands[2])); + DONE; + } +) + +(define_expand "vec_widen_shiftl_hi_" + [(set (match_operand: 0 "register_operand") + (unspec: [(match_operand:VQW 1 "register_operand" "w") + (match_operand:SI 2 + "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_shll2_internal (operands[0], operands[1], + p, operands[2])); + DONE; + } +) + ;; vshll_n +(define_insn "aarch64_shll_internal" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_lo_half" "")) + (match_operand:SI 3 + "aarch64_simd_shift_imm_bitsize_" "i")] + VSHLL))] + "TARGET_SIMD" + { + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (mode)) + return "shll\\t%0., %1., %3"; + else + return "shll\\t%0., %1., %3"; + } + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_shll2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_hi_half" "")) + (match_operand:SI 3 + "aarch64_simd_shift_imm_bitsize_" "i")] + VSHLL))] + "TARGET_SIMD" + { + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (mode)) + return "shll2\\t%0., %1., %3"; + else + return "shll2\\t%0., %1., %3"; + } + [(set_attr "type" "neon_shift_imm_long")] +) + (define_insn "aarch64_shll_n" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand:VD_BHSI 1 "register_operand" "w") @@ -4448,6 +5741,17 @@ [(set_attr "type" "neon_sat_shift_imm_narrow_q")] ) +(define_insn "aarch64_qshrn2_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand:VQN 2 "register_operand" "w") + (match_operand:SI 3 "aarch64_simd_shift_imm_offset_" "i")] + VQSHRN_N))] + "TARGET_SIMD" + "qshrn2\\t%0., %2., %3" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + ;; cm(eq|ge|gt|lt|le) ;; Note, we have constraints for Dz and Z as different expanders @@ -4711,8 +6015,8 @@ ;; sqrt (define_expand "sqrt2" - [(set (match_operand:VHSDF 0 "register_operand" "=w") - (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] + [(set (match_operand:VHSDF 0 "register_operand") + (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))] "TARGET_SIMD" { if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) @@ -4765,8 +6069,8 @@ ) (define_expand "vec_load_lanesoi" - [(set (match_operand:OI 0 "register_operand" "=w") - (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") + [(set (match_operand:OI 0 "register_operand") + (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD2))] "TARGET_SIMD" @@ -4809,8 +6113,8 @@ ) (define_expand "vec_store_lanesoi" - [(set 
(match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:OI [(match_operand:OI 1 "register_operand" "w") + [(set (match_operand:OI 0 "aarch64_simd_struct_operand") + (unspec:OI [(match_operand:OI 1 "register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST2))] "TARGET_SIMD" @@ -4863,8 +6167,8 @@ ) (define_expand "vec_load_lanesci" - [(set (match_operand:CI 0 "register_operand" "=w") - (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") + [(set (match_operand:CI 0 "register_operand") + (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD3))] "TARGET_SIMD" @@ -4907,8 +6211,8 @@ ) (define_expand "vec_store_lanesci" - [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:CI [(match_operand:CI 1 "register_operand" "w") + [(set (match_operand:CI 0 "aarch64_simd_struct_operand") + (unspec:CI [(match_operand:CI 1 "register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST3))] "TARGET_SIMD" @@ -4961,8 +6265,8 @@ ) (define_expand "vec_load_lanesxi" - [(set (match_operand:XI 0 "register_operand" "=w") - (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") + [(set (match_operand:XI 0 "register_operand") + (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD4))] "TARGET_SIMD" @@ -5005,8 +6309,8 @@ ) (define_expand "vec_store_lanesxi" - [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:XI [(match_operand:XI 1 "register_operand" "w") + [(set (match_operand:XI 0 "aarch64_simd_struct_operand") + (unspec:XI [(match_operand:XI 1 "register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST4))] "TARGET_SIMD" @@ -5051,8 +6355,8 @@ ;; Reload patterns for AdvSIMD register list operands. (define_expand "mov" - [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") - (match_operand:VSTRUCT 1 "general_operand" ""))] + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") + (match_operand:VSTRUCT 1 "general_operand"))] "TARGET_SIMD" { if (can_create_pseudo_p ()) @@ -5062,6 +6366,114 @@ } }) + +(define_expand "aarch64_ld1x3" + [(match_operand:CI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + rtx mem = gen_rtx_MEM (CImode, operands[1]); + emit_insn (gen_aarch64_ld1_x3_ (operands[0], mem)); + DONE; +}) + +(define_insn "aarch64_ld1_x3_" + [(set (match_operand:CI 0 "register_operand" "=w") + (unspec:CI + [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))] + "TARGET_SIMD" + "ld1\\t{%S0. - %U0.}, %1" + [(set_attr "type" "neon_load1_3reg")] +) + +(define_expand "aarch64_ld1x4" + [(match_operand:XI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + rtx mem = gen_rtx_MEM (XImode, operands[1]); + emit_insn (gen_aarch64_ld1_x4_ (operands[0], mem)); + DONE; +}) + +(define_insn "aarch64_ld1_x4_" + [(set (match_operand:XI 0 "register_operand" "=w") + (unspec:XI + [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD1))] + "TARGET_SIMD" + "ld1\\t{%S0. 
- %V0.}, %1" + [(set_attr "type" "neon_load1_4reg")] +) + +(define_expand "aarch64_st1x2" + [(match_operand:DI 0 "register_operand") + (match_operand:OI 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + rtx mem = gen_rtx_MEM (OImode, operands[0]); + emit_insn (gen_aarch64_st1_x2_ (mem, operands[1])); + DONE; +}) + +(define_insn "aarch64_st1_x2_" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI + [(match_operand:OI 1 "register_operand" "w") + (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))] + "TARGET_SIMD" + "st1\\t{%S1. - %T1.}, %0" + [(set_attr "type" "neon_store1_2reg")] +) + +(define_expand "aarch64_st1x3" + [(match_operand:DI 0 "register_operand") + (match_operand:CI 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + rtx mem = gen_rtx_MEM (CImode, operands[0]); + emit_insn (gen_aarch64_st1_x3_ (mem, operands[1])); + DONE; +}) + +(define_insn "aarch64_st1_x3_" + [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:CI + [(match_operand:CI 1 "register_operand" "w") + (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))] + "TARGET_SIMD" + "st1\\t{%S1. - %U1.}, %0" + [(set_attr "type" "neon_store1_3reg")] +) + +(define_expand "aarch64_st1x4" + [(match_operand:DI 0 "register_operand" "") + (match_operand:XI 1 "register_operand" "") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + rtx mem = gen_rtx_MEM (XImode, operands[0]); + emit_insn (gen_aarch64_st1_x4_ (mem, operands[1])); + DONE; +}) + +(define_insn "aarch64_st1_x4_" + [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:XI + [(match_operand:XI 1 "register_operand" "w") + (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST1))] + "TARGET_SIMD" + "st1\\t{%S1. 
- %V1.}, %0" + [(set_attr "type" "neon_store1_4reg")] +) + (define_insn "*aarch64_mov" [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] @@ -5195,8 +6607,8 @@ }) (define_expand "aarch64_ldr" - [(match_operand:VSTRUCT 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "w") + [(match_operand:VSTRUCT 0 "register_operand") + (match_operand:DI 1 "register_operand") (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5270,8 +6682,8 @@ ) (define_expand "aarch64_ld" - [(match_operand:VSTRUCT 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "r") + [(match_operand:VSTRUCT 0 "register_operand") + (match_operand:DI 1 "register_operand") (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5298,8 +6710,8 @@ }) (define_expand "aarch64_ld" - [(match_operand:VSTRUCT 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "r") + [(match_operand:VSTRUCT 0 "register_operand") + (match_operand:DI 1 "register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5311,8 +6723,8 @@ }) (define_expand "aarch64_ld1x2" - [(match_operand:OI 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "r") + [(match_operand:OI 0 "register_operand") + (match_operand:DI 1 "register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5324,8 +6736,8 @@ }) (define_expand "aarch64_ld1x2" - [(match_operand:OI 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "r") + [(match_operand:OI 0 "register_operand") + (match_operand:DI 1 "register_operand") (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5338,10 +6750,10 @@ (define_expand "aarch64_ld_lane" - [(match_operand:VSTRUCT 0 "register_operand" "=w") - (match_operand:DI 1 "register_operand" "w") - (match_operand:VSTRUCT 2 "register_operand" "0") - (match_operand:SI 3 "immediate_operand" "i") + [(match_operand:VSTRUCT 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:VSTRUCT 2 "register_operand") + (match_operand:SI 3 "immediate_operand") (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5361,9 +6773,9 @@ ;; D-register list. (define_expand "aarch64_get_dreg" - [(match_operand:VDC 0 "register_operand" "=w") - (match_operand:VSTRUCT 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] + [(match_operand:VDC 0 "register_operand") + (match_operand:VSTRUCT 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { int part = INTVAL (operands[2]); @@ -5378,9 +6790,9 @@ ;; Q-register list. (define_expand "aarch64_get_qreg" - [(match_operand:VQ 0 "register_operand" "=w") - (match_operand:VSTRUCT 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] + [(match_operand:VQ 0 "register_operand") + (match_operand:VSTRUCT 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { int part = INTVAL (operands[2]); @@ -5517,13 +6929,13 @@ ;; This instruction's pattern is generated directly by ;; aarch64_expand_vec_perm_const, so any changes to the pattern would ;; need corresponding changes there. -(define_insn "aarch64_" +(define_insn "aarch64_" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") (match_operand:VALL_F16 2 "register_operand" "w")] PERMUTE))] "TARGET_SIMD" - "\\t%0., %1., %2." 
+ "\\t%0., %1., %2." [(set_attr "type" "neon_permute")] ) @@ -5619,8 +7031,8 @@ ) (define_expand "aarch64_st" - [(match_operand:DI 0 "register_operand" "r") - (match_operand:VSTRUCT 1 "register_operand" "w") + [(match_operand:DI 0 "register_operand") + (match_operand:VSTRUCT 1 "register_operand") (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5632,8 +7044,8 @@ }) (define_expand "aarch64_st" - [(match_operand:DI 0 "register_operand" "r") - (match_operand:VSTRUCT 1 "register_operand" "w") + [(match_operand:DI 0 "register_operand") + (match_operand:VSTRUCT 1 "register_operand") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_SIMD" { @@ -5645,8 +7057,8 @@ }) (define_expand "aarch64_st_lane" - [(match_operand:DI 0 "register_operand" "r") - (match_operand:VSTRUCT 1 "register_operand" "w") + [(match_operand:DI 0 "register_operand") + (match_operand:VSTRUCT 1 "register_operand") (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" @@ -5682,10 +7094,10 @@ ;; extend them in arm_neon.h and insert the resulting Q-regs. (define_expand "aarch64_set_qreg" - [(match_operand:VSTRUCT 0 "register_operand" "+w") - (match_operand:VSTRUCT 1 "register_operand" "0") - (match_operand:VQ 2 "register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] + [(match_operand:VSTRUCT 0 "register_operand") + (match_operand:VSTRUCT 1 "register_operand") + (match_operand:VQ 2 "register_operand") + (match_operand:SI 3 "immediate_operand")] "TARGET_SIMD" { int part = INTVAL (operands[3]); @@ -5700,7 +7112,16 @@ ;; Standard pattern name vec_init. (define_expand "vec_init" - [(match_operand:VALL_F16 0 "register_operand" "") + [(match_operand:VALL_F16 0 "register_operand") + (match_operand 1 "" "")] + "TARGET_SIMD" +{ + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_init" + [(match_operand:VQ_NO2E 0 "register_operand") (match_operand 1 "" "")] "TARGET_SIMD" { @@ -5738,25 +7159,26 @@ ) -(define_insn "aarch64_frecpe" - [(set (match_operand:VHSDF 0 "register_operand" "=w") - (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] +(define_insn "@aarch64_frecpe" + [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") + (unspec:VHSDF_HSDF + [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] UNSPEC_FRECPE))] "TARGET_SIMD" - "frecpe\\t%0., %1." + "frecpe\t%0, %1" [(set_attr "type" "neon_fp_recpe_")] ) -(define_insn "aarch64_frecp" +(define_insn "aarch64_frecpx" [(set (match_operand:GPF_F16 0 "register_operand" "=w") (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] - FRECP))] + UNSPEC_FRECPX))] "TARGET_SIMD" - "frecp\\t%0, %1" - [(set_attr "type" "neon_fp_recp_")] + "frecpx\t%0, %1" + [(set_attr "type" "neon_fp_recpx_")] ) -(define_insn "aarch64_frecps" +(define_insn "@aarch64_frecps" [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w") @@ -5778,9 +7200,9 @@ ;; Standard pattern name vec_extract. (define_expand "vec_extract" - [(match_operand: 0 "aarch64_simd_nonimmediate_operand" "") - (match_operand:VALL_F16 1 "register_operand" "") - (match_operand:SI 2 "immediate_operand" "")] + [(match_operand: 0 "aarch64_simd_nonimmediate_operand") + (match_operand:VALL_F16 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { emit_insn @@ -5788,37 +7210,98 @@ DONE; }) +;; Extract a 64-bit vector from one half of a 128-bit vector. 
+(define_expand "vec_extract" + [(match_operand: 0 "register_operand") + (match_operand:VQMOV_NO2E 1 "register_operand") + (match_operand 2 "immediate_operand")] + "TARGET_SIMD" +{ + int start = INTVAL (operands[2]); + if (start != 0 && start != / 2) + FAIL; + rtx sel = aarch64_gen_stepped_int_parallel ( / 2, start, 1); + emit_insn (gen_aarch64_get_half (operands[0], operands[1], sel)); + DONE; +}) + +;; Extract a single-element 64-bit vector from one half of a 128-bit vector. +(define_expand "vec_extractv2dfv1df" + [(match_operand:V1DF 0 "register_operand") + (match_operand:V2DF 1 "register_operand") + (match_operand 2 "immediate_operand")] + "TARGET_SIMD" +{ + /* V1DF is rarely used by other patterns, so it should be better to hide + it in a subreg destination of a normal DF op. */ + rtx scalar0 = gen_lowpart (DFmode, operands[0]); + emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2])); + DONE; +}) + ;; aes (define_insn "aarch64_crypto_aesv16qi" [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "register_operand" "w")] + (unspec:V16QI + [(xor:V16QI + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w"))] CRYPTO_AES))] "TARGET_SIMD && TARGET_AES" "aes\\t%0.16b, %2.16b" [(set_attr "type" "crypto_aese")] ) -;; When AES/AESMC fusion is enabled we want the register allocation to -;; look like: -;; AESE Vn, _ -;; AESMC Vn, Vn -;; So prefer to tie operand 1 to operand 0 when fusing. - (define_insn "aarch64_crypto_aesv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=w,w") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] CRYPTO_AESMC))] "TARGET_SIMD && TARGET_AES" "aes\\t%0.16b, %1.16b" - [(set_attr "type" "crypto_aesmc") - (set_attr_alternative "enabled" - [(if_then_else (match_test - "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") - (const_string "yes" ) - (const_string "no")) - (const_string "yes")])] + [(set_attr "type" "crypto_aesmc")] +) + +;; When AESE/AESMC fusion is enabled we really want to keep the two together +;; and enforce the register dependency without scheduling or register +;; allocation messing up the order or introducing moves inbetween. +;; Mash the two together during combine. + +(define_insn "*aarch64_crypto_aese_fused" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI + [(unspec:V16QI + [(xor:V16QI + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w"))] + UNSPEC_AESE)] + UNSPEC_AESMC))] + "TARGET_SIMD && TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" + [(set_attr "type" "crypto_aese") + (set_attr "length" "8")] +) + +;; When AESD/AESIMC fusion is enabled we really want to keep the two together +;; and enforce the register dependency without scheduling or register +;; allocation messing up the order or introducing moves inbetween. +;; Mash the two together during combine. 
+ +(define_insn "*aarch64_crypto_aesd_fused" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI + [(unspec:V16QI + [(xor:V16QI + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w"))] + UNSPEC_AESD)] + UNSPEC_AESIMC))] + "TARGET_SIMD && TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" + [(set_attr "type" "crypto_aese") + (set_attr "length" "8")] ) ;; sha1 @@ -5955,13 +7438,13 @@ ;; sha3 -(define_insn "aarch64_eor3qv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=w") - (xor:V8HI - (xor:V8HI - (match_operand:V8HI 2 "register_operand" "%w") - (match_operand:V8HI 3 "register_operand" "w")) - (match_operand:V8HI 1 "register_operand" "w")))] +(define_insn "eor3q4" + [(set (match_operand:VQ_I 0 "register_operand" "=w") + (xor:VQ_I + (xor:VQ_I + (match_operand:VQ_I 2 "register_operand" "w") + (match_operand:VQ_I 3 "register_operand" "w")) + (match_operand:VQ_I 1 "register_operand" "w")))] "TARGET_SIMD && TARGET_SHA3" "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b" [(set_attr "type" "crypto_sha3")] @@ -5991,13 +7474,13 @@ [(set_attr "type" "crypto_sha3")] ) -(define_insn "aarch64_bcaxqv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=w") - (xor:V8HI - (and:V8HI - (not:V8HI (match_operand:V8HI 3 "register_operand" "w")) - (match_operand:V8HI 2 "register_operand" "w")) - (match_operand:V8HI 1 "register_operand" "w")))] +(define_insn "bcaxq4" + [(set (match_operand:VQ_I 0 "register_operand" "=w") + (xor:VQ_I + (and:VQ_I + (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w")) + (match_operand:VQ_I 2 "register_operand" "w")) + (match_operand:VQ_I 1 "register_operand" "w")))] "TARGET_SIMD && TARGET_SHA3" "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b" [(set_attr "type" "crypto_sha3")] @@ -6065,11 +7548,11 @@ ;; fp16fml (define_expand "aarch64_fmll_low" - [(set (match_operand:VDQSF 0 "register_operand" "=w") + [(set (match_operand:VDQSF 0 "register_operand") (unspec:VDQSF - [(match_operand:VDQSF 1 "register_operand" "0") - (match_operand: 2 "register_operand" "w") - (match_operand: 3 "register_operand" "w")] + [(match_operand:VDQSF 1 "register_operand") + (match_operand: 2 "register_operand") + (match_operand: 3 "register_operand")] VFMLA16_LOW))] "TARGET_F16FML" { @@ -6088,11 +7571,11 @@ }) (define_expand "aarch64_fmll_high" - [(set (match_operand:VDQSF 0 "register_operand" "=w") + [(set (match_operand:VDQSF 0 "register_operand") (unspec:VDQSF - [(match_operand:VDQSF 1 "register_operand" "0") - (match_operand: 2 "register_operand" "w") - (match_operand: 3 "register_operand" "w")] + [(match_operand:VDQSF 1 "register_operand") + (match_operand: 2 "register_operand") + (match_operand: 3 "register_operand")] VFMLA16_HIGH))] "TARGET_F16FML" { @@ -6178,11 +7661,11 @@ ) (define_expand "aarch64_fmll_lane_lowv2sf" - [(set (match_operand:V2SF 0 "register_operand" "") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") - (match_operand:V4HF 2 "register_operand" "") - (match_operand:V4HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_imm2" "")] + [(set (match_operand:V2SF 0 "register_operand") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand") + (match_operand:V4HF 2 "register_operand") + (match_operand:V4HF 3 "register_operand") + (match_operand:SI 4 "aarch64_imm2")] VFMLA16_LOW))] "TARGET_F16FML" { @@ -6199,11 +7682,11 @@ ) (define_expand "aarch64_fmll_lane_highv2sf" - [(set (match_operand:V2SF 0 "register_operand" "") - (unspec:V2SF [(match_operand:V2SF 1 
"register_operand" "") - (match_operand:V4HF 2 "register_operand" "") - (match_operand:V4HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_imm2" "")] + [(set (match_operand:V2SF 0 "register_operand") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand") + (match_operand:V4HF 2 "register_operand") + (match_operand:V4HF 3 "register_operand") + (match_operand:SI 4 "aarch64_imm2")] VFMLA16_HIGH))] "TARGET_F16FML" { @@ -6293,11 +7776,11 @@ ) (define_expand "aarch64_fmllq_laneq_lowv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V8HF 2 "register_operand" "") - (match_operand:V8HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_lane_imm3" "")] + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand") + (match_operand:V8HF 2 "register_operand") + (match_operand:V8HF 3 "register_operand") + (match_operand:SI 4 "aarch64_lane_imm3")] VFMLA16_LOW))] "TARGET_F16FML" { @@ -6313,11 +7796,11 @@ }) (define_expand "aarch64_fmllq_laneq_highv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V8HF 2 "register_operand" "") - (match_operand:V8HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_lane_imm3" "")] + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand") + (match_operand:V8HF 2 "register_operand") + (match_operand:V8HF 3 "register_operand") + (match_operand:SI 4 "aarch64_lane_imm3")] VFMLA16_HIGH))] "TARGET_F16FML" { @@ -6407,11 +7890,11 @@ ) (define_expand "aarch64_fmll_laneq_lowv2sf" - [(set (match_operand:V2SF 0 "register_operand" "") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") - (match_operand:V4HF 2 "register_operand" "") - (match_operand:V8HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_lane_imm3" "")] + [(set (match_operand:V2SF 0 "register_operand") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand") + (match_operand:V4HF 2 "register_operand") + (match_operand:V8HF 3 "register_operand") + (match_operand:SI 4 "aarch64_lane_imm3")] VFMLA16_LOW))] "TARGET_F16FML" { @@ -6428,11 +7911,11 @@ }) (define_expand "aarch64_fmll_laneq_highv2sf" - [(set (match_operand:V2SF 0 "register_operand" "") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") - (match_operand:V4HF 2 "register_operand" "") - (match_operand:V8HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_lane_imm3" "")] + [(set (match_operand:V2SF 0 "register_operand") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand") + (match_operand:V4HF 2 "register_operand") + (match_operand:V8HF 3 "register_operand") + (match_operand:SI 4 "aarch64_lane_imm3")] VFMLA16_HIGH))] "TARGET_F16FML" { @@ -6523,11 +8006,11 @@ ) (define_expand "aarch64_fmllq_lane_lowv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V8HF 2 "register_operand" "") - (match_operand:V4HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_imm2" "")] + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand") + (match_operand:V8HF 2 "register_operand") + (match_operand:V4HF 3 "register_operand") + (match_operand:SI 4 "aarch64_imm2")] VFMLA16_LOW))] "TARGET_F16FML" { @@ -6543,11 +8026,11 @@ }) (define_expand "aarch64_fmllq_lane_highv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - 
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V8HF 2 "register_operand" "") - (match_operand:V4HF 3 "register_operand" "") - (match_operand:SI 4 "aarch64_imm2" "")] + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand") + (match_operand:V8HF 2 "register_operand") + (match_operand:V4HF 3 "register_operand") + (match_operand:SI 4 "aarch64_imm2")] VFMLA16_HIGH))] "TARGET_F16FML" { @@ -6657,3 +8140,232 @@ "pmull2\\t%0.1q, %1.2d, %2.2d" [(set_attr "type" "crypto_pmull")] ) + +;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector. +(define_insn "2" + [(set (match_operand:VQN 0 "register_operand" "=w") + (ANY_EXTEND:VQN (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "xtl\t%0., %1." + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "aarch64_xtl" + [(set (match_operand:VQN 0 "register_operand" "=w") + (ANY_EXTEND:VQN (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "" +) + +(define_expand "aarch64_xtn" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VQN 1 "register_operand" "w")))] + "TARGET_SIMD" + "" +) + +;; Truncate a 128-bit integer vector to a 64-bit vector. +(define_insn "trunc2" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VQN 1 "register_operand" "w")))] + "TARGET_SIMD" + "xtn\t%0., %1." + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "aarch64_xtn2_le" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (match_operand: 1 "register_operand" "0") + (truncate: (match_operand:VQN 2 "register_operand" "w"))))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "xtn2\t%0., %2." + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "aarch64_xtn2_be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (truncate: (match_operand:VQN 2 "register_operand" "w")) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "xtn2\t%0., %2." + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "aarch64_xtn2" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (truncate: (match_operand:VQN 2 "register_operand"))] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_xtn2_be (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_aarch64_xtn2_le (operands[0], operands[1], + operands[2])); + DONE; + } +) + +(define_insn "aarch64_bfdot" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (plus:VDQSF + (unspec:VDQSF + [(match_operand: 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + UNSPEC_BFDOT) + (match_operand:VDQSF 1 "register_operand" "0")))] + "TARGET_BF16_SIMD" + "bfdot\t%0., %2., %3." 
+ [(set_attr "type" "neon_dot")] +) + +(define_insn "aarch64_bfdot_lane" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (plus:VDQSF + (unspec:VDQSF + [(match_operand: 2 "register_operand" "w") + (match_operand:VBF 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + UNSPEC_BFDOT) + (match_operand:VDQSF 1 "register_operand" "0")))] + "TARGET_BF16_SIMD" +{ + int nunits = GET_MODE_NUNITS (mode).to_constant (); + int lane = INTVAL (operands[4]); + operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode); + return "bfdot\t%0., %2., %3.2h[%4]"; +} + [(set_attr "type" "neon_dot")] +) + +;; vget_low/high_bf16 +(define_expand "aarch64_vget_lo_halfv8bf" + [(match_operand:V4BF 0 "register_operand") + (match_operand:V8BF 1 "register_operand")] + "TARGET_BF16_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false); + emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); + DONE; +}) + +(define_expand "aarch64_vget_hi_halfv8bf" + [(match_operand:V4BF 0 "register_operand") + (match_operand:V8BF 1 "register_operand")] + "TARGET_BF16_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true); + emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); + DONE; +}) + +;; bfmmla +(define_insn "aarch64_bfmmlaqv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") + (match_operand:V8BF 3 "register_operand" "w")] + UNSPEC_BFMMLA)))] + "TARGET_BF16_SIMD" + "bfmmla\\t%0.4s, %2.8h, %3.8h" + [(set_attr "type" "neon_fp_mla_s_q")] +) + +;; bfmlal +(define_insn "aarch64_bfmlalv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") + (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") + (match_operand:V8BF 3 "register_operand" "w")] + BF_MLA)))] + "TARGET_BF16_SIMD" + "bfmlal\\t%0.4s, %2.8h, %3.8h" + [(set_attr "type" "neon_fp_mla_s_q")] +) + +(define_insn "aarch64_bfmlal_lanev4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") + (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") + (match_operand:VBF 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + BF_MLA)))] + "TARGET_BF16_SIMD" +{ + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return "bfmlal\\t%0.4s, %2.8h, %3.h[%4]"; +} + [(set_attr "type" "neon_fp_mla_s_scalar_q")] +) + +;; 8-bit integer matrix multiply-accumulate +(define_insn "aarch64_simd_mmlav16qi" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (plus:V4SI + (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w") + (match_operand:V16QI 3 "register_operand" "w")] MATMUL) + (match_operand:V4SI 1 "register_operand" "0")))] + "TARGET_I8MM" + "mmla\\t%0.4s, %2.16b, %3.16b" + [(set_attr "type" "neon_mla_s_q")] +) + +;; bfcvtn +(define_insn "aarch64_bfcvtn" + [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") + (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_BFCVTN))] + "TARGET_BF16_SIMD" + "bfcvtn\\t%0.4h, %1.4s" + [(set_attr "type" "neon_fp_cvt_narrow_s_q")] +) + +(define_insn "aarch64_bfcvtn2v8bf" + [(set (match_operand:V8BF 0 "register_operand" "=w") + (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "w")] + UNSPEC_BFCVTN2))] + "TARGET_BF16_SIMD" + "bfcvtn2\\t%0.8h, %2.4s" + [(set_attr "type" 
"neon_fp_cvt_narrow_s_q")] +) + +(define_insn "aarch64_bfcvtbf" + [(set (match_operand:BF 0 "register_operand" "=w") + (unspec:BF [(match_operand:SF 1 "register_operand" "w")] + UNSPEC_BFCVT))] + "TARGET_BF16_FP" + "bfcvt\\t%h0, %s1" + [(set_attr "type" "f_cvt")] +) + +;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes. +(define_insn "aarch64_vbfcvt" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")] + UNSPEC_BFCVTN))] + "TARGET_BF16_SIMD" + "shll\\t%0.4s, %1.4h, #16" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_vbfcvt_highv8bf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")] + UNSPEC_BFCVTN2))] + "TARGET_BF16_SIMD" + "shll2\\t%0.4s, %1.8h, #16" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_bfcvtsf" + [(set (match_operand:SF 0 "register_operand" "=w") + (unspec:SF [(match_operand:BF 1 "register_operand" "w")] + UNSPEC_BFCVT))] + "TARGET_BF16_FP" + "shl\\t%d0, %d1, #16" + [(set_attr "type" "neon_shift_imm")] +) diff --git a/gcc/config/aarch64/aarch64-speculation.cc b/gcc/config/aarch64/aarch64-speculation.cc new file mode 100644 index 0000000000000..dbade37c537d0 --- /dev/null +++ b/gcc/config/aarch64/aarch64-speculation.cc @@ -0,0 +1,487 @@ +/* Speculation tracking and mitigation (e.g. CVE 2017-5753) for AArch64. + Copyright (C) 2018-2021 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "rtl.h" +#include "tree-pass.h" +#include "profile-count.h" +#include "backend.h" +#include "cfgbuild.h" +#include "print-rtl.h" +#include "cfgrtl.h" +#include "function.h" +#include "basic-block.h" +#include "memmodel.h" +#include "emit-rtl.h" +#include "insn-attr.h" +#include "df.h" +#include "tm_p.h" +#include "insn-config.h" +#include "recog.h" + +/* This pass scans the RTL just before the final branch + re-organisation pass. The aim is to identify all places where + there is conditional control flow and to insert code that tracks + any speculative execution of a conditional branch. + + To do this we reserve a call-clobbered register (so that it can be + initialized very early in the function prologue) that can then be + updated each time there is a conditional branch. At each such + branch we then generate a code sequence that uses conditional + select operations that are not subject to speculation themselves + (we ignore for the moment situations where that might not always be + strictly true). For example, a branch sequence such as: + + B.EQ + ... + : + + is transformed to: + + B.EQ + CSEL tracker, tracker, XZr, ne + ... 
+ : + CSEL tracker, tracker, XZr, eq + + Since we start with the tracker initialized to all bits one, if at any + time the predicted control flow diverges from the architectural program + behavior, then the tracker will become zero (but not otherwise). + + The tracker value can be used at any time at which a value needs + guarding against incorrect speculation. This can be done in + several ways, but they all amount to the same thing. For an + untrusted address, or an untrusted offset to a trusted address, we + can simply mask the address with the tracker with the untrusted + value. If the CPU is not speculating, or speculating correctly, + then the value will remain unchanged, otherwise it will be clamped + to zero. For more complex scenarios we can compare the tracker + against zero and use the flags to form a new selection with an + alternate safe value. + + On implementations where the data processing instructions may + themselves produce speculative values, the architecture requires + that a CSDB instruction will resolve such data speculation, so each + time we use the tracker for protecting a vulnerable value we also + emit a CSDB: we do not need to do that each time the tracker itself + is updated. + + At function boundaries, we need to communicate the speculation + tracking state with the caller or the callee. This is tricky + because there is no register available for such a purpose without + creating a new ABI. We deal with this by relying on the principle + that in all real programs the stack pointer, SP will never be NULL + at a function boundary; we can thus encode the speculation state in + SP by clearing SP if the speculation tracker itself is NULL. After + the call we recover the tracking state back from SP into the + tracker register. The results is that a function call sequence is + transformed to + + MOV tmp, SP + AND tmp, tmp, tracker + MOV SP, tmp + BL + CMP SP, #0 + CSETM tracker, ne + + The additional MOV instructions in the pre-call sequence are needed + because SP cannot be used directly with the AND instruction. + + The code inside a function body uses the post-call sequence in the + prologue to establish the tracker and the pre-call sequence in the + epilogue to re-encode the state for the return. + + The code sequences have the nice property that if called from, or + calling a function that does not track speculation then the stack pointer + will always be non-NULL and hence the tracker will be initialized to all + bits one as we need: we lose the ability to fully track speculation in that + case, but we are still architecturally safe. + + Tracking speculation in this way is quite expensive, both in code + size and execution time. We employ a number of tricks to try to + limit this: + + 1) Simple leaf functions with no conditional branches (or use of + the tracker) do not need to establish a new tracker: they simply + carry the tracking state through SP for the duration of the call. + The same is also true for leaf functions that end in a tail-call. + + 2) Back-to-back function calls in a single basic block also do not + need to re-establish the tracker between the calls. Again, we can + carry the tracking state in SP for this period of time unless the + tracker value is needed at that point in time. + + We run the pass just before the final branch reorganization pass so + that we can handle most of the conditional branch cases using the + standard edge insertion code. 
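(Editorial aside, not part of the patch: a scalar C model of the tracker update and of the SP encoding across calls described above; all names here are illustrative only.)

    #include <stdint.h>

    static uint64_t tracker = ~UINT64_C (0);  /* all ones: no mis-speculation seen */

    /* CSEL tracker, tracker, XZR, <cond>: zero the tracker on any path whose
       branch condition disagrees with the direction actually followed.  */
    static void track_branch (int cond_holds, int taken_path)
    {
      if (cond_holds != taken_path)
        tracker = 0;
    }

    /* Pre-call: encode the state into SP, so a mis-speculating context passes
       SP == 0.  Post-call: recover the tracker from SP.  */
    static uint64_t encode_in_sp (uint64_t sp) { return sp & tracker; }
    static void recover_from_sp (uint64_t sp) { tracker = sp ? ~UINT64_C (0) : 0; }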
The reorg pass will hopefully clean + things up for afterwards so that the results aren't too + horrible. */ + +/* Generate a code sequence to clobber SP if speculating incorreclty. */ +static rtx_insn * +aarch64_speculation_clobber_sp () +{ + rtx sp = gen_rtx_REG (DImode, SP_REGNUM); + rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + rtx scratch = gen_rtx_REG (DImode, SPECULATION_SCRATCH_REGNUM); + + start_sequence (); + emit_insn (gen_rtx_SET (scratch, sp)); + emit_insn (gen_anddi3 (scratch, scratch, tracker)); + emit_insn (gen_rtx_SET (sp, scratch)); + rtx_insn *seq = get_insns (); + end_sequence (); + return seq; +} + +/* Generate a code sequence to establish the tracker variable from the + contents of SP. */ +static rtx_insn * +aarch64_speculation_establish_tracker () +{ + rtx sp = gen_rtx_REG (DImode, SP_REGNUM); + rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + start_sequence (); + rtx cc = aarch64_gen_compare_reg (EQ, sp, const0_rtx); + emit_insn (gen_cstoredi_neg (tracker, + gen_rtx_NE (CCmode, cc, const0_rtx), cc)); + rtx_insn *seq = get_insns (); + end_sequence (); + return seq; +} + +/* Main speculation tracking pass. */ +unsigned int +aarch64_do_track_speculation () +{ + basic_block bb; + bool needs_tracking = false; + bool need_second_pass = false; + rtx_insn *insn; + int fixups_pending = 0; + + FOR_EACH_BB_FN (bb, cfun) + { + insn = BB_END (bb); + + if (dump_file) + fprintf (dump_file, "Basic block %d:\n", bb->index); + + while (insn != BB_HEAD (bb) + && NOTE_P (insn)) + insn = PREV_INSN (insn); + + if (control_flow_insn_p (insn)) + { + if (any_condjump_p (insn)) + { + if (dump_file) + { + fprintf (dump_file, " condjump\n"); + dump_insn_slim (dump_file, insn); + } + + rtx src = SET_SRC (pc_set (insn)); + + /* Check for an inverted jump, where the fall-through edge + appears first. */ + bool inverted = GET_CODE (XEXP (src, 2)) != PC; + /* The other edge must be the PC (we assume that we don't + have conditional return instructions). */ + gcc_assert (GET_CODE (XEXP (src, 1 + !inverted)) == PC); + + rtx cond = copy_rtx (XEXP (src, 0)); + gcc_assert (COMPARISON_P (cond) + && REG_P (XEXP (cond, 0)) + && REGNO (XEXP (cond, 0)) == CC_REGNUM + && XEXP (cond, 1) == const0_rtx); + rtx branch_tracker = gen_speculation_tracker (copy_rtx (cond)); + rtx fallthru_tracker = gen_speculation_tracker_rev (cond); + if (inverted) + std::swap (branch_tracker, fallthru_tracker); + + insert_insn_on_edge (branch_tracker, BRANCH_EDGE (bb)); + insert_insn_on_edge (fallthru_tracker, FALLTHRU_EDGE (bb)); + needs_tracking = true; + } + else if (GET_CODE (PATTERN (insn)) == RETURN) + { + /* If we already know we'll need a second pass, don't put + out the return sequence now, or we might end up with + two copies. Instead, we'll do all return statements + during the second pass. However, if this is the + first return insn we've found and we already + know that we'll need to emit the code, we can save a + second pass by emitting the code now. */ + if (needs_tracking && ! 
need_second_pass) + { + rtx_insn *seq = aarch64_speculation_clobber_sp (); + emit_insn_before (seq, insn); + } + else + { + fixups_pending++; + need_second_pass = true; + } + } + else if (find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX)) + { + rtx_insn *seq = aarch64_speculation_clobber_sp (); + emit_insn_before (seq, insn); + needs_tracking = true; + } + } + else + { + if (dump_file) + { + fprintf (dump_file, " other\n"); + dump_insn_slim (dump_file, insn); + } + } + } + + FOR_EACH_BB_FN (bb, cfun) + { + rtx_insn *end = BB_END (bb); + rtx_insn *call_insn = NULL; + + if (bb->flags & BB_NON_LOCAL_GOTO_TARGET) + { + rtx_insn *label = NULL; + /* For non-local goto targets we have to recover the + speculation state from SP. Find the last code label at + the head of the block and place the fixup sequence after + that. */ + for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn)) + { + if (LABEL_P (insn)) + label = insn; + /* Never put anything before the basic block note. */ + if (NOTE_INSN_BASIC_BLOCK_P (insn)) + label = insn; + if (INSN_P (insn)) + break; + } + + gcc_assert (label); + emit_insn_after (aarch64_speculation_establish_tracker (), label); + } + + /* Scan the insns looking for calls. We need to pass the + speculation tracking state encoded in to SP. After a call we + restore the speculation tracking into the tracker register. + To avoid unnecessary transfers we look for two or more calls + within a single basic block and eliminate, where possible, + any redundant operations. */ + for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn)) + { + if (NONDEBUG_INSN_P (insn) + && recog_memoized (insn) >= 0 + && (get_attr_speculation_barrier (insn) + == SPECULATION_BARRIER_TRUE)) + { + if (call_insn) + { + /* This instruction requires the speculation + tracking to be in the tracker register. If there + was an earlier call in this block, we need to + copy the speculation tracking back there. */ + emit_insn_after (aarch64_speculation_establish_tracker (), + call_insn); + call_insn = NULL; + } + + needs_tracking = true; + } + + if (CALL_P (insn)) + { + bool tailcall + = (SIBLING_CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL_RTX)); + + /* Tailcalls are like returns, we can eliminate the + transfer between the tracker register and SP if we + know that this function does not itself need + tracking. */ + if (tailcall && (need_second_pass || !needs_tracking)) + { + /* Don't clear call_insn if it is set - needs_tracking + will be true in that case and so we will end + up putting out mitigation sequences. */ + fixups_pending++; + need_second_pass = true; + break; + } + + needs_tracking = true; + + /* We always need a transfer before the first call in a BB. */ + if (!call_insn) + emit_insn_before (aarch64_speculation_clobber_sp (), insn); + + /* Tail-calls and no-return calls don't need any post-call + reestablishment of the tracker. */ + if (! tailcall) + call_insn = insn; + else + call_insn = NULL; + } + + if (insn == end) + break; + } + + if (call_insn) + { + rtx_insn *seq = aarch64_speculation_establish_tracker (); + + /* Handle debug insns at the end of the BB. Put the extra + insns after them. This ensures that we have consistent + behaviour for the placement of the extra insns between + debug and non-debug builds. 
*/ + for (insn = call_insn; + insn != end && DEBUG_INSN_P (NEXT_INSN (insn)); + insn = NEXT_INSN (insn)) + ; + + if (insn == end) + { + edge e = find_fallthru_edge (bb->succs); + /* We need to be very careful about some calls that + appear at the end of a basic block. If the call + involves exceptions, then the compiler may depend on + this being the last instruction in the block. The + easiest way to handle this is to commit the new + instructions on the fall-through edge and to let + commit_edge_insertions clean things up for us. + + Sometimes, eg with OMP, there may not even be an + outgoing edge after the call. In that case, there's + not much we can do, presumably the compiler has + decided that the call can never return in this + context. */ + if (e) + { + /* We need to set the location lists explicitly in + this case. */ + if (! INSN_P (seq)) + { + start_sequence (); + emit_insn (seq); + seq = get_insns (); + end_sequence (); + } + + for (rtx_insn *list = seq; list; list = NEXT_INSN (list)) + INSN_LOCATION (list) = INSN_LOCATION (call_insn); + + insert_insn_on_edge (seq, e); + } + } + else + emit_insn_after (seq, call_insn); + } + } + + if (needs_tracking) + { + if (need_second_pass) + { + /* We found a return instruction before we found out whether + or not we need to emit the tracking code, but we now + know we do. Run quickly over the basic blocks and + fix up the return insns. */ + FOR_EACH_BB_FN (bb, cfun) + { + insn = BB_END (bb); + + while (insn != BB_HEAD (bb) + && NOTE_P (insn)) + insn = PREV_INSN (insn); + + if ((control_flow_insn_p (insn) + && GET_CODE (PATTERN (insn)) == RETURN) + || (CALL_P (insn) + && (SIBLING_CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL_RTX)))) + { + rtx_insn *seq = aarch64_speculation_clobber_sp (); + emit_insn_before (seq, insn); + fixups_pending--; + } + } + gcc_assert (fixups_pending == 0); + } + + /* Set up the initial value of the tracker, using the incoming SP. */ + insert_insn_on_edge (aarch64_speculation_establish_tracker (), + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + commit_edge_insertions (); + } + + return 0; +} + +namespace { + +const pass_data pass_data_aarch64_track_speculation = +{ + RTL_PASS, /* type. */ + "speculation", /* name. */ + OPTGROUP_NONE, /* optinfo_flags. */ + TV_MACH_DEP, /* tv_id. */ + 0, /* properties_required. */ + 0, /* properties_provided. */ + 0, /* properties_destroyed. */ + 0, /* todo_flags_start. */ + 0 /* todo_flags_finish. */ +}; + +class pass_track_speculation : public rtl_opt_pass +{ + public: + pass_track_speculation(gcc::context *ctxt) + : rtl_opt_pass(pass_data_aarch64_track_speculation, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return aarch64_track_speculation; + } + + virtual unsigned int execute (function *) + { + return aarch64_do_track_speculation (); + } +}; // class pass_track_speculation. +} // anon namespace. + +/* Create a new pass instance. */ +rtl_opt_pass * +make_pass_track_speculation (gcc::context *ctxt) +{ + return new pass_track_speculation (ctxt); +} diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc new file mode 100644 index 0000000000000..dfdf0e2fd1863 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -0,0 +1,2809 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "memmodel.h" +#include "insn-codes.h" +#include "optabs.h" +#include "recog.h" +#include "expr.h" +#include "basic-block.h" +#include "function.h" +#include "fold-const.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimplify.h" +#include "explow.h" +#include "emit-rtl.h" +#include "tree-vector-builder.h" +#include "rtx-vector-builder.h" +#include "vec-perm-indices.h" +#include "aarch64-sve-builtins.h" +#include "aarch64-sve-builtins-shapes.h" +#include "aarch64-sve-builtins-base.h" +#include "aarch64-sve-builtins-functions.h" + +using namespace aarch64_sve; + +namespace { + +/* Return the UNSPEC_CMLA* unspec for rotation amount ROT. */ +static int +unspec_cmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_CMLA; + case 90: return UNSPEC_CMLA90; + case 180: return UNSPEC_CMLA180; + case 270: return UNSPEC_CMLA270; + default: gcc_unreachable (); + } +} + +/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT. */ +static int +unspec_fcmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_FCMLA; + case 90: return UNSPEC_FCMLA90; + case 180: return UNSPEC_FCMLA180; + case 270: return UNSPEC_FCMLA270; + default: gcc_unreachable (); + } +} + +/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT. */ +static int +unspec_cond_fcmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_COND_FCMLA; + case 90: return UNSPEC_COND_FCMLA90; + case 180: return UNSPEC_COND_FCMLA180; + case 270: return UNSPEC_COND_FCMLA270; + default: gcc_unreachable (); + } +} + +/* Expand a call to svmad, or svmla after reordering its operands. + Make _m forms merge with argument MERGE_ARGNO. */ +static rtx +expand_mad (function_expander &e, + unsigned int merge_argno = DEFAULT_MERGE_ARGNO) +{ + if (e.pred == PRED_x) + { + insn_code icode; + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_fma (e.vector_mode (0)); + else + icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + insn_code icode = e.direct_optab_handler (cond_fma_optab); + return e.use_cond_insn (icode, merge_argno); +} + +/* Expand a call to svmla_lane or svmls_lane using floating-point unspec + UNSPEC. */ +static rtx +expand_mla_mls_lane (function_expander &e, int unspec) +{ + /* Put the operands in the normal (fma ...) order, with the accumulator + last. This fits naturally since that's also the unprinted operand + in the asm output. */ + e.rotate_inputs_left (0, 4); + insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); +} + +/* Expand a call to svmsb, or svmls after reordering its operands. + Make _m forms merge with argument MERGE_ARGNO. 
*/ +static rtx +expand_msb (function_expander &e, + unsigned int merge_argno = DEFAULT_MERGE_ARGNO) +{ + if (e.pred == PRED_x) + { + insn_code icode; + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_fnma (e.vector_mode (0)); + else + icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + insn_code icode = e.direct_optab_handler (cond_fnma_optab); + return e.use_cond_insn (icode, merge_argno); +} + +class svabd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* The integer operations are represented as the subtraction of the + minimum from the maximum, with the signedness of the instruction + keyed off the signedness of the maximum operation. */ + rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX; + insn_code icode; + if (e.pred == PRED_x) + { + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0)); + else + icode = code_for_aarch64_pred_abd (e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0)); + else + icode = code_for_aarch64_cond_abd (e.vector_mode (0)); + return e.use_cond_insn (icode); + } +}; + +/* Implements svacge, svacgt, svacle and svaclt. */ +class svac_impl : public function_base +{ +public: + CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.add_ptrue_hint (0, e.gp_mode (0)); + insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); + } + + /* The unspec code for the underlying comparison. */ + int m_unspec; +}; + +class svadda_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the predicate last, as required by mask_fold_left_plus_optab. */ + e.rotate_inputs_left (0, 3); + machine_mode mode = e.vector_mode (0); + insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements svadr[bhwd]. */ +class svadr_bhwd_impl : public function_base +{ +public: + CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = GET_MODE (e.args[0]); + if (m_shift == 0) + return e.use_exact_insn (code_for_aarch64_adr (mode)); + + /* Turn the access size into an extra shift argument. */ + rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode)); + e.args.quick_push (expand_vector_broadcast (mode, shift)); + return e.use_exact_insn (code_for_aarch64_adr_shift (mode)); + } + + /* How many bits left to shift the vector displacement. */ + unsigned int m_shift; +}; + +class svbic_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert svbic of a constant into svand of its inverse. 
*/ + if (CONST_INT_P (e.args[2])) + { + machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); + e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode); + return e.map_to_rtx_codes (AND, AND, -1); + } + + if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z); + } + + if (e.pred == PRED_x) + return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0))); + + return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0))); + } +}; + +/* Implements svbrkn, svbrkpa and svbrkpb. */ +class svbrk_binary_impl : public function_base +{ +public: + CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_brk (m_unspec)); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +/* Implements svbrka and svbrkb. */ +class svbrk_unary_impl : public function_base +{ +public: + CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_cond_insn (code_for_aarch64_brk (m_unspec)); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svcadd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + if (rot == 90) + return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90, + UNSPEC_COND_FCADD90); + if (rot == 270) + return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270, + UNSPEC_COND_FCADD270); + gcc_unreachable (); + } +}; + +/* Implements svclasta and svclastb. */ +class svclast_impl : public quiet +{ +public: + CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Match the fold_extract_optab order. */ + std::swap (e.args[0], e.args[1]); + machine_mode mode = e.vector_mode (0); + insn_code icode; + if (e.mode_suffix_id == MODE_n) + icode = code_for_fold_extract (m_unspec, mode); + else + icode = code_for_aarch64_fold_extract_vector (m_unspec, mode); + return e.use_exact_insn (icode); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svcmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + if (e.type_suffix (0).float_p) + { + /* Make the operand order the same as the one used by the fma optabs, + with the accumulator last. */ + e.rotate_inputs_left (1, 4); + return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3); + } + else + { + int cmla = unspec_cmla (rot); + return e.map_to_unspecs (cmla, cmla, -1); + } + } +}; + +class svcmla_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + machine_mode mode = e.vector_mode (0); + if (e.type_suffix (0).float_p) + { + /* Make the operand order the same as the one used by the fma optabs, + with the accumulator last. 
*/ + e.rotate_inputs_left (0, 4); + insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode); + return e.use_exact_insn (icode); + } + else + { + insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode); + return e.use_exact_insn (icode); + } + } +}; + +/* Implements svcmp (except svcmpuo, which is handled separately). */ +class svcmp_impl : public function_base +{ +public: + CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp) + : m_code (code), m_unspec_for_fp (unspec_for_fp) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pg = gimple_call_arg (f.call, 0); + tree rhs1 = gimple_call_arg (f.call, 1); + tree rhs2 = gimple_call_arg (f.call, 2); + + /* Convert a ptrue-predicated integer comparison into the corresponding + gimple-level operation. */ + if (integer_all_onesp (pg) + && f.type_suffix (0).element_bytes == 1 + && f.type_suffix (0).integer_p) + { + gimple_seq stmts = NULL; + rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, m_code, rhs1, rhs2); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + + /* Comparisons are UNSPEC_PRED_Z operations and so need a hint + operand. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + + if (e.type_suffix (0).integer_p) + { + bool unsigned_p = e.type_suffix (0).unsigned_p; + rtx_code code = get_rtx_code (m_code, unsigned_p); + return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode)); + } + + insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode); + return e.use_exact_insn (icode); + } + + /* The tree code associated with the comparison. */ + tree_code m_code; + + /* The unspec code to use for floating-point comparisons. */ + int m_unspec_for_fp; +}; + +/* Implements svcmp_wide. */ +class svcmp_wide_impl : public function_base +{ +public: + CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint, + int unspec_for_uint) + : m_code (code), m_unspec_for_sint (unspec_for_sint), + m_unspec_for_uint (unspec_for_uint) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + bool unsigned_p = e.type_suffix (0).unsigned_p; + rtx_code code = get_rtx_code (m_code, unsigned_p); + + /* Comparisons are UNSPEC_PRED_Z operations and so need a hint + operand. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + + /* If the argument is a constant that the unwidened comparisons + can handle directly, use them instead. */ + insn_code icode = code_for_aarch64_pred_cmp (code, mode); + rtx op2 = unwrap_const_vec_duplicate (e.args[3]); + if (CONSTANT_P (op2) + && insn_data[icode].operand[4].predicate (op2, DImode)) + { + e.args[3] = op2; + return e.use_exact_insn (icode); + } + + int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint); + return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode)); + } + + /* The tree code associated with the comparison. */ + tree_code m_code; + + /* The unspec codes for signed and unsigned wide comparisons + respectively. 
*/ + int m_unspec_for_sint; + int m_unspec_for_uint; +}; + +class svcmpuo_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + e.add_ptrue_hint (0, e.gp_mode (0)); + return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0))); + } +}; + +class svcnot_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + if (e.pred == PRED_x) + { + /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs + a ptrue hint. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); + } + + return e.use_cond_insn (code_for_cond_cnot (mode), 0); + } +}; + +/* Implements svcnt[bhwd], which count the number of elements + in a particular vector mode. */ +class svcnt_bhwd_impl : public function_base +{ +public: + CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree count = build_int_cstu (TREE_TYPE (f.lhs), + GET_MODE_NUNITS (m_ref_mode)); + return gimple_build_assign (f.lhs, count); + } + + rtx + expand (function_expander &) const OVERRIDE + { + return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode); + } + + /* The mode of the vector associated with the [bhwd] suffix. */ + machine_mode m_ref_mode; +}; + +/* Implements svcnt[bhwd]_pat. */ +class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl +{ +public: + CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode) + : svcnt_bhwd_impl (ref_mode) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pattern_arg = gimple_call_arg (f.call, 0); + aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); + + if (pattern == AARCH64_SV_ALL) + /* svcnt[bwhd]_pat (SV_ALL) == svcnt[bwhd] (). */ + return svcnt_bhwd_impl::fold (f); + + /* See whether we can count the number of elements in the pattern + at compile time. */ + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); + if (value >= 0) + { + tree count = build_int_cstu (TREE_TYPE (f.lhs), value); + return gimple_build_assign (f.lhs, count); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + e.args.quick_push (gen_int_mode (elements_per_vq, DImode)); + e.args.quick_push (const1_rtx); + return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat); + } +}; + +class svcntp_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + e.add_ptrue_hint (0, mode); + return e.use_exact_insn (code_for_aarch64_pred_cntp (mode)); + } +}; + +/* Implements svcreate2, svcreate3 and svcreate4. */ +class svcreate_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + unsigned int nargs = gimple_call_num_args (f.call); + tree lhs_type = TREE_TYPE (f.lhs); + + /* Replace the call with a clobber of the result (to prevent it from + becoming upwards exposed) followed by stores into each individual + vector of tuple. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the clobber statement + rather than the final vector store.
*/ + gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type)); + + for (unsigned int i = nargs; i-- > 0; ) + { + tree rhs_vector = gimple_call_arg (f.call, i); + tree field = tuple_type_field (TREE_TYPE (f.lhs)); + tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + unshare_expr (f.lhs), field, NULL_TREE); + tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), + lhs_array, size_int (i), + NULL_TREE, NULL_TREE); + gassign *assign = gimple_build_assign (lhs_vector, rhs_vector); + gsi_insert_after (f.gsi, assign, GSI_SAME_STMT); + } + return clobber; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx lhs_tuple = e.get_nonoverlapping_reg_target (); + + /* Record that LHS_TUPLE is dead before the first store. */ + emit_clobber (lhs_tuple); + for (unsigned int i = 0; i < e.args.length (); ++i) + { + /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. */ + rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]), + lhs_tuple, GET_MODE (lhs_tuple), + i * BYTES_PER_SVE_VECTOR); + emit_move_insn (lhs_vector, e.args[i]); + } + return lhs_tuple; + } +}; + +class svcvt_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode0 = e.vector_mode (0); + machine_mode mode1 = e.vector_mode (1); + insn_code icode; + /* All this complication comes from the need to select four things + simultaneously: + + (1) the kind of conversion (int<-float, float<-int, float<-float) + (2) signed vs. unsigned integers, where relevant + (3) the predication mode, which must be the wider of the predication + modes for MODE0 and MODE1 + (4) the predication type (m, x or z) + + The only supported int<->float conversions for which the integer is + narrower than the float are SI<->DF. It's therefore more convenient + to handle (3) by defining two patterns for int<->float conversions: + one in which the integer is at least as wide as the float and so + determines the predication mode, and another single SI<->DF pattern + in which the float's mode determines the predication mode (which is + always VNx2BI in that case). + + The names of the patterns follow the optab convention of giving + the source mode before the destination mode. */ + if (e.type_suffix (1).integer_p) + { + int unspec = (e.type_suffix (1).unsigned_p + ? UNSPEC_COND_UCVTF + : UNSPEC_COND_SCVTF); + if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes) + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0) + : code_for_cond_nonextend (unspec, mode1, mode0)); + else + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_extend (unspec, mode1, mode0) + : code_for_cond_extend (unspec, mode1, mode0)); + } + else + { + int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT + : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU + : UNSPEC_COND_FCVTZS); + if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes) + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0) + : code_for_cond_nontrunc (unspec, mode1, mode0)); + else + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_trunc (unspec, mode1, mode0) + : code_for_cond_trunc (unspec, mode1, mode0)); + } + + if (e.pred == PRED_x) + return e.use_pred_x_insn (icode); + return e.use_cond_insn (icode); + } +}; + +class svdot_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* In the optab, the multiplication operands come before the accumulator + operand. 
The optab is keyed off the multiplication mode. */ + e.rotate_inputs_left (0, 3); + insn_code icode + = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab, + 0, GET_MODE (e.args[0])); + return e.use_unpred_insn (icode); + } +}; + +class svdotprod_lane_impl : public unspec_based_function_base +{ +public: + CONSTEXPR svdotprod_lane_impl (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_float) + : unspec_based_function_base (unspec_for_sint, + unspec_for_uint, + unspec_for_float) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Use the same ordering as the dot_prod_optab, with the + accumulator last. */ + e.rotate_inputs_left (0, 4); + int unspec = unspec_for (e); + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode)); + } +}; + +class svdup_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vec_type = TREE_TYPE (f.lhs); + tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1); + + if (f.pred == PRED_none || f.pred == PRED_x) + { + if (CONSTANT_CLASS_P (rhs)) + { + if (f.type_suffix (0).bool_p) + return (tree_to_shwi (rhs) + ? f.fold_to_ptrue () + : f.fold_to_pfalse ()); + + tree rhs_vector = build_vector_from_val (vec_type, rhs); + return gimple_build_assign (f.lhs, rhs_vector); + } + + /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we + would need to introduce an extra and unwanted conversion to + the truth vector element type. */ + if (!f.type_suffix (0).bool_p) + return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs); + } + + /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */ + if (f.pred == PRED_z) + { + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vec_type, 0); + rhs = f.force_vector (stmts, vec_type, rhs); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs, + build_zero_cst (vec_type)); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.pred == PRED_none || e.pred == PRED_x) + /* There's no benefit to using predicated instructions for _x here. */ + return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab)); + + /* Model predicated svdups as a SEL in which the "true" value is + the duplicate of the function argument and the "false" value + is the value of inactive lanes. */ + insn_code icode; + machine_mode mode = e.vector_mode (0); + if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ())) + /* Duplicate the constant to fill a vector. The pattern optimizes + various cases involving constant operands, falling back to SEL + if necessary. */ + icode = code_for_vcond_mask (mode, mode); + else + /* Use the pattern for selecting between a duplicated scalar + variable and a vector fallback. */ + icode = code_for_aarch64_sel_dup (mode); + return e.use_vcond_mask_insn (icode); + } +}; + +class svdup_lane_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* The native DUP lane has an index range of 64 bytes. */ + machine_mode mode = e.vector_mode (0); + if (CONST_INT_P (e.args[1]) + && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63)) + return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode)); + + /* Treat svdup_lane as if it were svtbl_n.
*/ + return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); + } +}; + +class svdupq_impl : public quiet +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vec_type = TREE_TYPE (f.lhs); + unsigned int nargs = gimple_call_num_args (f.call); + /* For predicates, pad out each argument so that we have one element + per bit. */ + unsigned int factor = (f.type_suffix (0).bool_p + ? f.type_suffix (0).element_bytes : 1); + tree_vector_builder builder (vec_type, nargs * factor, 1); + for (unsigned int i = 0; i < nargs; ++i) + { + tree elt = gimple_call_arg (f.call, i); + if (!CONSTANT_CLASS_P (elt)) + return NULL; + builder.quick_push (elt); + for (unsigned int j = 1; j < factor; ++j) + builder.quick_push (build_zero_cst (TREE_TYPE (vec_type))); + } + return gimple_build_assign (f.lhs, builder.build ()); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + unsigned int elements_per_vq = e.args.length (); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + /* Construct a vector of integers so that we can compare them against + zero below. Zero vs. nonzero is the only distinction that + matters. */ + mode = aarch64_sve_int_mode (mode); + for (unsigned int i = 0; i < elements_per_vq; ++i) + e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode), + e.args[i], QImode); + } + + /* Get the 128-bit Advanced SIMD vector for this data size. */ + scalar_mode element_mode = GET_MODE_INNER (mode); + machine_mode vq_mode = aarch64_vq_mode (element_mode).require (); + gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode))); + + /* Put the arguments into a 128-bit Advanced SIMD vector. We want + argument N to go into architectural lane N, whereas Advanced SIMD + vectors are loaded memory lsb to register lsb. We therefore need + to reverse the elements for big-endian targets. */ + rtx vq_reg = gen_reg_rtx (vq_mode); + rtvec vec = rtvec_alloc (elements_per_vq); + for (unsigned int i = 0; i < elements_per_vq; ++i) + { + unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i; + RTVEC_ELT (vec, i) = e.args[argno]; + } + aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec)); + + /* If the result is a boolean, compare the data vector against zero. */ + if (mode != e.vector_mode (0)) + { + rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg); + return aarch64_convert_sve_data_to_pred (e.possible_target, + e.vector_mode (0), data_dupq); + } + + return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg); + } +}; + +class svdupq_lane_impl : public quiet +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + rtx index = e.args[1]; + if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3)) + { + /* Use the .Q form of DUP, which is the native instruction for + this function. */ + insn_code icode = code_for_aarch64_sve_dupq_lane (mode); + unsigned int num_indices = e.elements_per_vq (0); + rtx indices = aarch64_gen_stepped_int_parallel + (num_indices, INTVAL (index) * num_indices, 1); + + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[0]); + e.add_fixed_operand (indices); + return e.generate_insn (icode); + } + + /* Build a .D TBL index for the pairs of doublewords that we want to + duplicate. */ + if (CONST_INT_P (index)) + { + /* The index vector is a constant. 
*/ + rtx_vector_builder builder (VNx2DImode, 2, 1); + builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode)); + builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode)); + index = builder.build (); + } + else + { + /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec + explicitly allows the top of the index to be dropped. */ + index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode, + index, const1_rtx)); + index = expand_vector_broadcast (VNx2DImode, index); + + /* Get an alternating 0, 1 predicate. */ + rtx_vector_builder builder (VNx2BImode, 2, 1); + builder.quick_push (const0_rtx); + builder.quick_push (constm1_rtx); + rtx pg = force_reg (VNx2BImode, builder.build ()); + + /* Add one to the odd elements of the index. */ + rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode)); + rtx target = gen_reg_rtx (VNx2DImode); + emit_insn (gen_cond_addvnx2di (target, pg, index, one, index)); + index = target; + } + + e.args[0] = gen_lowpart (VNx2DImode, e.args[0]); + e.args[1] = index; + return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di); + } +}; + +/* Implements svextb, svexth and svextw. */ +class svext_bhw_impl : public function_base +{ +public: + CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode) + : m_from_mode (from_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).unsigned_p) + { + /* Convert to an AND. The widest we go is 0xffffffff, which fits + in a CONST_INT. */ + e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode))); + if (e.pred == PRED_m) + /* We now have arguments "(inactive, pg, op, mask)". Convert this + to "(pg, op, mask, inactive)" so that the order matches svand_m + with an extra argument on the end. Take the inactive elements + from this extra argument. */ + e.rotate_inputs_left (0, 4); + return e.map_to_rtx_codes (AND, AND, -1, 3); + } + + machine_mode wide_mode = e.vector_mode (0); + poly_uint64 nunits = GET_MODE_NUNITS (wide_mode); + machine_mode narrow_mode + = aarch64_sve_data_mode (m_from_mode, nunits).require (); + if (e.pred == PRED_x) + { + insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode); + return e.use_pred_x_insn (icode); + } + + insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode); + return e.use_cond_insn (icode); + } + + /* The element mode that we're extending from. */ + scalar_int_mode m_from_mode; +}; + +/* Implements svget2, svget3 and svget4. */ +class svget_impl : public quiet +{ +public: + CONSTEXPR svget_impl (unsigned int vectors_per_tuple) + : quiet (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Fold into a normal gimple component access. */ + tree rhs_tuple = gimple_call_arg (f.call, 0); + tree index = gimple_call_arg (f.call, 1); + tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); + tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + rhs_tuple, field, NULL_TREE); + tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs), + rhs_array, index, NULL_TREE, NULL_TREE); + return gimple_build_assign (f.lhs, rhs_vector); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Fold the access into a subreg rvalue. 
*/ + return simplify_gen_subreg (e.vector_mode (0), e.args[0], + GET_MODE (e.args[0]), + INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR); + } +}; + +class svindex_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (e.direct_optab_handler (vec_series_optab)); + } +}; + +class svinsr_impl : public quiet +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2, + gimple_call_arg (f.call, 0), + gimple_call_arg (f.call, 1)); + gimple_call_set_lhs (new_call, f.lhs); + return new_call; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = direct_optab_handler (vec_shl_insert_optab, + e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements svlasta and svlastb. */ +class svlast_impl : public quiet +{ +public: + CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0))); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svld1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3, + base, cookie, pred); + gimple_call_set_lhs (new_call, f.lhs); + return new_call; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = convert_optab_handler (maskload_optab, + e.vector_mode (0), e.gp_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements extending contiguous forms of svld1. */ +class svld1_extend_impl : public extending_load +{ +public: + CONSTEXPR svld1_extend_impl (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_load (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_contiguous_load_insn (icode); + } +}; + +class svld1_gather_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, as required by mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + machine_mode mem_mode = e.memory_vector_mode (); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_gather_load_optab, + mem_mode, int_mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements extending forms of svld1_gather. 
*/ +class svld1_gather_extend_impl : public extending_load +{ +public: + CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, since the extending gathers use the same + operand order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + /* Add a constant predicate for the extension rtx. */ + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +class load_replicate : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + tree + memory_scalar_type (const function_instance &fi) const OVERRIDE + { + return fi.scalar_type (0); + } +}; + +class svld1rq_impl : public load_replicate +{ +public: + machine_mode + memory_vector_mode (const function_instance &fi) const OVERRIDE + { + return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require (); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +class svld1ro_impl : public load_replicate +{ +public: + machine_mode + memory_vector_mode (const function_instance &) const OVERRIDE + { + return OImode; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements svld2, svld3 and svld4. */ +class svld234_impl : public full_width_access +{ +public: + CONSTEXPR svld234_impl (unsigned int vectors_per_tuple) + : full_width_access (vectors_per_tuple) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree tuple_type = TREE_TYPE (f.lhs); + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + /* Emit two statements: a clobber of the lhs, so that it isn't + upwards exposed, and then the load itself. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the clobber statement + rather than the load. */ + gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type)); + + /* View the loaded data as an array of vectors. */ + tree field = tuple_type_field (tuple_type); + tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), + unshare_expr (f.lhs)); + + /* Emit the load itself. 
*/ + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, + base, cookie, pred); + gimple_call_set_lhs (new_call, lhs_array); + gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT); + + return clobber; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); + insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +class svldff1_gather_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + e.prepare_gather_address_operands (1); + /* Put the predicate last, since ldff1_gather uses the same operand + order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + machine_mode mem_mode = e.memory_vector_mode (); + return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode)); + } +}; + +/* Implements extending forms of svldff1_gather. */ +class svldff1_gather_extend : public extending_load +{ +public: + CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + e.prepare_gather_address_operands (1); + /* Put the predicate last, since ldff1_gather uses the same operand + order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + /* Add a constant predicate for the extension rtx. */ + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +class svldnt1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements svldff1 and svldnf1. */ +class svldxf1_impl : public full_width_access +{ +public: + CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + machine_mode mode = e.vector_mode (0); + return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode)); + } + + /* The unspec associated with the load. */ + int m_unspec; +}; + +/* Implements extending contiguous forms of svldff1 and svldnf1. 
*/ +class svldxf1_extend_impl : public extending_load +{ +public: + CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec) + : extending_load (memory_type), m_unspec (unspec) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_contiguous_load_insn (icode); + } + + /* The unspec associated with the load. */ + int m_unspec; +}; + +class svlen_impl : public quiet +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* The argument only exists for its type. */ + tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0)); + tree count = build_int_cstu (TREE_TYPE (f.lhs), + TYPE_VECTOR_SUBPARTS (rhs_type)); + return gimple_build_assign (f.lhs, count); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The argument only exists for its type. */ + return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode); + } +}; + +class svmad_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return expand_mad (e); + } +}; + +class svmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the accumulator at the end (argument 3), but keep it as the + merge input for _m functions. */ + e.rotate_inputs_left (1, 4); + return expand_mad (e, 3); + } +}; + +class svmla_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).integer_p) + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode)); + } + return expand_mla_mls_lane (e, UNSPEC_FMLA); + } +}; + +class svmls_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the accumulator at the end (argument 3), but keep it as the + merge input for _m functions. */ + e.rotate_inputs_left (1, 4); + return expand_msb (e, 3); + } +}; + +class svmov_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return gimple_build_assign (f.lhs, BIT_AND_EXPR, + gimple_call_arg (f.call, 0), + gimple_call_arg (f.call, 1)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B" + is "AND Pa.B, Pb/Z, Pc.B, Pc.B". 
*/ + gcc_assert (e.pred == PRED_z); + e.args.quick_push (e.args[1]); + return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z); + } +}; + +class svmls_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).integer_p) + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode)); + } + return expand_mla_mls_lane (e, UNSPEC_FMLS); + } +}; + +class svmmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode; + if (e.type_suffix (0).integer_p) + { + if (e.type_suffix (0).unsigned_p) + icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0)); + else + icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0)); + } + else + icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +class svmsb_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return expand_msb (e); + } +}; + +class svnand_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z); + } +}; + +class svnor_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z); + } +}; + +class svnot_impl : public rtx_code_function +{ +public: + CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) + { + /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B" + is "EOR Pa.B, Pb/Z, Pb.B, Pc.B". */ + gcc_assert (e.pred == PRED_z); + e.args.quick_insert (1, e.args[0]); + return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z); + } + return rtx_code_function::expand (e); + } +}; + +class svorn_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z); + } +}; + +class svpfalse_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return f.fold_to_pfalse (); + } + + rtx + expand (function_expander &) const OVERRIDE + { + return CONST0_RTX (VNx16BImode); + } +}; + +/* Implements svpfirst and svpnext, which share the same .md patterns. */ +class svpfirst_svpnext_impl : public function_base +{ +public: + CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + e.add_ptrue_hint (0, mode); + return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode)); + } + + /* The unspec associated with the operation. */ + int m_unspec; +}; + +/* Implements contiguous forms of svprf[bhwd]. 
*/ +class svprf_bhwd_impl : public function_base +{ +public: + CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_PREFETCH_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_prefetch_operands (); + insn_code icode = code_for_aarch64_sve_prefetch (m_mode); + return e.use_contiguous_prefetch_insn (icode); + } + + /* The mode that we'd use to hold one vector of prefetched data. */ + machine_mode m_mode; +}; + +/* Implements svprf[bhwd]_gather. */ +class svprf_bhwd_gather_impl : public function_base +{ +public: + CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_PREFETCH_MEMORY; + } + + machine_mode + memory_vector_mode (const function_instance &) const OVERRIDE + { + return m_mode; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_prefetch_operands (); + e.prepare_gather_address_operands (1); + + /* Insert a zero operand to identify the mode of the memory being + accessed. This goes between the gather operands and prefetch + operands created above. */ + e.args.quick_insert (5, CONST0_RTX (m_mode)); + + machine_mode reg_mode = GET_MODE (e.args[2]); + insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode); + return e.use_exact_insn (icode); + } + + /* The mode that we'd use to hold one vector of prefetched data. */ + machine_mode m_mode; +}; + +/* Implements svptest_any, svptest_first and svptest_last. */ +class svptest_impl : public function_base +{ +public: + CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See whether GP is an exact ptrue for some predicate mode; + i.e. whether converting the GP to that mode will not drop + set bits and will leave all significant bits set. */ + machine_mode wide_mode; + int hint; + if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode)) + hint = SVE_KNOWN_PTRUE; + else + { + hint = SVE_MAYBE_NOT_PTRUE; + wide_mode = VNx16BImode; + } + + /* Generate the PTEST itself. */ + rtx pg = force_reg (VNx16BImode, e.args[0]); + rtx wide_pg = gen_lowpart (wide_mode, pg); + rtx hint_rtx = gen_int_mode (hint, DImode); + rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1])); + emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op)); + + /* Get the location of the boolean result. We can provide SImode and + DImode values directly; rely on generic code to convert others. */ + rtx target = e.possible_target; + if (!target + || !REG_P (target) + || (GET_MODE (target) != SImode && GET_MODE (target) != DImode)) + target = gen_reg_rtx (DImode); + + /* Generate a CSET to convert the CC result of the PTEST to a boolean. */ + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target), + cc_reg, const0_rtx); + emit_insn (gen_rtx_SET (target, compare)); + return target; + } + + /* The comparison code associated with ptest condition. 
*/ + rtx_code m_compare; +}; + +class svptrue_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return f.fold_to_ptrue (); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return aarch64_ptrue_all (e.type_suffix (0).element_bytes); + } +}; + +class svptrue_pat_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pattern_arg = gimple_call_arg (f.call, 0); + aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); + + if (pattern == AARCH64_SV_ALL) + /* svptrue_pat_bN (SV_ALL) == svptrue_bN (). */ + return f.fold_to_ptrue (); + + /* See whether we can count the number of elements in the pattern + at compile time. If so, construct a predicate with that number + of 1s followed by all 0s. */ + int nelts_per_vq = f.elements_per_vq (0); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq); + if (value >= 0) + return f.fold_to_vl_pred (value); + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* In rtl, the predicate is represented as the constant: + + (const:V16BI (unspec:V16BI [(const_int PATTERN) + (const_vector:VnnBI [zeros])] + UNSPEC_PTRUE)) + + where nn determines the element size. */ + rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0))); + return gen_rtx_CONST (VNx16BImode, + gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE)); + } +}; + +/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}. */ +class svqdec_svqinc_bhwd_impl : public function_base +{ +public: + CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint, + rtx_code code_for_uint, + scalar_int_mode elem_mode) + : m_code_for_sint (code_for_sint), + m_code_for_uint (code_for_uint), + m_elem_mode (elem_mode) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Treat non-_pat functions in the same way as _pat functions with + an SV_ALL argument. */ + if (e.args.length () == 2) + e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode)); + + /* Insert the number of elements per 128-bit block as a fake argument, + between the pattern and the multiplier. Arguments 1, 2 and 3 then + correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see + aarch64_sve_cnt_pat for details. */ + unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode); + e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode)); + + rtx_code code = (e.type_suffix (0).unsigned_p + ? m_code_for_uint + : m_code_for_sint); + + /* Choose between operating on integer scalars or integer vectors. */ + machine_mode mode = e.vector_mode (0); + if (e.mode_suffix_id == MODE_n) + mode = GET_MODE_INNER (mode); + return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode)); + } + + /* The saturating addition or subtraction codes to use for signed and + unsigned values respectively. */ + rtx_code m_code_for_sint; + rtx_code m_code_for_uint; + + /* The integer mode associated with the [bhwd] suffix. */ + scalar_int_mode m_elem_mode; +}; + +/* Implements svqdec[bhwd]{,_pat}. */ +class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl +{ +public: + CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode) + : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {} +}; + +/* Implements svqinc[bhwd]{,_pat}. */ +class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl +{ +public: + CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode) + : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {} +}; + +/* Implements svqdecp and svqincp. 
*/ +class svqdecp_svqincp_impl : public function_base +{ +public: + CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint, + rtx_code code_for_uint) + : m_code_for_sint (code_for_sint), + m_code_for_uint (code_for_uint) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx_code code = (e.type_suffix (0).unsigned_p + ? m_code_for_uint + : m_code_for_sint); + insn_code icode; + if (e.mode_suffix_id == MODE_n) + { + /* Increment or decrement a scalar (whose mode is given by the first + type suffix) by the number of active elements in a predicate + (whose mode is given by the second type suffix). */ + machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); + icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1)); + } + else + /* Increment a vector by the number of active elements in a predicate, + with the vector mode determining the predicate mode. */ + icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0)); + return e.use_exact_insn (icode); + } + + /* The saturating addition or subtraction codes to use for signed and + unsigned values respectively. */ + rtx_code m_code_for_sint; + rtx_code m_code_for_uint; +}; + +class svrdffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_copy_ffr_to_ffrt ()); + rtx result = e.use_exact_insn (e.pred == PRED_z + ? CODE_FOR_aarch64_rdffr_z + : CODE_FOR_aarch64_rdffr); + emit_insn (gen_aarch64_update_ffrt ()); + return result; + } +}; + +class svreinterpret_impl : public quiet +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Punt to rtl if the effect of the reinterpret on registers does not + conform to GCC's endianness model. */ + if (!targetm.can_change_mode_class (f.vector_mode (0), + f.vector_mode (1), FP_REGS)) + return NULL; + + /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR + reinterpretation. */ + tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs), + gimple_call_arg (f.call, 0)); + return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); + } +}; + +class svrev_impl : public permute +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Punt for now on _b16 and wider; we'd need more complex evpc logic + to rerecognize the result. */ + if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) + return NULL; + + /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ + poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 1, 3); + for (int i = 0; i < 3; ++i) + builder.quick_push (nelts - i - 1); + return fold_permute (f, builder); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0))); + } +}; + +class svsel_impl : public quiet +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svsel corresponds exactly to VEC_COND_EXPR. 
*/ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, f.vector_type (0), 0); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, + gimple_call_arg (f.call, 1), + gimple_call_arg (f.call, 2)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */ + e.rotate_inputs_left (0, 3); + insn_code icode = convert_optab_handler (vcond_mask_optab, + e.vector_mode (0), + e.gp_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements svset2, svset3 and svset4. */ +class svset_impl : public quiet +{ +public: + CONSTEXPR svset_impl (unsigned int vectors_per_tuple) + : quiet (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree rhs_tuple = gimple_call_arg (f.call, 0); + tree index = gimple_call_arg (f.call, 1); + tree rhs_vector = gimple_call_arg (f.call, 2); + + /* Replace the call with two statements: a copy of the full tuple + to the call result, followed by an update of the individual vector. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the copy statement + rather than the field update. */ + gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple); + + /* Get a reference to the individual vector. */ + tree field = tuple_type_field (TREE_TYPE (f.lhs)); + tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + f.lhs, field, NULL_TREE); + tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), + lhs_array, index, NULL_TREE, NULL_TREE); + gassign *update = gimple_build_assign (lhs_vector, rhs_vector); + gsi_insert_after (f.gsi, update, GSI_SAME_STMT); + + return copy; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx rhs_tuple = e.args[0]; + unsigned int index = INTVAL (e.args[1]); + rtx rhs_vector = e.args[2]; + + /* First copy the full tuple to the target register. */ + rtx lhs_tuple = e.get_nonoverlapping_reg_target (); + emit_move_insn (lhs_tuple, rhs_tuple); + + /* ...then update the individual vector. */ + rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector), + lhs_tuple, GET_MODE (lhs_tuple), + index * BYTES_PER_SVE_VECTOR); + emit_move_insn (lhs_vector, rhs_vector); + return lhs_vector; + } +}; + +class svsetffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + return e.use_exact_insn (CODE_FOR_aarch64_wrffr); + } +}; + +class svst1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. 
*/ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1); + return gimple_build_call_internal (IFN_MASK_STORE, 4, + base, cookie, pred, rhs); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = convert_optab_handler (maskstore_optab, + e.vector_mode (0), e.gp_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svst1_scatter_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, as required by mask_scatter_store_optab. */ + e.rotate_inputs_left (0, 6); + machine_mode mem_mode = e.memory_vector_mode (); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_scatter_store_optab, + mem_mode, int_mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements truncating forms of svst1_scatter. */ +class svst1_scatter_truncate_impl : public truncating_store +{ +public: + CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode) + : truncating_store (to_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, since the truncating scatters use the same + operand order as mask_scatter_store_optab. */ + e.rotate_inputs_left (0, 6); + insn_code icode = code_for_aarch64_scatter_store_trunc + (e.memory_vector_mode (), e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements truncating contiguous forms of svst1. */ +class svst1_truncate_impl : public truncating_store +{ +public: + CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode) + : truncating_store (to_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (), + e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +/* Implements svst2, svst3 and svst4. */ +class svst234_impl : public full_width_access +{ +public: + CONSTEXPR svst234_impl (unsigned int vectors_per_tuple) + : full_width_access (vectors_per_tuple) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + /* View the stored data as an array of vectors. 
*/ + unsigned int num_args = gimple_call_num_args (f.call); + tree rhs_tuple = gimple_call_arg (f.call, num_args - 1); + tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); + tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, + base, cookie, pred, rhs_array); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode tuple_mode = GET_MODE (e.args.last ()); + insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svstnt1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svsub_impl : public rtx_code_function +{ +public: + CONSTEXPR svsub_impl () + : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Canonicalize subtractions of constants to additions. */ + machine_mode mode = e.vector_mode (0); + if (e.try_negating_argument (2, mode)) + return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD); + + return rtx_code_function::expand (e); + } +}; + +class svtbl_impl : public permute +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); + } +}; + +/* Implements svtrn1 and svtrn2. */ +class svtrn_impl : public binary_permute +{ +public: + CONSTEXPR svtrn_impl (int base) + : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... } + svtrn2: as for svtrn1, but with 1 added to each index. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 2, 3); + for (unsigned int i = 0; i < 3; ++i) + { + builder.quick_push (m_base + i * 2); + builder.quick_push (m_base + i * 2 + nelts); + } + return fold_permute (f, builder); + } + + /* 0 for svtrn1, 1 for svtrn2. */ + unsigned int m_base; +}; + +/* Base class for svundef{,2,3,4}. */ +class svundef_impl : public quiet +{ +public: + CONSTEXPR svundef_impl (unsigned int vectors_per_tuple) + : quiet (vectors_per_tuple) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx target = e.get_reg_target (); + emit_clobber (copy_rtx (target)); + return target; + } +}; + +/* Implements svunpklo and svunpkhi. */ +class svunpk_impl : public quiet +{ +public: + CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Don't fold the predicate ops, since every bit of the svbool_t + result is significant. */ + if (f.type_suffix_ids[0] == TYPE_SUFFIX_b) + return NULL; + + /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian + and VEC_UNPACK_HI_EXPR for big-endian. */ + bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p; + tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR; + return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = GET_MODE (e.args[0]); + unsigned int unpacku = m_high_p ? 
UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO; + unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO; + insn_code icode; + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + icode = code_for_aarch64_sve_punpk (unpacku, mode); + else + { + int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks; + icode = code_for_aarch64_sve_unpk (unspec, unspec, mode); + } + return e.use_exact_insn (icode); + } + + /* True for svunpkhi, false for svunpklo. */ + bool m_high_p; +}; + +/* Also implements svsudot. */ +class svusdot_impl : public function_base +{ +public: + CONSTEXPR svusdot_impl (bool su) : m_su (su) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The implementation of the ACLE function svsudot (for the non-lane + version) is through the USDOT instruction but with the second and third + inputs swapped. */ + if (m_su) + e.rotate_inputs_left (1, 2); + /* The ACLE function has the same order requirements as for svdot. + While there's no requirement for the RTL pattern to have the same sort + of order as that for dot_prod, it's easier to read. + Hence we do the same rotation on arguments as svdot_impl does. */ + e.rotate_inputs_left (0, 3); + machine_mode mode = e.vector_mode (0); + insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); + return e.use_exact_insn (icode); + } + +private: + bool m_su; +}; + +/* Implements svuzp1 and svuzp2. */ +class svuzp_impl : public binary_permute +{ +public: + CONSTEXPR svuzp_impl (unsigned int base) + : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svuzp1: { 0, 2, 4, 6, ... } + svuzp2: { 1, 3, 5, 7, ... }. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 1, 3); + for (unsigned int i = 0; i < 3; ++i) + builder.quick_push (m_base + i * 2); + return fold_permute (f, builder); + } + + /* 0 for svuzp1, 1 for svuzp2. */ + unsigned int m_base; +}; + +/* A function_base for svwhilele and svwhilelt functions. */ +class svwhilelx_impl : public while_comparison +{ +public: + CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p) + : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p) + {} + + /* Try to fold a call by treating its arguments as constants of type T. */ + template<typename T> + gimple * + fold_type (gimple_folder &f) const + { + /* Only handle cases in which both operands are constant. */ + T arg0, arg1; + if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0) + || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1)) + return NULL; + + /* Check whether the result is known to be all-false. */ + if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1)) + return f.fold_to_pfalse (); + + /* Punt if we can't tell at compile time whether the result + is all-false. */ + if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1)) + return NULL; + + /* At this point we know the result has at least one set element. */ + poly_uint64 diff = arg1 - arg0; + poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0)); + + /* Canonicalize the svwhilele form to the svwhilelt form. Subtract + from NELTS rather than adding to DIFF, to prevent overflow. */ + if (m_eq_p) + nelts -= 1; + + /* Check whether the result is known to be all-true. */ + if (known_ge (diff, nelts)) + return f.fold_to_ptrue (); + + /* Punt if DIFF might not be the actual number of set elements + in the result. Conditional equality is fine.
*/ + if (maybe_gt (diff, nelts)) + return NULL; + + /* At this point we know that the predicate will have DIFF set elements + for svwhilelt and DIFF + 1 set elements for svwhilele (which stops + after rather than before ARG1 is reached). See if we can create + the predicate at compile time. */ + unsigned HOST_WIDE_INT vl; + if (diff.is_constant (&vl)) + /* Overflow is no longer possible after the checks above. */ + return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl); + + return NULL; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + if (f.type_suffix (1).unsigned_p) + return fold_type<poly_uint64> (f); + else + return fold_type<poly_int64> (f); + } + + /* True for svwhilele, false for svwhilelt. */ + bool m_eq_p; +}; + +class svwrffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (CODE_FOR_aarch64_wrffr); + } +}; + +/* Implements svzip1 and svzip2. */ +class svzip_impl : public binary_permute +{ +public: + CONSTEXPR svzip_impl (unsigned int base) + : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... } + svzip2: as for svzip1, but with nelts / 2 added to each index. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + poly_uint64 base = m_base * exact_div (nelts, 2); + vec_perm_builder builder (nelts, 2, 3); + for (unsigned int i = 0; i < 3; ++i) + { + builder.quick_push (base + i); + builder.quick_push (base + i + nelts); + } + return fold_permute (f, builder); + } + + /* 0 for svzip1, 1 for svzip2. */ + unsigned int m_base; +}; + +} /* end anonymous namespace */ + +namespace aarch64_sve { + +FUNCTION (svabd, svabd_impl,) +FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS)) +FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE)) +FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT)) +FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE)) +FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT)) +FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD)) +FUNCTION (svadda, svadda_impl,) +FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV)) +FUNCTION (svadrb, svadr_bhwd_impl, (0)) +FUNCTION (svadrd, svadr_bhwd_impl, (3)) +FUNCTION (svadrh, svadr_bhwd_impl, (1)) +FUNCTION (svadrw, svadr_bhwd_impl, (2)) +FUNCTION (svand, rtx_code_function, (AND, AND)) +FUNCTION (svandv, reduction, (UNSPEC_ANDV)) +FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) +FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE)) +FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1)) +FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf)) +FUNCTION (svbfdot_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf)) +FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf)) +FUNCTION (svbfmlalb_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf)) +FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf)) +FUNCTION (svbfmlalt_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf)) +FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf)) +FUNCTION (svbic, svbic_impl,) +FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA)) +FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB)) +FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN)) +FUNCTION (svbrkpa, 
svbrk_binary_impl, (UNSPEC_BRKPA)) +FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB)) +FUNCTION (svcadd, svcadd_impl,) +FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA)) +FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB)) +FUNCTION (svcls, unary_count, (CLRSB)) +FUNCTION (svclz, unary_count, (CLZ)) +FUNCTION (svcmla, svcmla_impl,) +FUNCTION (svcmla_lane, svcmla_lane_impl,) +FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ)) +FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE, + UNSPEC_COND_CMPEQ_WIDE)) +FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE)) +FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE, + UNSPEC_COND_CMPHS_WIDE)) +FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT)) +FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE, + UNSPEC_COND_CMPHI_WIDE)) +FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE)) +FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE, + UNSPEC_COND_CMPLS_WIDE)) +FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT)) +FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE, + UNSPEC_COND_CMPLO_WIDE)) +FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE)) +FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE, + UNSPEC_COND_CMPNE_WIDE)) +FUNCTION (svcmpuo, svcmpuo_impl,) +FUNCTION (svcnot, svcnot_impl,) +FUNCTION (svcnt, unary_count, (POPCOUNT)) +FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode)) +FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode)) +FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode)) +FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode)) +FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode)) +FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode)) +FUNCTION (svcntp, svcntp_impl,) +FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode)) +FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode)) +FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),) +FUNCTION (svcreate2, svcreate_impl, (2)) +FUNCTION (svcreate3, svcreate_impl, (3)) +FUNCTION (svcreate4, svcreate_impl, (4)) +FUNCTION (svcvt, svcvt_impl,) +FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) +FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdot, svdot_impl,) +FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1)) +FUNCTION (svdup, svdup_impl,) +FUNCTION (svdup_lane, svdup_lane_impl,) +FUNCTION (svdupq, svdupq_impl,) +FUNCTION (svdupq_lane, svdupq_lane_impl,) +FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1)) +FUNCTION (sveorv, reduction, (UNSPEC_XORV)) +FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA)) +FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),) +FUNCTION (svextb, svext_bhw_impl, (QImode)) +FUNCTION (svexth, svext_bhw_impl, (HImode)) +FUNCTION (svextw, svext_bhw_impl, (SImode)) +FUNCTION (svget2, svget_impl, (2)) +FUNCTION (svget3, svget_impl, (3)) +FUNCTION (svget4, svget_impl, (4)) +FUNCTION (svindex, svindex_impl,) +FUNCTION (svinsr, svinsr_impl,) +FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA)) +FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB)) +FUNCTION (svld1, svld1_impl,) +FUNCTION (svld1_gather, svld1_gather_impl,) +FUNCTION (svld1ro, svld1ro_impl,) +FUNCTION (svld1rq, svld1rq_impl,) +FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8)) +FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8)) +FUNCTION 
(svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16)) +FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16)) +FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32)) +FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32)) +FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8)) +FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8)) +FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16)) +FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16)) +FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32)) +FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32)) +FUNCTION (svld2, svld234_impl, (2)) +FUNCTION (svld3, svld234_impl, (3)) +FUNCTION (svld4, svld234_impl, (4)) +FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1)) +FUNCTION (svldff1_gather, svldff1_gather_impl,) +FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1)) +FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8)) +FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1)) +FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16)) +FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1)) +FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32)) +FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1)) +FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8)) +FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1)) +FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16)) +FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1)) +FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32)) +FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1)) +FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1)) +FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1)) +FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1)) +FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1)) +FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1)) +FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1)) +FUNCTION (svldnt1, svldnt1_impl,) +FUNCTION (svlen, svlen_impl,) +FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT)) +FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE)) +FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT)) +FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE)) +FUNCTION (svmad, svmad_impl,) +FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX)) +FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM)) +FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV)) +FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV)) +FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN)) +FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM)) +FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV)) +FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV)) +FUNCTION (svmla, svmla_impl,) +FUNCTION (svmla_lane, svmla_lane_impl,) +FUNCTION (svmls, svmls_impl,) +FUNCTION (svmls_lane, svmls_lane_impl,) +FUNCTION (svmmla, svmmla_impl,) +FUNCTION (svmov, svmov_impl,) +FUNCTION (svmsb, svmsb_impl,) +FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL)) +FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),) +FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART, + 
UNSPEC_UMUL_HIGHPART, -1)) +FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX)) +FUNCTION (svnand, svnand_impl,) +FUNCTION (svneg, quiet, (NEG, NEG, UNSPEC_COND_FNEG)) +FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA)) +FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA)) +FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS)) +FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS)) +FUNCTION (svnor, svnor_impl,) +FUNCTION (svnot, svnot_impl,) +FUNCTION (svorn, svorn_impl,) +FUNCTION (svorr, rtx_code_function, (IOR, IOR)) +FUNCTION (svorv, reduction, (UNSPEC_IORV)) +FUNCTION (svpfalse, svpfalse_impl,) +FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST)) +FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT)) +FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode)) +FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode)) +FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode)) +FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode)) +FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode)) +FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode)) +FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode)) +FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode)) +FUNCTION (svptest_any, svptest_impl, (NE)) +FUNCTION (svptest_first, svptest_impl, (LT)) +FUNCTION (svptest_last, svptest_impl, (LTU)) +FUNCTION (svptrue, svptrue_impl,) +FUNCTION (svptrue_pat, svptrue_pat_impl,) +FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1)) +FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode)) +FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode)) +FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode)) +FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode)) +FUNCTION (svqdech, svqdec_bhwd_impl, (HImode)) +FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode)) +FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS)) +FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode)) +FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode)) +FUNCTION (svqincb, svqinc_bhwd_impl, (QImode)) +FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode)) +FUNCTION (svqincd, svqinc_bhwd_impl, (DImode)) +FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode)) +FUNCTION (svqinch, svqinc_bhwd_impl, (HImode)) +FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode)) +FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS)) +FUNCTION (svqincw, svqinc_bhwd_impl, (SImode)) +FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode)) +FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1)) +FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1)) +FUNCTION (svrdffr, svrdffr_impl,) +FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE)) +FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS)) +FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX)) +FUNCTION (svreinterpret, svreinterpret_impl,) +FUNCTION (svrev, svrev_impl,) +FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1)) +FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1)) +FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1)) +FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA)) +FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI)) +FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM)) +FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN)) +FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP)) 
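Note on the FUNCTION lines in this table: each one instantiates a single, statically constructed implementation object and publishes it through the matching "extern const function_base *const" declaration in aarch64-sve-builtins-base.h (added later in this patch). The macro itself is defined outside this hunk; the sketch below only shows the general shape it is assumed to take and is illustrative, not a quote of the patch.

#define FUNCTION(NAME, CLASS, ARGS) \
  namespace { CLASS NAME##_obj ARGS; } \
  namespace functions { const function_base *const NAME = &NAME##_obj; }

/* Under that assumption, FUNCTION (svabd, svabd_impl,) expands to roughly:
     namespace { svabd_impl svabd_obj; }
     namespace functions { const function_base *const svabd = &svabd_obj; }
   so every ACLE function name resolves to a singleton that the rest of the
   compiler reaches only through an opaque function_base pointer.  */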
+FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX)) +FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ)) +FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE)) +FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS)) +FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE)) +FUNCTION (svsel, svsel_impl,) +FUNCTION (svset2, svset_impl, (2)) +FUNCTION (svset3, svset_impl, (3)) +FUNCTION (svset4, svset_impl, (4)) +FUNCTION (svsetffr, svsetffr_impl,) +FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),) +FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT)) +FUNCTION (svst1, svst1_impl,) +FUNCTION (svst1_scatter, svst1_scatter_impl,) +FUNCTION (svst1b, svst1_truncate_impl, (QImode)) +FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode)) +FUNCTION (svst1h, svst1_truncate_impl, (HImode)) +FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode)) +FUNCTION (svst1w, svst1_truncate_impl, (SImode)) +FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode)) +FUNCTION (svst2, svst234_impl, (2)) +FUNCTION (svst3, svst234_impl, (3)) +FUNCTION (svst4, svst234_impl, (4)) +FUNCTION (svstnt1, svstnt1_impl,) +FUNCTION (svsub, svsub_impl,) +FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB)) +FUNCTION (svsudot, svusdot_impl, (true)) +FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1)) +FUNCTION (svtbl, svtbl_impl,) +FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),) +FUNCTION (svtrn1, svtrn_impl, (0)) +FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q, + UNSPEC_TRN1Q)) +FUNCTION (svtrn2, svtrn_impl, (1)) +FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q, + UNSPEC_TRN2Q)) +FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL)) +FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL)) +FUNCTION (svundef, svundef_impl, (1)) +FUNCTION (svundef2, svundef_impl, (2)) +FUNCTION (svundef3, svundef_impl, (3)) +FUNCTION (svundef4, svundef_impl, (4)) +FUNCTION (svunpkhi, svunpk_impl, (true)) +FUNCTION (svunpklo, svunpk_impl, (false)) +FUNCTION (svusdot, svusdot_impl, (false)) +FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1)) +FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1)) +FUNCTION (svuzp1, svuzp_impl, (0)) +FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q, + UNSPEC_UZP1Q)) +FUNCTION (svuzp2, svuzp_impl, (1)) +FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q, + UNSPEC_UZP2Q)) +FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true)) +FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false)) +FUNCTION (svwrffr, svwrffr_impl,) +FUNCTION (svzip1, svzip_impl, (0)) +FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q, + UNSPEC_ZIP1Q)) +FUNCTION (svzip2, svzip_impl, (1)) +FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q, + UNSPEC_ZIP2Q)) + +} /* end namespace aarch64_sve */ diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def new file mode 100644 index 0000000000000..7d3f19a6b9115 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def @@ -0,0 +1,355 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define REQUIRED_EXTENSIONS 0 +DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz) +DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit) +DEF_SVE_FUNCTION (svacgt, compare_opt_n, all_float, implicit) +DEF_SVE_FUNCTION (svacle, compare_opt_n, all_float, implicit) +DEF_SVE_FUNCTION (svaclt, compare_opt_n, all_float, implicit) +DEF_SVE_FUNCTION (svadd, binary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit) +DEF_SVE_FUNCTION (svaddv, reduction_wide, all_arith, implicit) +DEF_SVE_FUNCTION (svadrb, adr_offset, none, none) +DEF_SVE_FUNCTION (svadrd, adr_index, none, none) +DEF_SVE_FUNCTION (svadrh, adr_index, none, none) +DEF_SVE_FUNCTION (svadrw, adr_index, none, none) +DEF_SVE_FUNCTION (svand, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svand, binary_opt_n, b, z) +DEF_SVE_FUNCTION (svandv, reduction, all_integer, implicit) +DEF_SVE_FUNCTION (svasr, binary_uint_opt_n, all_signed, mxz) +DEF_SVE_FUNCTION (svasr_wide, binary_uint64_opt_n, bhs_signed, mxz) +DEF_SVE_FUNCTION (svasrd, shift_right_imm, all_signed, mxz) +DEF_SVE_FUNCTION (svbic, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svbic, binary_opt_n, b, z) +DEF_SVE_FUNCTION (svbrka, unary, b, mz) +DEF_SVE_FUNCTION (svbrkb, unary, b, mz) +DEF_SVE_FUNCTION (svbrkn, binary, b, z) +DEF_SVE_FUNCTION (svbrkpa, binary, b, z) +DEF_SVE_FUNCTION (svbrkpb, binary, b, z) +DEF_SVE_FUNCTION (svcadd, binary_rotate, all_float, mxz) +DEF_SVE_FUNCTION (svclasta, clast, all_data, implicit) +DEF_SVE_FUNCTION (svclastb, clast, all_data, implicit) +DEF_SVE_FUNCTION (svcls, unary_to_uint, all_signed, mxz) +DEF_SVE_FUNCTION (svclz, unary_to_uint, all_integer, mxz) +DEF_SVE_FUNCTION (svcmla, ternary_rotate, all_float, mxz) +DEF_SVE_FUNCTION (svcmla_lane, ternary_lane_rotate, hs_float, none) +DEF_SVE_FUNCTION (svcmpeq, compare_opt_n, all_arith, implicit) +DEF_SVE_FUNCTION (svcmpeq_wide, compare_wide_opt_n, bhs_signed, implicit) +DEF_SVE_FUNCTION (svcmpge, compare_opt_n, all_arith, implicit) +DEF_SVE_FUNCTION (svcmpge_wide, compare_wide_opt_n, bhs_integer, implicit) +DEF_SVE_FUNCTION (svcmpgt, compare_opt_n, all_arith, implicit) +DEF_SVE_FUNCTION (svcmpgt_wide, compare_wide_opt_n, bhs_integer, implicit) +DEF_SVE_FUNCTION (svcmple, compare_opt_n, all_arith, implicit) +DEF_SVE_FUNCTION (svcmple_wide, compare_wide_opt_n, bhs_integer, implicit) +DEF_SVE_FUNCTION (svcmplt, compare_opt_n, all_arith, implicit) +DEF_SVE_FUNCTION (svcmplt_wide, compare_wide_opt_n, bhs_integer, implicit) +DEF_SVE_FUNCTION (svcmpne, compare_opt_n, all_arith, implicit) +DEF_SVE_FUNCTION (svcmpne_wide, compare_wide_opt_n, bhs_signed, implicit) +DEF_SVE_FUNCTION (svcmpuo, compare_opt_n, all_float, implicit) +DEF_SVE_FUNCTION (svcnot, unary, all_integer, mxz) +DEF_SVE_FUNCTION (svcnt, unary_to_uint, all_data, mxz) +DEF_SVE_FUNCTION (svcntb, count_inherent, none, none) +DEF_SVE_FUNCTION (svcntb_pat, 
count_pat, none, none) +DEF_SVE_FUNCTION (svcntd, count_inherent, none, none) +DEF_SVE_FUNCTION (svcntd_pat, count_pat, none, none) +DEF_SVE_FUNCTION (svcnth, count_inherent, none, none) +DEF_SVE_FUNCTION (svcnth_pat, count_pat, none, none) +DEF_SVE_FUNCTION (svcntp, count_pred, all_pred, implicit) +DEF_SVE_FUNCTION (svcntw, count_inherent, none, none) +DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none) +DEF_SVE_FUNCTION (svcompact, unary, sd_data, implicit) +DEF_SVE_FUNCTION (svcreate2, create, all_data, none) +DEF_SVE_FUNCTION (svcreate3, create, all_data, none) +DEF_SVE_FUNCTION (svcreate4, create, all_data, none) +DEF_SVE_FUNCTION (svcvt, unary_convert, cvt, mxz) +DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz) +DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz) +DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n, sd_integer, none) +DEF_SVE_FUNCTION (svdot_lane, ternary_qq_lane, sd_integer, none) +DEF_SVE_FUNCTION (svdup, unary_n, all_data, mxz_or_none) +DEF_SVE_FUNCTION (svdup, unary_n, all_pred, none) +DEF_SVE_FUNCTION (svdup_lane, binary_uint_n, all_data, none) +DEF_SVE_FUNCTION (svdupq, dupq, all_data, none) +DEF_SVE_FUNCTION (svdupq, dupq, all_pred, none) +DEF_SVE_FUNCTION (svdupq_lane, binary_uint64_n, all_data, none) +DEF_SVE_FUNCTION (sveor, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (sveor, binary_opt_n, b, z) +DEF_SVE_FUNCTION (sveorv, reduction, all_integer, implicit) +DEF_SVE_FUNCTION (svexpa, unary_uint, all_float, none) +DEF_SVE_FUNCTION (svext, ext, all_data, none) +DEF_SVE_FUNCTION (svextb, unary, hsd_integer, mxz) +DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz) +DEF_SVE_FUNCTION (svextw, unary, d_integer, mxz) +DEF_SVE_FUNCTION (svget2, get, all_data, none) +DEF_SVE_FUNCTION (svget3, get, all_data, none) +DEF_SVE_FUNCTION (svget4, get, all_data, none) +DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none) +DEF_SVE_FUNCTION (svinsr, binary_n, all_data, none) +DEF_SVE_FUNCTION (svlasta, reduction, all_data, implicit) +DEF_SVE_FUNCTION (svlastb, reduction, all_data, implicit) +DEF_SVE_FUNCTION (svld1, load, all_data, implicit) +DEF_SVE_FUNCTION (svld1_gather, load_gather_sv, sd_data, implicit) +DEF_SVE_FUNCTION (svld1_gather, load_gather_vs, sd_data, implicit) +DEF_SVE_FUNCTION (svld1rq, load_replicate, all_data, implicit) +DEF_SVE_FUNCTION (svld1sb, load_ext, hsd_integer, implicit) +DEF_SVE_FUNCTION (svld1sb_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1sh, load_ext, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_index, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1sw, load_ext, d_integer, implicit) +DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_offset, d_integer, implicit) +DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_index, d_integer, implicit) +DEF_SVE_FUNCTION (svld1ub, load_ext, hsd_integer, implicit) +DEF_SVE_FUNCTION (svld1ub_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1uh, load_ext, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_index, sd_integer, implicit) +DEF_SVE_FUNCTION (svld1uw, load_ext, d_integer, implicit) +DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_offset, d_integer, implicit) +DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_index, d_integer, implicit) +DEF_SVE_FUNCTION (svldff1, load, all_data, implicit) 
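The svldff1* entries above, together with the svsetffr/svrdffr/svwrffr implementations earlier in this file, back the ACLE first-fault loop idiom. The sketch below is a user-level illustration only, not part of the patch; the function name sum_until_fault is hypothetical and it assumes a toolchain whose arm_sve.h provides the standard ACLE names used here.

#include <arm_sve.h>
#include <stdint.h>

/* Sum int32_t elements, stopping once a load beyond the first active element
   would fault (for example because it runs into an unmapped page).  A fault
   on the very first active element is still delivered normally.  */
int64_t
sum_until_fault (const int32_t *base, int64_t n)
{
  int64_t sum = 0;
  for (int64_t i = 0; i < n;)
    {
      svbool_t pg = svwhilelt_b32_s64 (i, n);
      svsetffr ();                                  /* set all FFR bits */
      svint32_t data = svldff1_s32 (pg, base + i);  /* first-faulting load */
      svbool_t loaded = svrdffr_z (pg);             /* lanes that really loaded */
      sum += svaddv_s32 (loaded, data);
      if (!svptest_last (pg, loaded))               /* some active lane faulted */
        break;
      i += svcntw ();                               /* 32-bit elements per vector */
    }
  return sum;
}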
+DEF_SVE_FUNCTION (svldff1_gather, load_gather_sv, sd_data, implicit) +DEF_SVE_FUNCTION (svldff1_gather, load_gather_vs, sd_data, implicit) +DEF_SVE_FUNCTION (svldff1sb, load_ext, hsd_integer, implicit) +DEF_SVE_FUNCTION (svldff1sb_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1sh, load_ext, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_index, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1sw, load_ext, d_integer, implicit) +DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_offset, d_integer, implicit) +DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_index, d_integer, implicit) +DEF_SVE_FUNCTION (svldff1ub, load_ext, hsd_integer, implicit) +DEF_SVE_FUNCTION (svldff1ub_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1uh, load_ext, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_index, sd_integer, implicit) +DEF_SVE_FUNCTION (svldff1uw, load_ext, d_integer, implicit) +DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_offset, d_integer, implicit) +DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_index, d_integer, implicit) +DEF_SVE_FUNCTION (svldnf1, load, all_data, implicit) +DEF_SVE_FUNCTION (svldnf1sb, load_ext, hsd_integer, implicit) +DEF_SVE_FUNCTION (svldnf1sh, load_ext, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnf1sw, load_ext, d_integer, implicit) +DEF_SVE_FUNCTION (svldnf1ub, load_ext, hsd_integer, implicit) +DEF_SVE_FUNCTION (svldnf1uh, load_ext, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnf1uw, load_ext, d_integer, implicit) +DEF_SVE_FUNCTION (svldnt1, load, all_data, implicit) +DEF_SVE_FUNCTION (svld2, load, all_data, implicit) +DEF_SVE_FUNCTION (svld3, load, all_data, implicit) +DEF_SVE_FUNCTION (svld4, load, all_data, implicit) +DEF_SVE_FUNCTION (svlen, count_vector, all_data, none) +DEF_SVE_FUNCTION (svlsl, binary_uint_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svlsl_wide, binary_uint64_opt_n, bhs_integer, mxz) +DEF_SVE_FUNCTION (svlsr, binary_uint_opt_n, all_unsigned, mxz) +DEF_SVE_FUNCTION (svlsr_wide, binary_uint64_opt_n, bhs_unsigned, mxz) +DEF_SVE_FUNCTION (svmad, ternary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svmax, binary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svmaxnm, binary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svmaxnmv, reduction, all_float, implicit) +DEF_SVE_FUNCTION (svmaxv, reduction, all_arith, implicit) +DEF_SVE_FUNCTION (svmin, binary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svminnm, binary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svminnmv, reduction, all_float, implicit) +DEF_SVE_FUNCTION (svminv, reduction, all_arith, implicit) +DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svmla_lane, ternary_lane, all_float, none) +DEF_SVE_FUNCTION (svmls, ternary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svmls_lane, ternary_lane, all_float, none) +DEF_SVE_FUNCTION (svmmla, mmla, none, none) +DEF_SVE_FUNCTION (svmov, unary, b, z) +DEF_SVE_FUNCTION (svmsb, ternary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svmul, binary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svmul_lane, binary_lane, all_float, none) +DEF_SVE_FUNCTION (svmulh, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svmulx, binary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svnand, binary_opt_n, b, z) +DEF_SVE_FUNCTION (svneg, unary, all_float_and_signed, mxz) 
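The final column of each DEF_SVE_FUNCTION entry selects the predication forms that get generated: "mxz" yields the _m, _x and _z suffixes, "implicit" takes a governing predicate with no suffix (as for the loads and comparisons above), "z" yields only _z, and "none" is unpredicated. As a user-level illustration only (not part of the patch, and with a hypothetical wrapper name), the "mxz" entry for svneg above corresponds to these three ACLE forms:

#include <arm_sve.h>

svfloat32_t
svneg_predication_forms (svbool_t pg, svfloat32_t fallback, svfloat32_t x)
{
  svfloat32_t m = svneg_f32_m (fallback, pg, x); /* _m: inactive lanes come from FALLBACK */
  svfloat32_t z = svneg_f32_z (pg, x);           /* _z: inactive lanes are zeroed */
  svfloat32_t any = svneg_f32_x (pg, x);         /* _x: inactive lanes are unspecified */
  return svadd_f32_x (pg, svadd_f32_x (pg, m, z), any);
}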
+DEF_SVE_FUNCTION (svnmad, ternary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svnmla, ternary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svnmls, ternary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svnmsb, ternary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svnor, binary_opt_n, b, z) +DEF_SVE_FUNCTION (svnot, unary, all_integer, mxz) +DEF_SVE_FUNCTION (svnot, unary, b, z) +DEF_SVE_FUNCTION (svorn, binary_opt_n, b, z) +DEF_SVE_FUNCTION (svorr, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svorr, binary_opt_n, b, z) +DEF_SVE_FUNCTION (svorv, reduction, all_integer, implicit) +DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none) +DEF_SVE_FUNCTION (svpfirst, unary, b, implicit) +DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit) +DEF_SVE_FUNCTION (svprfb, prefetch, none, implicit) +DEF_SVE_FUNCTION (svprfb_gather, prefetch_gather_offset, none, implicit) +DEF_SVE_FUNCTION (svprfd, prefetch, none, implicit) +DEF_SVE_FUNCTION (svprfd_gather, prefetch_gather_index, none, implicit) +DEF_SVE_FUNCTION (svprfh, prefetch, none, implicit) +DEF_SVE_FUNCTION (svprfh_gather, prefetch_gather_index, none, implicit) +DEF_SVE_FUNCTION (svprfw, prefetch, none, implicit) +DEF_SVE_FUNCTION (svprfw_gather, prefetch_gather_index, none, implicit) +DEF_SVE_FUNCTION (svptest_any, ptest, none, implicit) +DEF_SVE_FUNCTION (svptest_first, ptest, none, implicit) +DEF_SVE_FUNCTION (svptest_last, ptest, none, implicit) +DEF_SVE_FUNCTION (svptrue, inherent, all_pred, none) +DEF_SVE_FUNCTION (svptrue_pat, pattern_pred, all_pred, none) +DEF_SVE_FUNCTION (svqadd, binary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svqdecb, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqdecb_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqdecd, inc_dec, d_integer, none) +DEF_SVE_FUNCTION (svqdecd, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqdecd_pat, inc_dec_pat, d_integer, none) +DEF_SVE_FUNCTION (svqdecd_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqdech, inc_dec, h_integer, none) +DEF_SVE_FUNCTION (svqdech, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqdech_pat, inc_dec_pat, h_integer, none) +DEF_SVE_FUNCTION (svqdech_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqdecp, inc_dec_pred, hsd_integer, none) +DEF_SVE_FUNCTION (svqdecp, inc_dec_pred_scalar, inc_dec_n, none) +DEF_SVE_FUNCTION (svqdecw, inc_dec, s_integer, none) +DEF_SVE_FUNCTION (svqdecw, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqdecw_pat, inc_dec_pat, s_integer, none) +DEF_SVE_FUNCTION (svqdecw_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqincb, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqincb_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqincd, inc_dec, d_integer, none) +DEF_SVE_FUNCTION (svqincd, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqincd_pat, inc_dec_pat, d_integer, none) +DEF_SVE_FUNCTION (svqincd_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqinch, inc_dec, h_integer, none) +DEF_SVE_FUNCTION (svqinch, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqinch_pat, inc_dec_pat, h_integer, none) +DEF_SVE_FUNCTION (svqinch_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqincp, inc_dec_pred, hsd_integer, none) +DEF_SVE_FUNCTION (svqincp, inc_dec_pred_scalar, inc_dec_n, none) +DEF_SVE_FUNCTION (svqincw, inc_dec, s_integer, none) +DEF_SVE_FUNCTION (svqincw, inc_dec, sd_integer, none) +DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, s_integer, none) +DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, sd_integer, none) +DEF_SVE_FUNCTION (svqsub, binary_opt_n, all_integer, none) 
+DEF_SVE_FUNCTION (svrbit, unary, all_integer, mxz) +DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) +DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) +DEF_SVE_FUNCTION (svrecps, binary, all_float, none) +DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) +DEF_SVE_FUNCTION (svreinterpret, unary_convert, reinterpret, none) +DEF_SVE_FUNCTION (svrev, unary, all_data, none) +DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) +DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) +DEF_SVE_FUNCTION (svrevh, unary, sd_integer, mxz) +DEF_SVE_FUNCTION (svrevw, unary, d_integer, mxz) +DEF_SVE_FUNCTION (svrinta, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrinti, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrintm, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrintn, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrintp, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrintx, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrintz, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrsqrte, unary, all_float, none) +DEF_SVE_FUNCTION (svrsqrts, binary, all_float, none) +DEF_SVE_FUNCTION (svscale, binary_int_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svsel, binary, all_data, implicit) +DEF_SVE_FUNCTION (svsel, binary, b, implicit) +DEF_SVE_FUNCTION (svset2, set, all_data, none) +DEF_SVE_FUNCTION (svset3, set, all_data, none) +DEF_SVE_FUNCTION (svset4, set, all_data, none) +DEF_SVE_FUNCTION (svsetffr, setffr, none, none) +DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit) +DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz) +DEF_SVE_FUNCTION (svst1, store, all_data, implicit) +DEF_SVE_FUNCTION (svst1_scatter, store_scatter_index, sd_data, implicit) +DEF_SVE_FUNCTION (svst1_scatter, store_scatter_offset, sd_data, implicit) +DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit) +DEF_SVE_FUNCTION (svst1b_scatter, store_scatter_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit) +DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_index, sd_integer, implicit) +DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_offset, sd_integer, implicit) +DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit) +DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_index, d_integer, implicit) +DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_offset, d_integer, implicit) +DEF_SVE_FUNCTION (svst2, store, all_data, implicit) +DEF_SVE_FUNCTION (svst3, store, all_data, implicit) +DEF_SVE_FUNCTION (svst4, store, all_data, implicit) +DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit) +DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz) +DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none) +DEF_SVE_FUNCTION (svtmad, tmad, all_float, none) +DEF_SVE_FUNCTION (svtrn1, binary, all_data, none) +DEF_SVE_FUNCTION (svtrn1, binary_pred, all_pred, none) +DEF_SVE_FUNCTION (svtrn2, binary, all_data, none) +DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none) +DEF_SVE_FUNCTION (svtsmul, binary_uint, all_float, none) +DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none) +DEF_SVE_FUNCTION (svundef, inherent, all_data, none) +DEF_SVE_FUNCTION (svundef2, inherent, all_data, none) +DEF_SVE_FUNCTION (svundef3, inherent, all_data, none) +DEF_SVE_FUNCTION (svundef4, inherent, all_data, none) +DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none) +DEF_SVE_FUNCTION (svunpkhi, unary_widen, b, none) +DEF_SVE_FUNCTION (svunpklo, unary_widen, hsd_integer, none) +DEF_SVE_FUNCTION (svunpklo, unary_widen, b, none) +DEF_SVE_FUNCTION (svuzp1, binary, all_data, none) +DEF_SVE_FUNCTION (svuzp1, 
binary_pred, all_pred, none) +DEF_SVE_FUNCTION (svuzp2, binary, all_data, none) +DEF_SVE_FUNCTION (svuzp2, binary_pred, all_pred, none) +DEF_SVE_FUNCTION (svwhilele, compare_scalar, while, none) +DEF_SVE_FUNCTION (svwhilelt, compare_scalar, while, none) +DEF_SVE_FUNCTION (svwrffr, setffr, none, implicit) +DEF_SVE_FUNCTION (svzip1, binary, all_data, none) +DEF_SVE_FUNCTION (svzip1, binary_pred, all_pred, none) +DEF_SVE_FUNCTION (svzip2, binary, all_data, none) +DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_BF16 +DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) +DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none) +DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none) +DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) +DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) +DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM +DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) +DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) +DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) +DEF_SVE_FUNCTION (svsudot_lane, ternary_intq_uintq_lane, s_signed, none) +DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) +DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM +DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM +DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) +DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) +DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) +DEF_SVE_FUNCTION (svtrn2q, binary, all_data, none) +DEF_SVE_FUNCTION (svuzp1q, binary, all_data, none) +DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none) +DEF_SVE_FUNCTION (svzip1q, binary, all_data, none) +DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h new file mode 100644 index 0000000000000..dc8d68536109f --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h @@ -0,0 +1,304 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef GCC_AARCH64_SVE_BUILTINS_BASE_H +#define GCC_AARCH64_SVE_BUILTINS_BASE_H + +namespace aarch64_sve +{ + namespace functions + { + extern const function_base *const svabd; + extern const function_base *const svabs; + extern const function_base *const svacge; + extern const function_base *const svacgt; + extern const function_base *const svacle; + extern const function_base *const svaclt; + extern const function_base *const svadd; + extern const function_base *const svadda; + extern const function_base *const svaddv; + extern const function_base *const svadrb; + extern const function_base *const svadrd; + extern const function_base *const svadrh; + extern const function_base *const svadrw; + extern const function_base *const svand; + extern const function_base *const svandv; + extern const function_base *const svasr; + extern const function_base *const svasr_wide; + extern const function_base *const svasrd; + extern const function_base *const svbfdot; + extern const function_base *const svbfdot_lane; + extern const function_base *const svbfmlalb; + extern const function_base *const svbfmlalb_lane; + extern const function_base *const svbfmlalt; + extern const function_base *const svbfmlalt_lane; + extern const function_base *const svbfmmla; + extern const function_base *const svbic; + extern const function_base *const svbrka; + extern const function_base *const svbrkb; + extern const function_base *const svbrkn; + extern const function_base *const svbrkpa; + extern const function_base *const svbrkpb; + extern const function_base *const svcadd; + extern const function_base *const svclasta; + extern const function_base *const svclastb; + extern const function_base *const svcls; + extern const function_base *const svclz; + extern const function_base *const svcmla; + extern const function_base *const svcmla_lane; + extern const function_base *const svcmpeq; + extern const function_base *const svcmpeq_wide; + extern const function_base *const svcmpge; + extern const function_base *const svcmpge_wide; + extern const function_base *const svcmpgt; + extern const function_base *const svcmpgt_wide; + extern const function_base *const svcmple; + extern const function_base *const svcmple_wide; + extern const function_base *const svcmplt; + extern const function_base *const svcmplt_wide; + extern const function_base *const svcmpne; + extern const function_base *const svcmpne_wide; + extern const function_base *const svcmpuo; + extern const function_base *const svcnot; + extern const function_base *const svcnt; + extern const function_base *const svcntb; + extern const function_base *const svcntb_pat; + extern const function_base *const svcntd; + extern const function_base *const svcntd_pat; + extern const function_base *const svcnth; + extern const function_base *const svcnth_pat; + extern const function_base *const svcntp; + extern const function_base *const svcntw; + extern const function_base *const svcntw_pat; + extern const function_base *const svcompact; + extern const function_base *const svcreate2; + extern const function_base *const svcreate3; + extern const function_base *const svcreate4; + extern const function_base *const svcvt; + extern const function_base *const svcvtnt; + extern const function_base *const svdiv; + extern const function_base *const svdivr; + extern const function_base *const svdot; + extern const function_base *const svdot_lane; + extern const function_base *const svdup; + extern const function_base *const svdup_lane; + extern const function_base *const svdupq; 
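These declarations are the only way code outside aarch64-sve-builtins-base.cc can name an implementation, since the *_impl classes themselves stay in that file's anonymous namespace. A hedged sketch of a consumer follows; it is illustrative only, and both the include set and the helper name are assumptions rather than part of the patch.

#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-base.h"

using namespace aarch64_sve;

/* Hypothetical helper: pick one of the registered singletons.  The caller
   only ever sees the abstract function_base interface, never the concrete
   implementation class behind the pointer.  */
static const function_base *
pick_add_or_sub (bool add_p)
{
  return add_p ? functions::svadd : functions::svsub;
}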
+ extern const function_base *const svdupq_lane; + extern const function_base *const sveor; + extern const function_base *const sveorv; + extern const function_base *const svexpa; + extern const function_base *const svext; + extern const function_base *const svextb; + extern const function_base *const svexth; + extern const function_base *const svextw; + extern const function_base *const svget2; + extern const function_base *const svget3; + extern const function_base *const svget4; + extern const function_base *const svindex; + extern const function_base *const svinsr; + extern const function_base *const svlasta; + extern const function_base *const svlastb; + extern const function_base *const svld1; + extern const function_base *const svld1_gather; + extern const function_base *const svld1ro; + extern const function_base *const svld1rq; + extern const function_base *const svld1sb; + extern const function_base *const svld1sb_gather; + extern const function_base *const svld1sh; + extern const function_base *const svld1sh_gather; + extern const function_base *const svld1sw; + extern const function_base *const svld1sw_gather; + extern const function_base *const svld1ub; + extern const function_base *const svld1ub_gather; + extern const function_base *const svld1uh; + extern const function_base *const svld1uh_gather; + extern const function_base *const svld1uw; + extern const function_base *const svld1uw_gather; + extern const function_base *const svld2; + extern const function_base *const svld3; + extern const function_base *const svld4; + extern const function_base *const svldff1; + extern const function_base *const svldff1_gather; + extern const function_base *const svldff1sb; + extern const function_base *const svldff1sb_gather; + extern const function_base *const svldff1sh; + extern const function_base *const svldff1sh_gather; + extern const function_base *const svldff1sw; + extern const function_base *const svldff1sw_gather; + extern const function_base *const svldff1ub; + extern const function_base *const svldff1ub_gather; + extern const function_base *const svldff1uh; + extern const function_base *const svldff1uh_gather; + extern const function_base *const svldff1uw; + extern const function_base *const svldff1uw_gather; + extern const function_base *const svldnf1; + extern const function_base *const svldnf1sb; + extern const function_base *const svldnf1sh; + extern const function_base *const svldnf1sw; + extern const function_base *const svldnf1ub; + extern const function_base *const svldnf1uh; + extern const function_base *const svldnf1uw; + extern const function_base *const svldnt1; + extern const function_base *const svlen; + extern const function_base *const svlsl; + extern const function_base *const svlsl_wide; + extern const function_base *const svlsr; + extern const function_base *const svlsr_wide; + extern const function_base *const svmad; + extern const function_base *const svmax; + extern const function_base *const svmaxnm; + extern const function_base *const svmaxnmv; + extern const function_base *const svmaxv; + extern const function_base *const svmin; + extern const function_base *const svminnm; + extern const function_base *const svminnmv; + extern const function_base *const svminv; + extern const function_base *const svmla; + extern const function_base *const svmla_lane; + extern const function_base *const svmls; + extern const function_base *const svmls_lane; + extern const function_base *const svmmla; + extern const function_base *const svmov; + extern const 
function_base *const svmsb; + extern const function_base *const svmul; + extern const function_base *const svmul_lane; + extern const function_base *const svmulh; + extern const function_base *const svmulx; + extern const function_base *const svnand; + extern const function_base *const svneg; + extern const function_base *const svnmad; + extern const function_base *const svnmla; + extern const function_base *const svnmls; + extern const function_base *const svnmsb; + extern const function_base *const svnor; + extern const function_base *const svnot; + extern const function_base *const svorn; + extern const function_base *const svorr; + extern const function_base *const svorv; + extern const function_base *const svpfalse; + extern const function_base *const svpfirst; + extern const function_base *const svpnext; + extern const function_base *const svprfb; + extern const function_base *const svprfb_gather; + extern const function_base *const svprfd; + extern const function_base *const svprfd_gather; + extern const function_base *const svprfh; + extern const function_base *const svprfh_gather; + extern const function_base *const svprfw; + extern const function_base *const svprfw_gather; + extern const function_base *const svptest_any; + extern const function_base *const svptest_first; + extern const function_base *const svptest_last; + extern const function_base *const svptrue; + extern const function_base *const svptrue_pat; + extern const function_base *const svqadd; + extern const function_base *const svqdecb; + extern const function_base *const svqdecb_pat; + extern const function_base *const svqdecd; + extern const function_base *const svqdecd_pat; + extern const function_base *const svqdech; + extern const function_base *const svqdech_pat; + extern const function_base *const svqdecp; + extern const function_base *const svqdecw; + extern const function_base *const svqdecw_pat; + extern const function_base *const svqincb; + extern const function_base *const svqincb_pat; + extern const function_base *const svqincd; + extern const function_base *const svqincd_pat; + extern const function_base *const svqinch; + extern const function_base *const svqinch_pat; + extern const function_base *const svqincp; + extern const function_base *const svqincw; + extern const function_base *const svqincw_pat; + extern const function_base *const svqsub; + extern const function_base *const svrbit; + extern const function_base *const svrdffr; + extern const function_base *const svrecpe; + extern const function_base *const svrecps; + extern const function_base *const svrecpx; + extern const function_base *const svreinterpret; + extern const function_base *const svrev; + extern const function_base *const svrevb; + extern const function_base *const svrevh; + extern const function_base *const svrevw; + extern const function_base *const svrinta; + extern const function_base *const svrinti; + extern const function_base *const svrintm; + extern const function_base *const svrintn; + extern const function_base *const svrintp; + extern const function_base *const svrintx; + extern const function_base *const svrintz; + extern const function_base *const svrsqrte; + extern const function_base *const svrsqrts; + extern const function_base *const svscale; + extern const function_base *const svsel; + extern const function_base *const svset2; + extern const function_base *const svset3; + extern const function_base *const svset4; + extern const function_base *const svsetffr; + extern const function_base *const svsplice; + extern 
const function_base *const svsqrt; + extern const function_base *const svst1; + extern const function_base *const svst1_scatter; + extern const function_base *const svst1b; + extern const function_base *const svst1b_scatter; + extern const function_base *const svst1h; + extern const function_base *const svst1h_scatter; + extern const function_base *const svst1w; + extern const function_base *const svst1w_scatter; + extern const function_base *const svst2; + extern const function_base *const svst3; + extern const function_base *const svst4; + extern const function_base *const svstnt1; + extern const function_base *const svsub; + extern const function_base *const svsubr; + extern const function_base *const svsudot; + extern const function_base *const svsudot_lane; + extern const function_base *const svtbl; + extern const function_base *const svtmad; + extern const function_base *const svtrn1; + extern const function_base *const svtrn1q; + extern const function_base *const svtrn2; + extern const function_base *const svtrn2q; + extern const function_base *const svtsmul; + extern const function_base *const svtssel; + extern const function_base *const svundef; + extern const function_base *const svundef2; + extern const function_base *const svundef3; + extern const function_base *const svundef4; + extern const function_base *const svunpkhi; + extern const function_base *const svunpklo; + extern const function_base *const svusdot; + extern const function_base *const svusdot_lane; + extern const function_base *const svusmmla; + extern const function_base *const svuzp1; + extern const function_base *const svuzp1q; + extern const function_base *const svuzp2; + extern const function_base *const svuzp2q; + extern const function_base *const svwhilele; + extern const function_base *const svwhilelt; + extern const function_base *const svwrffr; + extern const function_base *const svzip1; + extern const function_base *const svzip1q; + extern const function_base *const svzip2; + extern const function_base *const svzip2q; + } +} + +#endif diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h new file mode 100644 index 0000000000000..09c60fad9e70c --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -0,0 +1,665 @@ +/* ACLE support for AArch64 SVE (function_base classes) + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H +#define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H + +namespace aarch64_sve { + +/* Wrap T, which is derived from function_base, and indicate that the + function never has side effects. It is only necessary to use this + wrapper on functions that might have floating-point suffixes, since + otherwise we assume by default that the function has no side effects. 
*/ +template +class quiet : public T +{ +public: + CONSTEXPR quiet () : T () {} + + /* Unfortunately we can't use parameter packs yet. */ + template + CONSTEXPR quiet (const T1 &t1) : T (t1) {} + + template + CONSTEXPR quiet (const T1 &t1, const T2 &t2) : T (t1, t2) {} + + template + CONSTEXPR quiet (const T1 &t1, const T2 &t2, const T3 &t3) + : T (t1, t2, t3) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return 0; + } +}; + +/* A function_base that sometimes or always operates on tuples of + vectors. */ +class multi_vector_function : public function_base +{ +public: + CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple) + : m_vectors_per_tuple (vectors_per_tuple) {} + + unsigned int + vectors_per_tuple () const OVERRIDE + { + return m_vectors_per_tuple; + } + + /* The number of vectors in a tuple, or 1 if the function only operates + on single vectors. */ + unsigned int m_vectors_per_tuple; +}; + +/* A function_base that loads or stores contiguous memory elements + without extending or truncating them. */ +class full_width_access : public multi_vector_function +{ +public: + CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1) + : multi_vector_function (vectors_per_tuple) {} + + tree + memory_scalar_type (const function_instance &fi) const OVERRIDE + { + return fi.scalar_type (0); + } + + machine_mode + memory_vector_mode (const function_instance &fi) const OVERRIDE + { + machine_mode mode = fi.vector_mode (0); + if (m_vectors_per_tuple != 1) + mode = targetm.array_mode (mode, m_vectors_per_tuple).require (); + return mode; + } +}; + +/* A function_base that loads elements from memory and extends them + to a wider element. The memory element type is a fixed part of + the function base name. */ +class extending_load : public function_base +{ +public: + CONSTEXPR extending_load (type_suffix_index memory_type) + : m_memory_type (memory_type) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + tree + memory_scalar_type (const function_instance &) const OVERRIDE + { + return scalar_types[type_suffixes[m_memory_type].vector_type]; + } + + machine_mode + memory_vector_mode (const function_instance &fi) const OVERRIDE + { + machine_mode mem_mode = type_suffixes[m_memory_type].vector_mode; + machine_mode reg_mode = fi.vector_mode (0); + return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode), + GET_MODE_NUNITS (reg_mode)).require (); + } + + /* Return the rtx code associated with the kind of extension that + the load performs. */ + rtx_code + extend_rtx_code () const + { + return (type_suffixes[m_memory_type].unsigned_p + ? ZERO_EXTEND : SIGN_EXTEND); + } + + /* The type of the memory elements. This is part of the function base + name rather than a true type suffix. */ + type_suffix_index m_memory_type; +}; + +/* A function_base that truncates vector elements and stores them to memory. + The memory element width is a fixed part of the function base name. */ +class truncating_store : public function_base +{ +public: + CONSTEXPR truncating_store (scalar_int_mode to_mode) : m_to_mode (to_mode) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + tree + memory_scalar_type (const function_instance &fi) const OVERRIDE + { + /* In truncating stores, the signedness of the memory element is defined + to be the same as the signedness of the vector element. 
The signedness + doesn't make any difference to the behavior of the function. */ + type_class_index tclass = fi.type_suffix (0).tclass; + unsigned int element_bits = GET_MODE_BITSIZE (m_to_mode); + type_suffix_index suffix = find_type_suffix (tclass, element_bits); + return scalar_types[type_suffixes[suffix].vector_type]; + } + + machine_mode + memory_vector_mode (const function_instance &fi) const OVERRIDE + { + poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0)); + return aarch64_sve_data_mode (m_to_mode, nunits).require (); + } + + /* The mode of a single memory element. */ + scalar_int_mode m_to_mode; +}; + +/* An incomplete function_base for functions that have an associated rtx code. + It simply records information about the mapping for derived classes + to use. */ +class rtx_code_function_base : public function_base +{ +public: + CONSTEXPR rtx_code_function_base (rtx_code code_for_sint, + rtx_code code_for_uint, + int unspec_for_fp = -1) + : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint), + m_unspec_for_fp (unspec_for_fp) {} + + /* The rtx code to use for signed and unsigned integers respectively. + Can be UNKNOWN for functions that don't have integer forms. */ + rtx_code m_code_for_sint; + rtx_code m_code_for_uint; + + /* The UNSPEC_COND_* to use for floating-point operations. Can be -1 + for functions that only operate on integers. */ + int m_unspec_for_fp; +}; + +/* A function_base for functions that have an associated rtx code. + It supports all forms of predication except PRED_implicit. */ +class rtx_code_function : public rtx_code_function_base +{ +public: + CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint, + int unspec_for_fp = -1) + : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, + m_unspec_for_fp); + } +}; + +/* Like rtx_code_function, but for functions that take what is normally + the final argument first. One use of this class is to handle binary + reversed operations; another is to handle MLA-style operations that + are normally expressed in GCC as MAD-style operations. */ +class rtx_code_function_rotated : public rtx_code_function_base +{ +public: + CONSTEXPR rtx_code_function_rotated (rtx_code code_for_sint, + rtx_code code_for_uint, + int unspec_for_fp = -1) + : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Rotate the inputs into their normal order, but continue to make _m + functions merge with what was originally the first vector argument. */ + unsigned int nargs = e.args.length (); + e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs); + return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, + m_unspec_for_fp, nargs - 1); + } +}; + +/* An incomplete function_base for functions that have an associated + unspec code, with separate codes for signed integers, unsigned + integers and floating-point values. The class simply records + information about the mapping for derived classes to use. */ +class unspec_based_function_base : public function_base +{ +public: + CONSTEXPR unspec_based_function_base (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_fp) + : m_unspec_for_sint (unspec_for_sint), + m_unspec_for_uint (unspec_for_uint), + m_unspec_for_fp (unspec_for_fp) + {} + + /* Return the unspec code to use for INSTANCE, based on type suffix 0. 
*/ + int + unspec_for (const function_instance &instance) const + { + return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp + : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint + : m_unspec_for_sint); + } + + /* The unspec code associated with signed-integer, unsigned-integer + and floating-point operations respectively. */ + int m_unspec_for_sint; + int m_unspec_for_uint; + int m_unspec_for_fp; +}; + +/* A function_base for functions that have an associated unspec code. + It supports all forms of predication except PRED_implicit. */ +class unspec_based_function : public unspec_based_function_base +{ +public: + CONSTEXPR unspec_based_function (int unspec_for_sint, int unspec_for_uint, + int unspec_for_fp) + : unspec_based_function_base (unspec_for_sint, unspec_for_uint, + unspec_for_fp) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint, + m_unspec_for_fp); + } +}; + +/* Like unspec_based_function, but for functions that take what is normally + the final argument first. One use of this class is to handle binary + reversed operations; another is to handle MLA-style operations that + are normally expressed in GCC as MAD-style operations. */ +class unspec_based_function_rotated : public unspec_based_function_base +{ +public: + CONSTEXPR unspec_based_function_rotated (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_fp) + : unspec_based_function_base (unspec_for_sint, unspec_for_uint, + unspec_for_fp) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Rotate the inputs into their normal order, but continue to make _m + functions merge with what was originally the first vector argument. */ + unsigned int nargs = e.args.length (); + e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs); + return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint, + m_unspec_for_fp, nargs - 1); + } +}; + +/* Like unspec_based_function, but map the function directly to + CODE (UNSPEC, M) instead of using the generic predication-based + expansion. where M is the vector mode associated with type suffix 0. + This is useful if the unspec doesn't describe the full operation or + if the usual predication rules don't apply for some reason. */ +template +class unspec_based_function_exact_insn : public unspec_based_function_base +{ +public: + CONSTEXPR unspec_based_function_exact_insn (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_fp) + : unspec_based_function_base (unspec_for_sint, unspec_for_uint, + unspec_for_fp) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0))); + } +}; + +/* A function that performs an unspec and then adds it to another value. */ +typedef unspec_based_function_exact_insn + unspec_based_add_function; +typedef unspec_based_function_exact_insn + unspec_based_add_lane_function; + +/* Generic unspec-based _lane function. */ +typedef unspec_based_function_exact_insn + unspec_based_lane_function; + +/* A functon that uses aarch64_pred* patterns regardless of the + predication type. */ +typedef unspec_based_function_exact_insn + unspec_based_pred_function; + +/* Like unspec_based_add_function and unspec_based_add_lane_function, + but using saturating addition. 
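For illustration only (a sketch, not text from the patch): the classes above are meant to be instantiated once per ACLE function and exposed through the function_base pointers declared earlier in the patch, typically via the FUNCTION macro that appears near the end of this header. The builtin names and UNSPEC_COND_* codes below are assumptions chosen for the example.

/* Editorial sketch only -- hypothetical builtins.  */

/* An addition-style builtin: PLUS for both integer signs, plus an
   assumed conditional floating-point unspec for the float forms.  */
FUNCTION (svfoo_add, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))

/* A negation-style builtin wrapped in quiet<>, so that call_properties ()
   reports no side effects even for the floating-point forms.  */
FUNCTION (svfoo_neg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))

As the comment on quiet<> explains, the wrapper is only needed for functions that might have floating-point suffixes; integer-only functions are already assumed to have no side effects.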
*/ +typedef unspec_based_function_exact_insn + unspec_based_qadd_function; +typedef unspec_based_function_exact_insn + unspec_based_qadd_lane_function; + +/* Like unspec_based_sub_function and unspec_based_sub_lane_function, + but using saturating subtraction. */ +typedef unspec_based_function_exact_insn + unspec_based_qsub_function; +typedef unspec_based_function_exact_insn + unspec_based_qsub_lane_function; + +/* A function that performs an unspec and then subtracts it from + another value. */ +typedef unspec_based_function_exact_insn + unspec_based_sub_function; +typedef unspec_based_function_exact_insn + unspec_based_sub_lane_function; + +/* A function that acts like unspec_based_function_exact_insn + when operating on integers, but that expands to an (fma ...)-style + aarch64_sve* operation when applied to floats. */ +template +class unspec_based_fused_function : public unspec_based_function_base +{ +public: + CONSTEXPR unspec_based_fused_function (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_fp) + : unspec_based_function_base (unspec_for_sint, unspec_for_uint, + unspec_for_fp) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + int unspec = unspec_for (e); + insn_code icode; + if (e.type_suffix (0).float_p) + { + /* Put the operands in the normal (fma ...) order, with the accumulator + last. This fits naturally since that's also the unprinted operand + in the asm output. */ + e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3); + icode = code_for_aarch64_sve (unspec, e.vector_mode (0)); + } + else + icode = INT_CODE (unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; +typedef unspec_based_fused_function + unspec_based_mla_function; +typedef unspec_based_fused_function + unspec_based_mls_function; + +/* Like unspec_based_fused_function, but for _lane functions. */ +template +class unspec_based_fused_lane_function : public unspec_based_function_base +{ +public: + CONSTEXPR unspec_based_fused_lane_function (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_fp) + : unspec_based_function_base (unspec_for_sint, unspec_for_uint, + unspec_for_fp) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + int unspec = unspec_for (e); + insn_code icode; + if (e.type_suffix (0).float_p) + { + /* Put the operands in the normal (fma ...) order, with the accumulator + last. This fits naturally since that's also the unprinted operand + in the asm output. */ + e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4); + icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); + } + else + icode = INT_CODE (unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; +typedef unspec_based_fused_lane_function + unspec_based_mla_lane_function; +typedef unspec_based_fused_lane_function + unspec_based_mls_lane_function; + +/* A function_base that uses CODE_FOR_MODE (M) to get the associated + instruction code, where M is the vector mode associated with type + suffix N. */ +template +class code_for_mode_function : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (CODE_FOR_MODE (e.vector_mode (N))); + } +}; + +/* A function that uses code_for_ (M), where M is the vector + mode associated with the first type suffix. */ +#define CODE_FOR_MODE0(PATTERN) code_for_mode_function + +/* Likewise for the second type suffix. 
*/ +#define CODE_FOR_MODE1(PATTERN) code_for_mode_function + +/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when + operating on floating-point data. */ +#define QUIET_CODE_FOR_MODE0(PATTERN) \ + quiet< code_for_mode_function > + +/* A function_base for functions that always expand to a fixed insn pattern, + regardless of what the suffixes are. */ +class fixed_insn_function : public function_base +{ +public: + CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (m_code); + } + + /* The instruction to use. */ + insn_code m_code; +}; + +/* A function_base for functions that permute their arguments. */ +class permute : public quiet +{ +public: + /* Fold a unary or binary permute with the permute vector given by + BUILDER. */ + gimple * + fold_permute (const gimple_folder &f, const vec_perm_builder &builder) const + { + /* Punt for now on _b16 and wider; we'd need more complex evpc logic + to rerecognize the result. */ + if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) + return NULL; + + unsigned int nargs = gimple_call_num_args (f.call); + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_indices indices (builder, nargs, nelts); + tree perm_type = build_vector_type (ssizetype, nelts); + return gimple_build_assign (f.lhs, VEC_PERM_EXPR, + gimple_call_arg (f.call, 0), + gimple_call_arg (f.call, nargs - 1), + vec_perm_indices_to_tree (perm_type, indices)); + } +}; + +/* A function_base for functions that permute two vectors using a fixed + choice of indices. */ +class binary_permute : public permute +{ +public: + CONSTEXPR binary_permute (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +/* A function_base for functions that reduce a vector to a scalar. */ +class reduction : public function_base +{ +public: + CONSTEXPR reduction (int unspec) + : m_unspec_for_sint (unspec), + m_unspec_for_uint (unspec), + m_unspec_for_fp (unspec) + {} + + CONSTEXPR reduction (int unspec_for_sint, int unspec_for_uint, + int unspec_for_fp) + : m_unspec_for_sint (unspec_for_sint), + m_unspec_for_uint (unspec_for_uint), + m_unspec_for_fp (unspec_for_fp) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + int unspec = (!e.type_suffix (0).integer_p ? m_unspec_for_fp + : e.type_suffix (0).unsigned_p ? m_unspec_for_uint + : m_unspec_for_sint); + /* There's no distinction between SADDV and UADDV for 64-bit elements; + the signed versions only exist for narrower elements. */ + if (GET_MODE_UNIT_BITSIZE (mode) == 64 && unspec == UNSPEC_SADDV) + unspec = UNSPEC_UADDV; + return e.use_exact_insn (code_for_aarch64_pred_reduc (unspec, mode)); + } + + /* The unspec code associated with signed-integer, unsigned-integer + and floating-point operations respectively. */ + int m_unspec_for_sint; + int m_unspec_for_uint; + int m_unspec_for_fp; +}; + +/* A function_base for functions that shift narrower-than-64-bit values + by 64-bit amounts. 
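As a worked example (not from the patch) of the unspec selection in reduction::expand, consider an additive reduction constructed as reduction (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV), where UNSPEC_FADDV is an assumed name:

/* Editorial worked example -- UNSPEC_FADDV is an assumed name.
     signed 8/16/32-bit elements   -> UNSPEC_SADDV
     unsigned elements (any width) -> UNSPEC_UADDV
     signed 64-bit elements        -> UNSPEC_SADDV, then rewritten to
                                      UNSPEC_UADDV (no 64-bit SADDV)
     floating-point elements       -> UNSPEC_FADDV  */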
*/ +class shift_wide : public function_base +{ +public: + CONSTEXPR shift_wide (rtx_code code, int wide_unspec) + : m_code (code), m_wide_unspec (wide_unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + machine_mode elem_mode = GET_MODE_INNER (mode); + + /* If the argument is a constant that the normal shifts can handle + directly, use them instead. */ + rtx shift = unwrap_const_vec_duplicate (e.args.last ()); + if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT)) + { + e.args.last () = shift; + return e.map_to_rtx_codes (m_code, m_code, -1); + } + + if (e.pred == PRED_x) + return e.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec, mode)); + + return e.use_cond_insn (code_for_cond (m_wide_unspec, mode)); + } + + /* The rtx code associated with a "normal" shift. */ + rtx_code m_code; + + /* The unspec code associated with the wide shift. */ + int m_wide_unspec; +}; + +/* A function_base for unary functions that count bits. */ +class unary_count : public quiet +{ +public: + CONSTEXPR unary_count (rtx_code code) : m_code (code) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The md patterns treat the operand as an integer. */ + machine_mode mode = aarch64_sve_int_mode (e.vector_mode (0)); + e.args.last () = gen_lowpart (mode, e.args.last ()); + + if (e.pred == PRED_x) + return e.use_pred_x_insn (code_for_aarch64_pred (m_code, mode)); + + return e.use_cond_insn (code_for_cond (m_code, mode)); + } + + /* The rtx code associated with the operation. */ + rtx_code m_code; +}; + +/* A function_base for svwhile* functions. */ +class while_comparison : public function_base +{ +public: + CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint) + : m_unspec_for_sint (unspec_for_sint), + m_unspec_for_uint (unspec_for_uint) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Suffix 0 determines the predicate mode, suffix 1 determines the + scalar mode and signedness. */ + int unspec = (e.type_suffix (1).unsigned_p + ? m_unspec_for_uint + : m_unspec_for_sint); + machine_mode pred_mode = e.vector_mode (0); + scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1)); + return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode)); + } + + /* The unspec codes associated with signed and unsigned operations + respectively. */ + int m_unspec_for_sint; + int m_unspec_for_uint; +}; + +} + +/* Declare the global function base NAME, creating it from an instance + of class CLASS with constructor arguments ARGS. */ +#define FUNCTION(NAME, CLASS, ARGS) \ + namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \ + namespace functions { const function_base *const NAME = &NAME##_obj; } + +#endif diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc new file mode 100644 index 0000000000000..e16c81c30ba2d --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -0,0 +1,3451 @@ +/* ACLE support for AArch64 SVE (function shapes) + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "memmodel.h" +#include "insn-codes.h" +#include "optabs.h" +#include "aarch64-sve-builtins.h" +#include "aarch64-sve-builtins-shapes.h" + +/* In the comments below, _t0 represents the first type suffix and _t1 + represents the second. Square brackets enclose characters that are + present in only the full name, not the overloaded name. Governing + predicate arguments and predicate suffixes are not shown, since they + depend on the predication type, which is a separate piece of + information from the shape. + + Non-overloaded functions may have additional suffixes beyond the + ones shown, if those suffixes don't affect the types in the type + signature. E.g. the predicate form of svtrn1 has a _b suffix, + but this does not affect the prototype, which is always + "svbool_t(svbool_t, svbool_t)". */ + +namespace aarch64_sve { + +/* Return a representation of "const T *". */ +static tree +build_const_pointer (tree t) +{ + return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST)); +} + +/* If INSTANCE has a governing predicate, add it to the list of argument + types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the + function. */ +static void +apply_predication (const function_instance &instance, tree return_type, + vec &argument_types) +{ + if (instance.pred != PRED_none) + { + argument_types.quick_insert (0, get_svbool_t ()); + /* For unary merge operations, the first argument is a vector with + the same type as the result. For unary_convert_narrowt it also + provides the "bottom" half of active elements, and is present + for all types of predication. */ + if ((argument_types.length () == 2 && instance.pred == PRED_m) + || instance.shape == shapes::unary_convert_narrowt) + argument_types.quick_insert (0, return_type); + } +} + +/* Parse and move past an element type in FORMAT and return it as a type + suffix. The format is: + + [01] - the element type in type suffix 0 or 1 of INSTANCE + f - a floating-point type with the given number of bits + f[01] - a floating-point type with the same width as type suffix 0 or 1 + B - bfloat16_t + h - a half-sized version of + p - a predicate (represented as TYPE_SUFFIX_b) + q - a quarter-sized version of + s - a signed type with the given number of bits + s[01] - a signed type with the same width as type suffix 0 or 1 + u - an unsigned type with the given number of bits + u[01] - an unsigned type with the same width as type suffix 0 or 1 + w - a 64-bit version of if is integral, otherwise + + where is another element type. */ +static type_suffix_index +parse_element_type (const function_instance &instance, const char *&format) +{ + int ch = *format++; + + if (ch == 'f' || ch == 's' || ch == 'u') + { + type_class_index tclass = (ch == 'f' ? TYPE_float + : ch == 's' ? 
TYPE_signed + : TYPE_unsigned); + char *end; + unsigned int bits = strtol (format, &end, 10); + format = end; + if (bits == 0 || bits == 1) + bits = instance.type_suffix (bits).element_bits; + return find_type_suffix (tclass, bits); + } + + if (ch == 'w') + { + type_suffix_index suffix = parse_element_type (instance, format); + if (type_suffixes[suffix].integer_p) + return find_type_suffix (type_suffixes[suffix].tclass, 64); + return suffix; + } + + if (ch == 'p') + return TYPE_SUFFIX_b; + + if (ch == 'B') + return TYPE_SUFFIX_bf16; + + if (ch == 'q') + { + type_suffix_index suffix = parse_element_type (instance, format); + return find_type_suffix (type_suffixes[suffix].tclass, + type_suffixes[suffix].element_bits / 4); + } + + if (ch == 'h') + { + type_suffix_index suffix = parse_element_type (instance, format); + /* Widening and narrowing doesn't change the type for predicates; + everything's still an svbool_t. */ + if (suffix == TYPE_SUFFIX_b) + return suffix; + return find_type_suffix (type_suffixes[suffix].tclass, + type_suffixes[suffix].element_bits / 2); + } + + if (ch == '0' || ch == '1') + return instance.type_suffix_ids[ch - '0']; + + gcc_unreachable (); +} + +/* Read and return a type from FORMAT for function INSTANCE. Advance + FORMAT beyond the type string. The format is: + + _ - void + al - array pointer for loads + ap - array pointer for prefetches + as - array pointer for stores + b - base vector type (from a _base suffix) + d - displacement vector type (from a _index or _offset suffix) + e - an enum with the given name + s - a scalar type with the given element suffix + t - a vector or tuple type with given element suffix [*1] + v - a vector with the given element suffix + + where has the format described above parse_element_type + + [*1] the vectors_per_tuple function indicates whether the type should + be a tuple, and if so, how many vectors it should contain. */ +static tree +parse_type (const function_instance &instance, const char *&format) +{ + int ch = *format++; + + if (ch == '_') + return void_type_node; + + if (ch == 'a') + { + ch = *format++; + if (ch == 'l') + return build_const_pointer (instance.memory_scalar_type ()); + if (ch == 'p') + return const_ptr_type_node; + if (ch == 's') + return build_pointer_type (instance.memory_scalar_type ()); + gcc_unreachable (); + } + + if (ch == 'b') + return instance.base_vector_type (); + + if (ch == 'd') + return instance.displacement_vector_type (); + + if (ch == 'e') + { + if (strncmp (format, "pattern", 7) == 0) + { + format += 7; + return acle_svpattern; + } + if (strncmp (format, "prfop", 5) == 0) + { + format += 5; + return acle_svprfop; + } + gcc_unreachable (); + } + + if (ch == 's') + { + type_suffix_index suffix = parse_element_type (instance, format); + return scalar_types[type_suffixes[suffix].vector_type]; + } + + if (ch == 't') + { + type_suffix_index suffix = parse_element_type (instance, format); + vector_type_index vector_type = type_suffixes[suffix].vector_type; + unsigned int num_vectors = instance.vectors_per_tuple (); + return acle_vector_types[num_vectors - 1][vector_type]; + } + + if (ch == 'v') + { + type_suffix_index suffix = parse_element_type (instance, format); + return acle_vector_types[0][type_suffixes[suffix].vector_type]; + } + + gcc_unreachable (); +} + +/* Read and move past any argument count at FORMAT for the function + signature of INSTANCE. 
The counts are: + + *q: one argument per element in a 128-bit quadword (as for svdupq) + *t: one argument per vector in a tuple (as for svcreate) + + Otherwise the count is 1. */ +static unsigned int +parse_count (const function_instance &instance, const char *&format) +{ + if (format[0] == '*' && format[1] == 'q') + { + format += 2; + return instance.elements_per_vq (0); + } + if (format[0] == '*' && format[1] == 't') + { + format += 2; + return instance.vectors_per_tuple (); + } + return 1; +} + +/* Read a type signature for INSTANCE from FORMAT. Add the argument types + to ARGUMENT_TYPES and return the return type. + + The format is a comma-separated list of types (as for parse_type), + with the first type being the return type and the rest being the + argument types. Each argument type can be followed by an optional + count (as for parse_count). */ +static tree +parse_signature (const function_instance &instance, const char *format, + vec &argument_types) +{ + tree return_type = parse_type (instance, format); + while (format[0] == ',') + { + format += 1; + tree argument_type = parse_type (instance, format); + unsigned int count = parse_count (instance, format); + for (unsigned int i = 0; i < count; ++i) + argument_types.quick_push (argument_type); + } + gcc_assert (format[0] == 0); + return return_type; +} + +/* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID, + the type suffixes at index TI and the predication suffix at index PI. + The other arguments are as for build_all. */ +static void +build_one (function_builder &b, const char *signature, + const function_group_info &group, mode_suffix_index mode_suffix_id, + unsigned int ti, unsigned int pi, bool force_direct_overloads) +{ + /* Byte forms of svdupq take 16 arguments. */ + auto_vec argument_types; + function_instance instance (group.base_name, *group.base, *group.shape, + mode_suffix_id, group.types[ti], + group.preds[pi]); + tree return_type = parse_signature (instance, signature, argument_types); + apply_predication (instance, return_type, argument_types); + b.add_unique_function (instance, return_type, argument_types, + group.required_extensions, force_direct_overloads); +} + +/* GROUP describes some sort of gather or scatter operation. There are + two cases: + + - If the function has any type suffixes (as for loads and stores), the + first function type suffix specifies either a 32-bit or a 64-bit type, + which in turn selects either MODE32 or MODE64 as the addressing mode. + Add a function instance for every type and predicate combination + in GROUP for which the associated addressing mode is not MODE_none. + + - If the function has no type suffixes (as for prefetches), add one + MODE32 form and one MODE64 form for each predication type. + + The other arguments are as for build_all. 
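As a worked example (not from the patch) of how parse_signature expands these format strings, take a function instance whose first type suffix is _f32:

/* Editorial worked example, type suffix 0 == _f32:

     "v0,v0,v0"    -> svfloat32_t (svfloat32_t, svfloat32_t)
     "v0,v0,s0"    -> svfloat32_t (svfloat32_t, float32_t)
     "v0,vh0,su64" -> svfloat32_t (svfloat16_t, uint64_t)

   apply_predication then prepends the governing svbool_t for predicated
   forms and, for unary _m forms (plus the unary_convert_narrowt shape),
   a leading merge input with the same type as the result.  */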
*/ +static void +build_32_64 (function_builder &b, const char *signature, + const function_group_info &group, mode_suffix_index mode32, + mode_suffix_index mode64, bool force_direct_overloads = false) +{ + for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) + if (group.types[0][0] == NUM_TYPE_SUFFIXES) + { + gcc_assert (mode32 != MODE_none && mode64 != MODE_none); + build_one (b, signature, group, mode32, 0, pi, + force_direct_overloads); + build_one (b, signature, group, mode64, 0, pi, + force_direct_overloads); + } + else + for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) + { + unsigned int bits = type_suffixes[group.types[ti][0]].element_bits; + gcc_assert (bits == 32 || bits == 64); + mode_suffix_index mode = bits == 32 ? mode32 : mode64; + if (mode != MODE_none) + build_one (b, signature, group, mode, ti, pi, + force_direct_overloads); + } +} + +/* For every type and predicate combination in GROUP, add one function + that takes a scalar (pointer) base and a signed vector array index, + and another that instead takes an unsigned vector array index. + The vector array index has the same element size as the first + function type suffix. SIGNATURE is as for build_all. */ +static void +build_sv_index (function_builder &b, const char *signature, + const function_group_info &group) +{ + build_32_64 (b, signature, group, MODE_s32index, MODE_s64index); + build_32_64 (b, signature, group, MODE_u32index, MODE_u64index); +} + +/* Like build_sv_index, but only handle 64-bit types. */ +static void +build_sv_index64 (function_builder &b, const char *signature, + const function_group_info &group) +{ + build_32_64 (b, signature, group, MODE_none, MODE_s64index); + build_32_64 (b, signature, group, MODE_none, MODE_u64index); +} + +/* Like build_sv_index, but taking vector byte offsets instead of vector + array indices. */ +static void +build_sv_offset (function_builder &b, const char *signature, + const function_group_info &group) +{ + build_32_64 (b, signature, group, MODE_s32offset, MODE_s64offset); + build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset); +} + +/* Like build_sv_offset, but exclude offsets that must be interpreted + as signed (i.e. s32offset). */ +static void +build_sv_uint_offset (function_builder &b, const char *signature, + const function_group_info &group) +{ + build_32_64 (b, signature, group, MODE_none, MODE_s64offset); + build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset); +} + +/* For every type and predicate combination in GROUP, add a function + that takes a vector base address and no displacement. The vector + base has the same element size as the first type suffix. + + The other arguments are as for build_all. */ +static void +build_v_base (function_builder &b, const char *signature, + const function_group_info &group, + bool force_direct_overloads = false) +{ + build_32_64 (b, signature, group, MODE_u32base, MODE_u64base, + force_direct_overloads); +} + +/* Like build_v_base, but for functions that also take a scalar array + index. */ +static void +build_vs_index (function_builder &b, const char *signature, + const function_group_info &group, + bool force_direct_overloads = false) +{ + build_32_64 (b, signature, group, MODE_u32base_index, MODE_u64base_index, + force_direct_overloads); +} + +/* Like build_v_base, but for functions that also take a scalar byte + offset. 
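A worked example (not from the patch) of how build_sv_index expands a gather-load group, per predication type:

/* Editorial worked example for build_sv_index:

     first type suffix is 32-bit (e.g. _s32, _u32, _f32)
       -> one _s32index form and one _u32index form
     first type suffix is 64-bit (e.g. _s64, _u64, _f64)
       -> one _s64index form and one _u64index form

   because build_32_64 chooses between mode32 and mode64 using the
   element width of the first type suffix.  */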
*/ +static void +build_vs_offset (function_builder &b, const char *signature, + const function_group_info &group, + bool force_direct_overloads = false) +{ + build_32_64 (b, signature, group, MODE_u32base_offset, MODE_u64base_offset, + force_direct_overloads); +} + +/* Add a function instance for every type and predicate combination + in GROUP. Take the function base name from GROUP and the mode suffix + from MODE_SUFFIX_ID. Use SIGNATURE to construct the function signature + without a governing predicate, then use apply_predication to add in the + predicate. FORCE_DIRECT_OVERLOADS is true if there is a one-to-one + mapping between "short" and "full" names, and if standard overload + resolution therefore isn't necessary. */ +static void +build_all (function_builder &b, const char *signature, + const function_group_info &group, mode_suffix_index mode_suffix_id, + bool force_direct_overloads = false) +{ + for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) + for (unsigned int ti = 0; + ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) + build_one (b, signature, group, mode_suffix_id, ti, pi, + force_direct_overloads); +} + +/* TYPE is the largest type suffix associated with the arguments of R, + but the result is twice as wide. Return the associated type suffix + if it exists, otherwise report an appropriate error and return + NUM_TYPE_SUFFIXES. */ +static type_suffix_index +long_type_suffix (function_resolver &r, type_suffix_index type) +{ + unsigned int element_bits = type_suffixes[type].element_bits; + if (type_suffixes[type].integer_p && element_bits < 64) + return find_type_suffix (type_suffixes[type].tclass, element_bits * 2); + + r.report_no_such_form (type); + return NUM_TYPE_SUFFIXES; +} + +/* Declare the function shape NAME, pointing it to an instance + of class _def. */ +#define SHAPE(NAME) \ + static CONSTEXPR const NAME##_def NAME##_obj; \ + namespace shapes { const function_shape *const NAME = &NAME##_obj; } + +/* Base class for functions that are not overloaded. */ +struct nonoverloaded_base : public function_shape +{ + bool + explicit_type_suffix_p (unsigned int) const OVERRIDE + { + return true; + } + + tree + resolve (function_resolver &) const OVERRIDE + { + gcc_unreachable (); + } +}; + +/* Base class for overloaded functions. Bit N of EXPLICIT_MASK is true + if type suffix N appears in the overloaded name. */ +template +struct overloaded_base : public function_shape +{ + bool + explicit_type_suffix_p (unsigned int i) const OVERRIDE + { + return (EXPLICIT_MASK >> i) & 1; + } +}; + +/* Base class for adr_index and adr_offset. */ +struct adr_base : public overloaded_base<0> +{ + /* The function takes two arguments: a vector base and a vector displacement + (either an index or an offset). Resolve based on them both. */ + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + mode_suffix_index mode; + if (!r.check_gp_argument (2, i, nargs) + || (mode = r.resolve_adr_address (0)) == MODE_none) + return error_mark_node; + + return r.resolve_to (mode); + }; +}; + +/* Base class for narrowing bottom binary functions that take an + immediate second operand. The result is half the size of input + and has class CLASS. 
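A worked example (not from the patch) of long_type_suffix, which several of the "long" shapes below rely on:

/* Editorial worked example for long_type_suffix:

     _s8  -> _s16        _u16 -> _u32        _s32 -> _s64
     _s64 -> no wider integer suffix: report_no_such_form and return
             NUM_TYPE_SUFFIXES
     _f32 -> not an integer suffix: likewise an error  */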
*/ +template +struct binary_imm_narrowb_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS + || CLASS == TYPE_unsigned); + if (CLASS == TYPE_unsigned) + build_all (b, "vhu0,v0,su64", group, MODE_n); + else + build_all (b, "vh0,v0,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (1, 1); + } +}; + +/* The top equivalent of binary_imm_narrowb_base. It takes three arguments, + with the first being the values of the even elements, which are typically + the result of the narrowb operation. */ +template +struct binary_imm_narrowt_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS + || CLASS == TYPE_unsigned); + if (CLASS == TYPE_unsigned) + build_all (b, "vhu0,vhu0,v0,su64", group, MODE_n); + else + build_all (b, "vh0,vh0,v0,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE) + || !r.require_integer_immediate (i + 2)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; + +/* Base class for long (i.e. narrow op narrow -> wide) binary functions + that take an immediate second operand. The type suffix specifies + the wider type. */ +struct binary_imm_long_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "v0,vh0,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type, result_type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_integer_immediate (i + 1) + || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) + return res; + + return r.report_no_such_form (type); + } +}; + +/* Base class for inc_dec and inc_dec_pat. */ +struct inc_dec_base : public overloaded_base<0> +{ + CONSTEXPR inc_dec_base (bool pat_p) : m_pat_p (pat_p) {} + + /* Resolve based on the first argument only, which must be either a + scalar or a vector. If it's a scalar, it must be a 32-bit or + 64-bit integer. */ + tree + resolve (function_resolver &r) const + { + unsigned int i, nargs; + if (!r.check_gp_argument (m_pat_p ? 3 : 2, i, nargs) + || !r.require_vector_or_scalar_type (i)) + return error_mark_node; + + mode_suffix_index mode; + type_suffix_index type; + if (r.scalar_argument_p (i)) + { + mode = MODE_n; + type = r.infer_integer_scalar_type (i); + } + else + { + mode = MODE_none; + type = r.infer_vector_type (i); + } + if (type == NUM_TYPE_SUFFIXES) + return error_mark_node; + + for (++i; i < nargs; ++i) + if (!r.require_integer_immediate (i)) + return error_mark_node; + + return r.resolve_to (mode, type); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_range (m_pat_p ? 
2 : 1, 1, 16); + } + + bool m_pat_p; +}; + +/* Base class for load and load_replicate. */ +struct load_contiguous_base : public overloaded_base<0> +{ + /* Resolve a call based purely on a pointer argument. The other arguments + are a governing predicate and (for MODE_vnum) a vnum offset. */ + tree + resolve (function_resolver &r) const OVERRIDE + { + bool vnum_p = r.mode_suffix_id == MODE_vnum; + gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); + + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (vnum_p ? 2 : 1, i, nargs) + || (type = r.infer_pointer_type (i)) == NUM_TYPE_SUFFIXES + || (vnum_p && !r.require_scalar_type (i + 1, "int64_t"))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; + +/* Base class for gather loads that take a scalar base and a vector + displacement (either an offset or an index). */ +struct load_gather_sv_base : public overloaded_base<0> +{ + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + mode_suffix_index mode; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_pointer_type (i, true)) == NUM_TYPE_SUFFIXES + || (mode = r.resolve_sv_displacement (i + 1, type, true), + mode == MODE_none)) + return error_mark_node; + + return r.resolve_to (mode, type); + } +}; + +/* Base class for load_ext_gather_index and load_ext_gather_offset, + which differ only in the units of the displacement. */ +struct load_ext_gather_base : public overloaded_base<1> +{ + /* Resolve a gather load that takes one of: + + - a scalar pointer base and a vector displacement + - a vector base with no displacement or + - a vector base and a scalar displacement + + The function has an explicit type suffix that determines the type + of the loaded data. */ + tree + resolve (function_resolver &r) const OVERRIDE + { + /* No resolution is needed for a vector base with no displacement; + there's a one-to-one mapping between short and long names. */ + gcc_assert (r.displacement_units () != UNITS_none); + + type_suffix_index type = r.type_suffix_ids[0]; + + unsigned int i, nargs; + mode_suffix_index mode; + if (!r.check_gp_argument (2, i, nargs) + || (mode = r.resolve_gather_address (i, type, true)) == MODE_none) + return error_mark_node; + + return r.resolve_to (mode, type); + } +}; + +/* sv_t svfoo[_t0](sv_t, sv_t, + sv_t) (for integer t0) + sv_t svmmla[_t0](sv_t, sv_t, sv_t) (for floating-point t0) + + The functions act like the equivalent of "ternary_qq" for integer elements + and normal vector-only ternary functions for floating-point elements. */ +struct mmla_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + /* svmmla is distributed over several extensions. Allow the common + denominator to define the overloaded svmmla function without + defining any specific versions. */ + if (group.types[0][0] != NUM_TYPE_SUFFIXES) + { + if (type_suffixes[group.types[0][0]].float_p) + build_all (b, "v0,v0,v0,v0", group, MODE_none); + else + build_all (b, "v0,v0,vq0,vq0", group, MODE_none); + } + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + /* Make sure that the function exists now, since not all forms + follow a set pattern after this point. 
*/ + tree res = r.resolve_to (r.mode_suffix_id, type); + if (res == error_mark_node) + return res; + + bool float_p = type_suffixes[type].float_p; + unsigned int modifier = float_p ? r.SAME_SIZE : r.QUARTER_SIZE; + if (!r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, + modifier) + || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, + modifier)) + return error_mark_node; + + return res; + } +}; +SHAPE (mmla) + +/* Base class for prefetch_gather_index and prefetch_gather_offset, + which differ only in the units of the displacement. */ +struct prefetch_gather_base : public overloaded_base<0> +{ + /* Resolve a gather prefetch that takes one of: + + - a scalar pointer base (const void *) and a vector displacement + - a vector base with no displacement or + - a vector base and a scalar displacement + + The prefetch operation is the final argument. This is purely a + mode-based resolution; there are no type suffixes. */ + tree + resolve (function_resolver &r) const OVERRIDE + { + bool has_displacement_p = r.displacement_units () != UNITS_none; + + unsigned int i, nargs; + mode_suffix_index mode; + if (!r.check_gp_argument (has_displacement_p ? 3 : 2, i, nargs) + || (mode = r.resolve_gather_address (i, NUM_TYPE_SUFFIXES, + false)) == MODE_none + || !r.require_integer_immediate (nargs - 1)) + return error_mark_node; + + return r.resolve_to (mode); + } +}; + +/* Wraps BASE to provide a narrowing shift right function. Argument N + is an immediate shift amount in the range [1, sizeof(_t) * 4]. */ +template +struct shift_right_imm_narrow_wrapper : public BASE +{ + bool + check (function_checker &c) const OVERRIDE + { + unsigned int bits = c.type_suffix (0).element_bits / 2; + return c.require_immediate_range (N, 1, bits); + } +}; + +/* Base class for store_scatter_index and store_scatter_offset, + which differ only in the units of the displacement. */ +struct store_scatter_base : public overloaded_base<0> +{ + /* Resolve a scatter store that takes one of: + + - a scalar pointer base and a vector displacement + - a vector base with no displacement or + - a vector base and a scalar displacement + + The stored data is the final argument, and it determines the + type suffix. */ + tree + resolve (function_resolver &r) const OVERRIDE + { + bool has_displacement_p = r.displacement_units () != UNITS_none; + + unsigned int i, nargs; + mode_suffix_index mode; + type_suffix_index type; + if (!r.check_gp_argument (has_displacement_p ? 3 : 2, i, nargs) + || (type = r.infer_sd_vector_type (nargs - 1)) == NUM_TYPE_SUFFIXES + || (mode = r.resolve_gather_address (i, type, false)) == MODE_none) + return error_mark_node; + + return r.resolve_to (mode, type); + } +}; + +/* Base class for ternary operations in which the final argument is an + immediate shift amount. The derived class should check the range. */ +struct ternary_shift_imm_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "v0,v0,v0,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2, 1); + } +}; + +/* Base class for ternary operations in which the first argument has the + same element type as the result, and in which the second and third + arguments have an element type that is derived the first. 
+ + MODIFIER is the number of element bits in the second and third + arguments, or a function_resolver modifier that says how this + precision is derived from the first argument's elements. + + TYPE_CLASS2 and TYPE_CLASS3 are the type classes of the second and + third arguments, or function_resolver::SAME_TYPE_CLASS if the type + class is the same as the first argument. */ +template +struct ternary_resize2_opt_n_base : public overloaded_base<0> +{ + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, + MODIFIER)) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 2, i, type, TYPE_CLASS3, MODIFIER); + } +}; + +/* Like ternary_resize2_opt_n_base, but for functions that don't take + a final scalar argument. */ +template +struct ternary_resize2_base : public overloaded_base<0> +{ + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, + MODIFIER) + || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3, + MODIFIER)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; + +/* Like ternary_resize2_opt_n_base, but for functions that take a final + lane argument. */ +template +struct ternary_resize2_lane_base : public overloaded_base<0> +{ + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (4, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, + MODIFIER) + || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3, + MODIFIER) + || !r.require_integer_immediate (i + 3)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; + +/* A specialization of ternary_resize2_lane_base for bfloat16 elements, + indexed in groups of N elements. */ +template +struct ternary_bfloat_lane_base + : public ternary_resize2_lane_base<16, TYPE_bfloat, TYPE_bfloat> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vB,vB,su64", group, MODE_none); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_lane_index (3, N); + } +}; + +/* A specialization of ternary_resize2_lane_base for quarter-sized + elements. */ +template +struct ternary_qq_lane_base + : public ternary_resize2_lane_base +{ + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_lane_index (3, 4); + } +}; + +/* Base class for narrowing bottom unary functions. The result is half + the size of input and has class CLASS. 
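For illustration (a sketch, not from the patch, with a hypothetical builtin name): a quarter-width ternary lane shape built on ternary_qq_lane_base with type suffix _s32 would typically resolve to the prototype below.

/* Editorial worked example -- svfoo_lane is a hypothetical name.

     svint32_t svfoo_lane[_s32] (svint32_t, svint8_t, svint8_t, uint64_t)

   The second and third arguments are quarter-width relative to the
   accumulator, and the final argument is an immediate lane index
   validated by require_immediate_lane_index (3, 4), i.e. indexing in
   groups of 4 elements.  */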
*/ +template +struct unary_narrowb_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS + || CLASS == TYPE_unsigned); + if (CLASS == TYPE_unsigned) + build_all (b, "vhu0,v0", group, MODE_none); + else + build_all (b, "vh0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_unary (CLASS, r.HALF_SIZE); + } +}; + +/* The top equivalent of unary_imm_narrowb_base. All forms take the values + of the even elements as an extra argument, before any governing predicate. + These even elements are typically the result of the narrowb operation. */ +template +struct unary_narrowt_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS + || CLASS == TYPE_unsigned); + if (CLASS == TYPE_unsigned) + build_all (b, "vhu0,vhu0,v0", group, MODE_none); + else + build_all (b, "vh0,vh0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; + +/* sv_t svfoo[_m0base]_[m1]index(sv_t, sv_t) + + for all valid combinations of vector base type and vector + displacement type . */ +struct adr_index_def : public adr_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_index); + build_all (b, "b,b,d", group, MODE_u32base_s32index); + build_all (b, "b,b,d", group, MODE_u32base_u32index); + build_all (b, "b,b,d", group, MODE_u64base_s64index); + build_all (b, "b,b,d", group, MODE_u64base_u64index); + } +}; +SHAPE (adr_index) + +/* sv_t svfoo[_m0base]_[m1]offset(sv_t, sv_t). + + for all valid combinations of vector base type and vector + displacement type . */ +struct adr_offset_def : public adr_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_offset); + build_all (b, "b,b,d", group, MODE_u32base_s32offset); + build_all (b, "b,b,d", group, MODE_u32base_u32offset); + build_all (b, "b,b,d", group, MODE_u64base_s64offset); + build_all (b, "b,b,d", group, MODE_u64base_u64offset); + } +}; +SHAPE (adr_offset) + +/* sv_t svfoo[_t0](sv_t, sv_t) + + i.e. a binary operation with uniform types, but with no scalar form. */ +struct binary_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2); + } +}; +SHAPE (binary) + +/* sv_t svfoo[_t0](sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, _t). + + i.e. a version of the standard binary shape binary_opt_n in which + the final argument is always a signed integer. 
*/ +struct binary_int_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vs0", group, MODE_none); + build_all (b, "v0,v0,ss0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 1, i, type, TYPE_signed); + } +}; +SHAPE (binary_int_opt_n) + +/* sv_t svfoo_(sv_t, sv_t, uint64_t) + + where the final argument is an integer constant expression in the + range [0, 16 / sizeof (_t) - 1]. */ +struct binary_lane_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_lane_index (2); + } +}; +SHAPE (binary_lane) + +/* sv_t svfoo[_t0](sv_t, sv_t, uint64_t). + + where the final argument is an integer constant expression in the + range [0, 32 / sizeof (_t) - 1]. */ +struct binary_long_lane_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,vh0,vh0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type, result_type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_matching_vector_type (i + 1, type) + || !r.require_integer_immediate (i + 2) + || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) + return res; + + return r.report_no_such_form (type); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_lane_index (2); + } +}; +SHAPE (binary_long_lane) + +/* sv_t svfoo[_t0](sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, _t). */ +struct binary_long_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,vh0,vh0", group, MODE_none); + build_all (b, "v0,vh0,sh0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type, result_type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, + r.SAME_SIZE, result_type); + } +}; +SHAPE (binary_long_opt_n) + +/* sv_t svfoo[_n_t0](sv_t, _t). + + i.e. a binary operation in which the final argument is always a scalar + rather than a vector. 
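Illustrative sketch, not part of the patch: a minimal example of the binary_lane shape, assuming the arm_sve.h intrinsic svmul_lane resolves through it. The final immediate selects a lane within a 128-bit quadword, so for float32_t it must lie in [0, 3]:

  #include <arm_sve.h>

  svfloat32_t
  mul_by_lane (svfloat32_t a, svfloat32_t b)
  {
    /* Overloaded form; resolves to svmul_lane_f32.  The lane index is an
       integer constant expression in [0, 16 / sizeof (float32_t) - 1].  */
    return svmul_lane (a, b, 1);
  }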
*/ +struct binary_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "v0,v0,s0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_scalar_type (i + 1, r.SAME_TYPE_CLASS)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (binary_n) + +/* sv_t svfoo[_t0](sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, _t) + + i.e. a version of binary_opt_n in which the output elements are half the + width of the input elements. */ +struct binary_narrowb_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vh0,v0,v0", group, MODE_none); + build_all (b, "vh0,v0,s0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform_opt_n (2); + } +}; +SHAPE (binary_narrowb_opt_n) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, sv_t, _t) + + This is the "top" counterpart to binary_narrowb_opt_n. */ +struct binary_narrowt_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vh0,vh0,v0,v0", group, MODE_none); + build_all (b, "vh0,vh0,v0,s0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i, i + 1, type, r.SAME_TYPE_CLASS, + r.HALF_SIZE)) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 2, i + 1, type); + } +}; +SHAPE (binary_narrowt_opt_n) + +/* sv_t svfoo[_t0](sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, _t) + + i.e. the standard shape for binary operations that operate on + uniform types. */ +struct binary_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0", group, MODE_none); + /* _b functions do not have an _n form, but are classified as + binary_opt_n so that they can be overloaded with vector + functions. */ + if (group.types[0][0] == TYPE_SUFFIX_b) + gcc_assert (group.types[0][1] == NUM_TYPE_SUFFIXES); + else + build_all (b, "v0,v0,s0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform_opt_n (2); + } +}; +SHAPE (binary_opt_n) + +/* svbool_t svfoo(svbool_t, svbool_t). */ +struct binary_pred_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "v0,v0,v0", group, MODE_none); + } +}; +SHAPE (binary_pred) + +/* sv_t svfoo[_](sv_t, sv_t, uint64_t) + + where the final argument must be 90 or 270. 
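Illustrative sketch, not part of the patch: the _opt_n convention means one overloaded name accepts either a vector or a scalar final operand. A minimal example, assuming the familiar svadd intrinsic is built from binary_opt_n:

  #include <arm_sve.h>

  svint32_t
  add_examples (svbool_t pg, svint32_t a, svint32_t b)
  {
    svint32_t v = svadd_x (pg, a, b);   /* resolves to svadd_s32_x */
    return svadd_x (pg, v, 1);          /* resolves to svadd_n_s32_x */
  }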
*/ +struct binary_rotate_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_either_or (2, 90, 270); + } +}; +SHAPE (binary_rotate) + +/* sv_t svfoo_t0(_t, _t) + + i.e. a binary function that takes two scalars and returns a vector. + An explicit type suffix is required. */ +struct binary_scalar_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "v0,s0,s0", group, MODE_none); + } +}; +SHAPE (binary_scalar) + +/* sv_t svfoo[_t0](sv_t, sv_t). + + i.e. a version of "binary" that returns unsigned integers. */ +struct binary_to_uint_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vu0,v0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2); + } +}; +SHAPE (binary_to_uint) + +/* sv_t svfoo[_t0](sv_t, sv_t) + + i.e. a version of "binary" in which the final argument is always an + unsigned integer. */ +struct binary_uint_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vu0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (binary_uint) + +/* sv_t svfoo[_t0](sv_t, _t) + + i.e. a version of binary_n in which the final argument is always an + unsigned integer. */ +struct binary_uint_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,su0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_scalar_type (i + 1, TYPE_unsigned)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (binary_uint_n) + +/* sv_t svfoo[_t0](sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, _t) + + i.e. a version of the standard binary shape binary_opt_n in which + the final argument is always an unsigned integer. 
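Illustrative sketch, not part of the patch: a minimal example of binary_rotate, assuming svcadd is one of its users; the final argument must be the constant 90 or 270:

  #include <arm_sve.h>

  svfloat32_t
  complex_add (svbool_t pg, svfloat32_t a, svfloat32_t b)
  {
    /* Rotation must be an integer constant expression equal to 90 or 270.  */
    return svcadd_x (pg, a, b, 90);
  }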
*/ +struct binary_uint_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vu0", group, MODE_none); + build_all (b, "v0,v0,su0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 1, i, type, TYPE_unsigned); + } +}; +SHAPE (binary_uint_opt_n) + +/* sv_t svfoo[_t0](sv_t, uint64_t). + + i.e. a version of binary_n in which the final argument is always + a 64-bit unsigned integer. */ +struct binary_uint64_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_scalar_type (i + 1, "uint64_t")) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (binary_uint64_n) + +/* sv_t svfoo[_t0](sv_t, svuint64_t) + sv_t svfoo[_n_t0](sv_t, uint64_t) + + i.e. a version of the standard binary shape binary_opt_n in which + the final argument is always a uint64_t. */ +struct binary_uint64_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vu64", group, MODE_none); + build_all (b, "v0,v0,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 1, i, type, TYPE_unsigned, 64); + } +}; +SHAPE (binary_uint64_opt_n) + +/* sv_t svfoo[_t0](sv_t, sv_t). */ +struct binary_wide_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vh0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, + r.HALF_SIZE)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (binary_wide) + +/* sv_t svfoo[_t0](sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, _t). 
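Illustrative sketch, not part of the patch: a minimal example of binary_uint_opt_n, assuming svlsl resolves through it; the shift amount is always unsigned, given either as a vector or as a scalar:

  #include <arm_sve.h>

  svint32_t
  shift_examples (svbool_t pg, svint32_t a, svuint32_t amounts)
  {
    svint32_t v = svlsl_x (pg, a, amounts);   /* svlsl_s32_x */
    return svlsl_x (pg, v, 3u);               /* svlsl_n_s32_x */
  }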
*/ +struct binary_wide_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vh0", group, MODE_none); + build_all (b, "v0,v0,sh0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, + r.HALF_SIZE); + } +}; +SHAPE (binary_wide_opt_n) + +/* sv_t svfoo[_t0](sv_t, sv_t) + _t svfoo[_n_t0](_t, sv_t). */ +struct clast_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0", group, MODE_none); + build_all (b, "s0,s0,v0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + if (!r.check_gp_argument (2, i, nargs) + || !r.require_vector_or_scalar_type (i)) + return error_mark_node; + + if (r.scalar_argument_p (i)) + { + type_suffix_index type; + if (!r.require_derived_scalar_type (i, r.SAME_TYPE_CLASS) + || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + return r.resolve_to (MODE_n, type); + } + else + { + type_suffix_index type; + if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_matching_vector_type (i + 1, type)) + return error_mark_node; + return r.resolve_to (MODE_none, type); + } + } +}; +SHAPE (clast) + +/* svbool_t svfoo[_t0](sv_t, sv_t). */ +struct compare_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vp,v0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2); + } +}; +SHAPE (compare) + +/* svbool_t svfoo[_t0](sv_t, sv_t) + svbool_t svfoo[_n_t0](sv_t, _t) + + i.e. a comparison between two vectors, or between a vector and a scalar. */ +struct compare_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vp,v0,v0", group, MODE_none); + build_all (b, "vp,v0,s0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform_opt_n (2); + } +}; +SHAPE (compare_opt_n) + +/* svbool_t svfoo[_t0](const _t *, const _t *). */ +struct compare_ptr_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vp,al,al", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_pointer_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_matching_pointer_type (i + 1, i, type)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (compare_ptr) + +/* svbool_t svfoo_t0[_t1](_t, _t) + + where _t0 is a _b suffix that describes the predicate result. + There is no direct relationship between the element sizes of _t0 + and _t1. 
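Illustrative sketch, not part of the patch: a minimal example of compare_opt_n, assuming svcmplt is built from it; the vector/vector and vector/scalar forms resolve through one overloaded name and both return an svbool_t:

  #include <arm_sve.h>

  svbool_t
  compare_examples (svbool_t pg, svint32_t a, svint32_t b)
  {
    svbool_t lt  = svcmplt (pg, a, b);   /* svcmplt_s32 */
    svbool_t neg = svcmplt (pg, a, 0);   /* svcmplt_n_s32 */
    return svand_z (pg, lt, neg);
  }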
*/ +struct compare_scalar_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vp,s1,s1", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_integer_scalar_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_matching_integer_scalar_type (i + 1, i, type)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); + } +}; +SHAPE (compare_scalar) + +/* svbool_t svfoo[_t0](sv_t, svint64_t) (for signed t0) + svbool_t svfoo[_n_t0](sv_t, int64_t) (for signed t0) + svbool_t svfoo[_t0](sv_t, svuint64_t) (for unsigned t0) + svbool_t svfoo[_n_t0](sv_t, uint64_t) (for unsigned t0) + + i.e. a comparison in which the second argument is 64 bits. */ +struct compare_wide_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vp,v0,vw0", group, MODE_none); + build_all (b, "vp,v0,sw0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, 64); + } +}; +SHAPE (compare_wide_opt_n) + +/* uint64_t svfoo(). */ +struct count_inherent_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "su64", group, MODE_none); + } +}; +SHAPE (count_inherent) + +/* uint64_t svfoo(enum svpattern). */ +struct count_pat_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "su64,epattern", group, MODE_none); + } +}; +SHAPE (count_pat) + +/* uint64_t svfoo(svbool_t). */ +struct count_pred_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "su64,vp", group, MODE_none); + } +}; +SHAPE (count_pred) + +/* uint64_t svfoo[_t0](sv_t). */ +struct count_vector_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "su64,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (1); + } +}; +SHAPE (count_vector) + +/* svxN_t svfoo[_t0](sv_t, ..., sv_t) + + where there are N arguments in total. */ +struct create_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,v0*t", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (r.vectors_per_tuple ()); + } +}; +SHAPE (create) + +/* sv_t svfoo[_n]_t0(_t, ..., _t) + + where there are enough arguments to fill 128 bits of data (or to + control 128 bits of data in the case of predicates). 
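Illustrative sketch, not part of the patch: compare_scalar is the shape behind the while-style predicate constructors. A small loop, assuming svwhilelt_b32 uses it (full name svwhilelt_b32_s32):

  #include <arm_sve.h>

  void
  scale (float *x, int n, float k)
  {
    svbool_t pg = svwhilelt_b32 (0, n);
    for (int i = 0; svptest_any (svptrue_b32 (), pg);
         i += (int) svcntw (), pg = svwhilelt_b32 (i, n))
      svst1 (pg, x + i, svmul_x (pg, svld1 (pg, x + i), k));
  }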
*/ +struct dupq_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + /* The "_n" suffix is optional; the full name has it, but the short + name doesn't. */ + build_all (b, "v0,s0*q", group, MODE_n, true); + } + + tree + resolve (function_resolver &) const OVERRIDE + { + /* The short forms just make "_n" implicit, so no resolution is needed. */ + gcc_unreachable (); + } +}; +SHAPE (dupq) + +/* sv_t svfoo[_t0](sv_t, sv_t, uint64_t) + + where the final argument is an integer constant expression that when + multiplied by the number of bytes in t0 is in the range [0, 255]. */ +struct ext_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + unsigned int bytes = c.type_suffix (0).element_bytes; + return c.require_immediate_range (2, 0, 256 / bytes - 1); + } +}; +SHAPE (ext) + +/* _t svfoo[_t0](_t, sv_t). */ +struct fold_left_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "s0,s0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || !r.require_derived_scalar_type (i, r.SAME_TYPE_CLASS) + || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (fold_left) + +/* sv_t svfoo[_t0](svxN_t, uint64_t) + + where the final argument is an integer constant expression in + the range [0, N - 1]. */ +struct get_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,t0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_integer_immediate (i + 1)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const OVERRIDE + { + unsigned int nvectors = c.vectors_per_tuple (); + return c.require_immediate_range (1, 0, nvectors - 1); + } +}; +SHAPE (get) + +/* sv_t svfoo[_t0](sv_t, uint64_t) + _t svfoo[_n_t0](_t, uint64_t) + + where the t0 in the vector form is a signed or unsigned integer + whose size is tied to the [bhwd] suffix of "svfoo". */ +struct inc_dec_def : public inc_dec_base +{ + CONSTEXPR inc_dec_def () : inc_dec_base (false) {} + + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + /* These functions are unusual in that the type suffixes for + the scalar and vector forms are not related. The vector + form always has exactly two potential suffixes while the + scalar form always has four. 
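Illustrative sketch, not part of the patch: the ext shape's immediate rule scales with the element size; the byte offset imm * sizeof (element) must stay in [0, 255], so for int32_t the immediate range is [0, 63]. A minimal example, assuming svext maps to this shape:

  #include <arm_sve.h>

  svint32_t
  concat_shift (svint32_t a, svint32_t b)
  {
    /* Start the result two 32-bit elements (8 bytes) into A, filling the
       tail from B.  */
    return svext (a, b, 2);
  }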
*/ + if (group.types[2][0] == NUM_TYPE_SUFFIXES) + build_all (b, "v0,v0,su64", group, MODE_none); + else + build_all (b, "s0,s0,su64", group, MODE_n); + } +}; +SHAPE (inc_dec) + +/* sv_t svfoo[_t0](sv_t, enum svpattern, uint64_t) + _t svfoo[_n_t0](_t, enum svpattern, uint64_t) + + where the t0 in the vector form is a signed or unsigned integer + whose size is tied to the [bhwd] suffix of "svfoo". */ +struct inc_dec_pat_def : public inc_dec_base +{ + CONSTEXPR inc_dec_pat_def () : inc_dec_base (true) {} + + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + /* These functions are unusual in that the type suffixes for + the scalar and vector forms are not related. The vector + form always has exactly two potential suffixes while the + scalar form always has four. */ + if (group.types[2][0] == NUM_TYPE_SUFFIXES) + build_all (b, "v0,v0,epattern,su64", group, MODE_none); + else + build_all (b, "s0,s0,epattern,su64", group, MODE_n); + } +}; +SHAPE (inc_dec_pat) + +/* sv_t svfoo[_t0](sv_t, svbool_t). */ +struct inc_dec_pred_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vp", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_vector_type (i + 1, VECTOR_TYPE_svbool_t)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (inc_dec_pred) + +/* _t svfoo[_n_t0]_t1(_t, svbool_t) + + where _t1 is a _b suffix that describes the svbool_t argument. */ +struct inc_dec_pred_scalar_def : public overloaded_base<2> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "s0,s0,vp", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_integer_scalar_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_vector_type (i + 1, VECTOR_TYPE_svbool_t)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type, r.type_suffix_ids[1]); + } +}; +SHAPE (inc_dec_pred_scalar) + +/* sv[xN]_t svfoo_t0(). */ +struct inherent_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "t0", group, MODE_none); + } +}; +SHAPE (inherent) + +/* svbool_t svfoo[_b](). */ +struct inherent_b_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + /* The "_b" suffix is optional; the full name has it, but the short + name doesn't. */ + build_all (b, "v0", group, MODE_none, true); + } + + tree + resolve (function_resolver &) const OVERRIDE + { + /* The short forms just make "_b" implicit, so no resolution is needed. */ + gcc_unreachable (); + } +}; +SHAPE (inherent_b) + +/* sv[xN]_t svfoo[_t0](const _t *) + sv[xN]_t svfoo_vnum[_t0](const _t *, int64_t). 
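Illustrative sketch, not part of the patch: inherent and inherent_b are the niladic shapes. A minimal example, on the assumption that svptrue_b32 (explicit suffix) and svpfalse (optional "_b" suffix, full name svpfalse_b) are instances of them:

  #include <arm_sve.h>

  svbool_t
  all_and_none (void)
  {
    svbool_t all  = svptrue_b32 ();   /* suffix selects the element grouping */
    svbool_t none = svpfalse ();      /* "_b" is implicit in the short name */
    return svorr_z (all, all, none);
  }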
*/ +struct load_def : public load_contiguous_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + b.add_overloaded_functions (group, MODE_vnum); + build_all (b, "t0,al", group, MODE_none); + build_all (b, "t0,al,ss64", group, MODE_vnum); + } +}; +SHAPE (load) + +/* sv_t svfoo_t0(const _t *) + sv_t svfoo_vnum_t0(const _t *, int64_t) + + where is determined by the function base name. */ +struct load_ext_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "t0,al", group, MODE_none); + build_all (b, "t0,al,ss64", group, MODE_vnum); + } +}; +SHAPE (load_ext) + +/* sv_t svfoo_[s32]index_t0(const _t *, svint32_t) + sv_t svfoo_[s64]index_t0(const _t *, svint64_t) + sv_t svfoo_[u32]index_t0(const _t *, svuint32_t) + sv_t svfoo_[u64]index_t0(const _t *, svuint64_t) + + sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) + sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) + + where is determined by the function base name. */ +struct load_ext_gather_index_def : public load_ext_gather_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_index); + build_sv_index (b, "t0,al,d", group); + build_vs_index (b, "t0,b,ss64", group); + } +}; +SHAPE (load_ext_gather_index) + +/* sv_t svfoo_[s64]index_t0(const _t *, svint64_t) + sv_t svfoo_[u64]index_t0(const _t *, svuint64_t) + + sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) + sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) + + where is determined by the function base name. This is + load_ext_gather_index that doesn't support 32-bit vector indices. */ +struct load_ext_gather_index_restricted_def : public load_ext_gather_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_index); + build_sv_index64 (b, "t0,al,d", group); + build_vs_index (b, "t0,b,ss64", group); + } +}; +SHAPE (load_ext_gather_index_restricted) + +/* sv_t svfoo_[s32]offset_t0(const _t *, svint32_t) + sv_t svfoo_[s64]offset_t0(const _t *, svint64_t) + sv_t svfoo_[u32]offset_t0(const _t *, svuint32_t) + sv_t svfoo_[u64]offset_t0(const _t *, svuint64_t) + + sv_t svfoo[_u32base]_t0(svuint32_t) + sv_t svfoo[_u64base]_t0(svuint64_t) + + sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) + sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t) + + where is determined by the function base name. */ +struct load_ext_gather_offset_def : public load_ext_gather_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_offset); + build_sv_offset (b, "t0,al,d", group); + build_v_base (b, "t0,b", group, true); + build_vs_offset (b, "t0,b,ss64", group); + } +}; +SHAPE (load_ext_gather_offset) + +/* sv_t svfoo_[s64]offset_t0(const _t *, svint64_t) + sv_t svfoo_[u32]offset_t0(const _t *, svuint32_t) + sv_t svfoo_[u64]offset_t0(const _t *, svuint64_t) + + sv_t svfoo[_u32base]_t0(svuint32_t) + sv_t svfoo[_u64base]_t0(svuint64_t) + + sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) + sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t) + + where is determined by the function base name. This is + load_ext_gather_offset without the s32 vector offset form. 
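Illustrative sketch, not part of the patch: a minimal example of the load shape's two modes, assuming svld1 is the canonical user; the _vnum form offsets the base by a whole number of vector registers:

  #include <arm_sve.h>

  svfloat32_t
  load_two_vectors (svbool_t pg, const float *base)
  {
    svfloat32_t v0 = svld1 (pg, base);           /* MODE_none */
    svfloat32_t v1 = svld1_vnum (pg, base, 1);   /* MODE_vnum: base + 1 VL */
    return svadd_x (pg, v0, v1);
  }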
*/ +struct load_ext_gather_offset_restricted_def : public load_ext_gather_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_offset); + build_sv_uint_offset (b, "t0,al,d", group); + build_v_base (b, "t0,b", group, true); + build_vs_offset (b, "t0,b,ss64", group); + } +}; +SHAPE (load_ext_gather_offset_restricted) + +/* sv_t svfoo_[s32]index[_t0](const _t *, svint32_t) + sv_t svfoo_[s64]index[_t0](const _t *, svint64_t) + sv_t svfoo_[u32]index[_t0](const _t *, svuint32_t) + sv_t svfoo_[u64]index[_t0](const _t *, svuint64_t) + + sv_t svfoo_[s32]offset[_t0](const _t *, svint32_t) + sv_t svfoo_[s64]offset[_t0](const _t *, svint64_t) + sv_t svfoo_[u32]offset[_t0](const _t *, svuint32_t) + sv_t svfoo_[u64]offset[_t0](const _t *, svuint64_t). */ +struct load_gather_sv_def : public load_gather_sv_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_index); + b.add_overloaded_functions (group, MODE_offset); + build_sv_index (b, "t0,al,d", group); + build_sv_offset (b, "t0,al,d", group); + } +}; +SHAPE (load_gather_sv) + +/* sv_t svfoo_[u32]index[_t0](const _t *, svuint32_t) + sv_t svfoo_[u64]index[_t0](const _t *, svuint64_t) + + sv_t svfoo_[s64]offset[_t0](const _t *, svint64_t) + sv_t svfoo_[u32]offset[_t0](const _t *, svuint32_t) + sv_t svfoo_[u64]offset[_t0](const _t *, svuint64_t) + + This is load_gather_sv without the 32-bit vector index forms and + without the s32 vector offset form. */ +struct load_gather_sv_restricted_def : public load_gather_sv_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_index); + b.add_overloaded_functions (group, MODE_offset); + build_sv_index64 (b, "t0,al,d", group); + build_sv_uint_offset (b, "t0,al,d", group); + } +}; +SHAPE (load_gather_sv_restricted) + +/* sv_t svfoo[_u32base]_t0(svuint32_t) + sv_t svfoo[_u64base]_t0(svuint64_t) + + sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) + sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) + + sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) + sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t). */ +struct load_gather_vs_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + /* The base vector mode is optional; the full name has it but the + short name doesn't. There is no ambiguity with SHAPE_load_gather_sv + because the latter uses an implicit type suffix. */ + build_v_base (b, "t0,b", group, true); + build_vs_index (b, "t0,b,ss64", group, true); + build_vs_offset (b, "t0,b,ss64", group, true); + } + + tree + resolve (function_resolver &) const OVERRIDE + { + /* The short name just makes the base vector mode implicit; + no resolution is needed. */ + gcc_unreachable (); + } +}; +SHAPE (load_gather_vs) + +/* sv_t svfoo[_t0](const _t *) + + The only difference from "load" is that this shape has no vnum form. */ +struct load_replicate_def : public load_contiguous_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,al", group, MODE_none); + } +}; +SHAPE (load_replicate) + +/* svbool_t svfoo(enum svpattern). 
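Illustrative sketch, not part of the patch: load_replicate differs from load only in lacking a _vnum form. A minimal example, assuming svld1rq (load one quadword and replicate) is an instance:

  #include <arm_sve.h>

  svfloat32_t
  splat_quadword (svbool_t pg, const float *base)
  {
    /* Loads 128 bits from BASE and replicates them across the vector.  */
    return svld1rq (pg, base);
  }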
*/ +struct pattern_pred_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "vp,epattern", group, MODE_none); + } +}; +SHAPE (pattern_pred) + +/* void svfoo(const void *, svprfop) + void svfoo_vnum(const void *, int64_t, svprfop). */ +struct prefetch_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "_,ap,eprfop", group, MODE_none); + build_all (b, "_,ap,ss64,eprfop", group, MODE_vnum); + } +}; +SHAPE (prefetch) + +/* void svfoo_[s32]index(const void *, svint32_t, svprfop) + void svfoo_[s64]index(const void *, svint64_t, svprfop) + void svfoo_[u32]index(const void *, svuint32_t, svprfop) + void svfoo_[u64]index(const void *, svuint64_t, svprfop) + + void svfoo[_u32base](svuint32_t, svprfop) + void svfoo[_u64base](svuint64_t, svprfop) + + void svfoo[_u32base]_index(svuint32_t, int64_t, svprfop) + void svfoo[_u64base]_index(svuint64_t, int64_t, svprfop). */ +struct prefetch_gather_index_def : public prefetch_gather_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + b.add_overloaded_functions (group, MODE_index); + build_sv_index (b, "_,ap,d,eprfop", group); + build_v_base (b, "_,b,eprfop", group); + build_vs_index (b, "_,b,ss64,eprfop", group); + } +}; +SHAPE (prefetch_gather_index) + +/* void svfoo_[s32]offset(const void *, svint32_t, svprfop) + void svfoo_[s64]offset(const void *, svint64_t, svprfop) + void svfoo_[u32]offset(const void *, svuint32_t, svprfop) + void svfoo_[u64]offset(const void *, svuint64_t, svprfop) + + void svfoo[_u32base](svuint32_t, svprfop) + void svfoo[_u64base](svuint64_t, svprfop) + + void svfoo[_u32base]_offset(svuint32_t, int64_t, svprfop) + void svfoo[_u64base]_offset(svuint64_t, int64_t, svprfop). */ +struct prefetch_gather_offset_def : public prefetch_gather_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + b.add_overloaded_functions (group, MODE_offset); + build_sv_offset (b, "_,ap,d,eprfop", group); + build_v_base (b, "_,b,eprfop", group); + build_vs_offset (b, "_,b,ss64,eprfop", group); + } +}; +SHAPE (prefetch_gather_offset) + +/* bool svfoo(svbool_t). */ +struct ptest_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "sp,vp", group, MODE_none); + } +}; +SHAPE (ptest) + +/* svbool_t svfoo(). */ +struct rdffr_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "vp", group, MODE_none); + } +}; +SHAPE (rdffr) + +/* _t svfoo[_t0](sv_t). */ +struct reduction_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "s0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (1); + } +}; +SHAPE (reduction) + +/* int64_t svfoo[_t0](sv_t) (for signed t0) + uint64_t svfoo[_t0](sv_t) (for unsigned t0) + _t svfoo[_t0](sv_t) (for floating-point t0) + + i.e. a version of "reduction" in which the return type for integers + always has 64 bits. 
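Illustrative sketch, not part of the patch: the difference between reduction and reduction_wide is only the scalar result type. A minimal example, assuming svmaxv and svaddv use these two shapes respectively:

  #include <arm_sve.h>

  float
  max_elem (svbool_t pg, svfloat32_t v)
  {
    return svmaxv (pg, v);   /* reduction: result has the element type */
  }

  int64_t
  sum_bytes (svbool_t pg, svint8_t v)
  {
    return svaddv (pg, v);   /* reduction_wide: integer result is 64-bit */
  }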
*/ +struct reduction_wide_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "sw0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (1); + } +}; +SHAPE (reduction_wide) + +/* svxN_t svfoo[_t0](svxN_t, uint64_t, sv_t) + + where the second argument is an integer constant expression in the + range [0, N - 1]. */ +struct set_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,t0,su64,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_integer_immediate (i + 1) + || !r.require_derived_vector_type (i + 2, i, type)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const OVERRIDE + { + unsigned int nvectors = c.vectors_per_tuple (); + return c.require_immediate_range (1, 0, nvectors - 1); + } +}; +SHAPE (set) + +/* void svfoo(). */ +struct setffr_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "_", group, MODE_none); + } +}; +SHAPE (setffr) + +/* sv_t svfoo[_n_t0])(sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [0, sizeof (_t) * 8 - 1]. */ +struct shift_left_imm_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "v0,v0,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (1, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + unsigned int bits = c.type_suffix (0).element_bits; + return c.require_immediate_range (1, 0, bits - 1); + } +}; +SHAPE (shift_left_imm) + +/* sv_t svfoo[_n_t0])(sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [0, sizeof (_t) * 4 - 1]. */ +struct shift_left_imm_long_def : public binary_imm_long_base +{ + bool + check (function_checker &c) const OVERRIDE + { + unsigned int bits = c.type_suffix (0).element_bits / 2; + return c.require_immediate_range (1, 0, bits - 1); + } +}; +SHAPE (shift_left_imm_long) + +/* sv_t svfoo[_n_t0])(sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [0, sizeof (_t) * 8 - 1]. */ +struct shift_left_imm_to_uint_def : public shift_left_imm_def +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "vu0,v0,su64", group, MODE_n); + } +}; +SHAPE (shift_left_imm_to_uint) + +/* sv_t svfoo[_n_t0])(sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [1, sizeof (_t) * 8]. 
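Illustrative sketch, not part of the patch: the set shape's immediate is a tuple index in [0, N - 1]. A minimal example, assuming svget2/svset2 are built from get and set:

  #include <arm_sve.h>

  svint32x2_t
  replace_second (svint32x2_t tuple, svint32_t v)
  {
    svint32_t first = svget2 (tuple, 0);   /* index must be 0 or 1 */
    (void) first;
    return svset2 (tuple, 1, v);
  }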
*/ +struct shift_right_imm_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "v0,v0,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (1, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + unsigned int bits = c.type_suffix (0).element_bits; + return c.require_immediate_range (1, 1, bits); + } +}; +SHAPE (shift_right_imm) + +/* sv_t svfoo[_n_t0])(sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [1, sizeof (_t) * 4]. */ +typedef shift_right_imm_narrow_wrapper, 1> + shift_right_imm_narrowb_def; +SHAPE (shift_right_imm_narrowb) + +/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [1, sizeof (_t) * 4]. */ +typedef shift_right_imm_narrow_wrapper, 2> + shift_right_imm_narrowt_def; +SHAPE (shift_right_imm_narrowt) + +/* sv_t svfoo[_n_t0])(sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [1, sizeof (_t) * 4]. */ +typedef binary_imm_narrowb_base + binary_imm_narrowb_base_unsigned; +typedef shift_right_imm_narrow_wrapper + shift_right_imm_narrowb_to_uint_def; +SHAPE (shift_right_imm_narrowb_to_uint) + +/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [1, sizeof (_t) * 4]. */ +typedef binary_imm_narrowt_base + binary_imm_narrowt_base_unsigned; +typedef shift_right_imm_narrow_wrapper + shift_right_imm_narrowt_to_uint_def; +SHAPE (shift_right_imm_narrowt_to_uint) + +/* void svfoo[_t0](_t *, sv[xN]_t) + void svfoo_vnum[_t0](_t *, int64_t, sv[xN]_t) + + where might be tied to (for non-truncating stores) or might + depend on the function base name (for truncating stores). */ +struct store_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + b.add_overloaded_functions (group, MODE_vnum); + build_all (b, "_,as,t0", group, MODE_none); + build_all (b, "_,as,ss64,t0", group, MODE_vnum); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + bool vnum_p = r.mode_suffix_id == MODE_vnum; + gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); + + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs) + || !r.require_pointer_type (i) + || (vnum_p && !r.require_scalar_type (i + 1, "int64_t")) + || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (store) + +/* void svfoo_[s32]index[_t0](_t *, svint32_t, sv_t) + void svfoo_[s64]index[_t0](_t *, svint64_t, sv_t) + void svfoo_[u32]index[_t0](_t *, svuint32_t, sv_t) + void svfoo_[u64]index[_t0](_t *, svuint64_t, sv_t) + + void svfoo[_u32base]_index[_t0](svuint32_t, int64_t, sv_t) + void svfoo[_u64base]_index[_t0](svuint64_t, int64_t, sv_t) + + where might be tied to (for non-truncating stores) or might + depend on the function base name (for truncating stores). 
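Illustrative sketch, not part of the patch: a minimal example of the store shape, assuming svst1 is its main user; as with load, the _vnum form adds a signed vector-count offset to the base:

  #include <arm_sve.h>

  void
  store_two_vectors (svbool_t pg, float *base, svfloat32_t v)
  {
    svst1 (pg, base, v);            /* MODE_none */
    svst1_vnum (pg, base, 1, v);    /* MODE_vnum */
  }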
*/ +struct store_scatter_index_def : public store_scatter_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_index); + build_sv_index (b, "_,as,d,t0", group); + build_vs_index (b, "_,b,ss64,t0", group); + } +}; +SHAPE (store_scatter_index) + +/* void svfoo_[s64]index[_t0](_t *, svint64_t, sv_t) + void svfoo_[u64]index[_t0](_t *, svuint64_t, sv_t) + + void svfoo[_u32base]_index[_t0](svuint32_t, int64_t, sv_t) + void svfoo[_u64base]_index[_t0](svuint64_t, int64_t, sv_t) + + i.e. a version of store_scatter_index that doesn't support 32-bit + vector indices. */ +struct store_scatter_index_restricted_def : public store_scatter_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_index); + build_sv_index64 (b, "_,as,d,t0", group); + build_vs_index (b, "_,b,ss64,t0", group); + } +}; +SHAPE (store_scatter_index_restricted) + +/* void svfoo_[s32]offset[_t0](_t *, svint32_t, sv_t) + void svfoo_[s64]offset[_t0](_t *, svint64_t, sv_t) + void svfoo_[u32]offset[_t0](_t *, svuint32_t, sv_t) + void svfoo_[u64]offset[_t0](_t *, svuint64_t, sv_t) + + void svfoo[_u32base_t0](svuint32_t, sv_t) + void svfoo[_u64base_t0](svuint64_t, sv_t) + + void svfoo[_u32base]_offset[_t0](svuint32_t, int64_t, sv_t) + void svfoo[_u64base]_offset[_t0](svuint64_t, int64_t, sv_t) + + where might be tied to (for non-truncating stores) or might + depend on the function base name (for truncating stores). */ +struct store_scatter_offset_def : public store_scatter_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + b.add_overloaded_functions (group, MODE_offset); + build_sv_offset (b, "_,as,d,t0", group); + build_v_base (b, "_,b,t0", group); + build_vs_offset (b, "_,b,ss64,t0", group); + } +}; +SHAPE (store_scatter_offset) + +/* void svfoo_[s64]offset[_t0](_t *, svint64_t, sv_t) + void svfoo_[u32]offset[_t0](_t *, svuint32_t, sv_t) + void svfoo_[u64]offset[_t0](_t *, svuint64_t, sv_t) + + void svfoo[_u32base_t0](svuint32_t, sv_t) + void svfoo[_u64base_t0](svuint64_t, sv_t) + + void svfoo[_u32base]_offset[_t0](svuint32_t, int64_t, sv_t) + void svfoo[_u64base]_offset[_t0](svuint64_t, int64_t, sv_t) + + i.e. a version of store_scatter_offset that doesn't support svint32_t + offsets. */ +struct store_scatter_offset_restricted_def : public store_scatter_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + b.add_overloaded_functions (group, MODE_offset); + build_sv_uint_offset (b, "_,as,d,t0", group); + build_v_base (b, "_,b,t0", group); + build_vs_offset (b, "_,b,ss64,t0", group); + } +}; +SHAPE (store_scatter_offset_restricted) + +/* sv_t svfoo[_t0](svxN_t, sv_t). 
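Illustrative sketch, not part of the patch: a minimal example of store_scatter_index, on the assumption that svst1_scatter_index is one of its users (64-bit indices shown, which both the full and restricted variants accept):

  #include <arm_sve.h>

  void
  scatter (svbool_t pg, double *base, svint64_t idx, svfloat64_t data)
  {
    /* Stores data[i] to base[idx[i]] for each active element.  */
    svst1_scatter_index (pg, base, idx, data);
  }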
*/ +struct tbl_tuple_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,t0,vu0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (tbl_tuple) + +/* sv_t svfoo[_t0](sv_t, svbfloatt16_t, svbfloat16_t). */ +struct ternary_bfloat_def + : public ternary_resize2_base<16, TYPE_bfloat, TYPE_bfloat> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vB,vB", group, MODE_none); + } +}; +SHAPE (ternary_bfloat) + +/* sv_t svfoo[_t0](sv_t, svbfloat16_t, svbfloat16_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 7]. */ +typedef ternary_bfloat_lane_base<1> ternary_bfloat_lane_def; +SHAPE (ternary_bfloat_lane) + +/* sv_t svfoo[_t0](sv_t, svbfloat16_t, svbfloat16_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 3]. */ +typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def; +SHAPE (ternary_bfloat_lanex2) + +/* sv_t svfoo[_t0](sv_t, svbfloatt16_t, svbfloat16_t) + sv_t svfoo[_n_t0](sv_t, svbfloat16_t, bfloat16_t). */ +struct ternary_bfloat_opt_n_def + : public ternary_resize2_opt_n_base<16, TYPE_bfloat, TYPE_bfloat> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vB,vB", group, MODE_none); + build_all (b, "v0,v0,vB,sB", group, MODE_n); + } +}; +SHAPE (ternary_bfloat_opt_n) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, + uint64_t) + + where the final argument is an integer constant expression in the range + [0, 16 / sizeof (_t) - 1]. */ +struct ternary_intq_uintq_lane_def + : public ternary_qq_lane_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vqs0,vqu0,su64", group, MODE_none); + } +}; +SHAPE (ternary_intq_uintq_lane) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, sv_t, + _t). */ +struct ternary_intq_uintq_opt_n_def + : public ternary_resize2_opt_n_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vqs0,vqu0", group, MODE_none); + build_all (b, "v0,v0,vqs0,squ0", group, MODE_n); + } +}; +SHAPE (ternary_intq_uintq_opt_n) + +/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t) + + where the final argument is an integer constant expression in the + range [0, 16 / sizeof (_t) - 1]. 
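Illustrative sketch, not part of the patch: a minimal example of the ternary_bfloat family, assuming the BF16 extension's svbfdot intrinsic is built from ternary_bfloat_opt_n:

  #include <arm_sve.h>

  svfloat32_t
  bf16_dot (svfloat32_t acc, svbfloat16_t a, svbfloat16_t b)
  {
    /* Accumulates pairwise bf16 products into 32-bit float lanes.  */
    return svbfdot (acc, a, b);
  }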
*/ +struct ternary_lane_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (3, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_lane_index (3); + } +}; +SHAPE (ternary_lane) + +/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t, uint64_t) + + where the penultimate argument is an integer constant expression in + the range [0, 8 / sizeof (_t) - 1] and where the final argument + is an integer constant expression in {0, 90, 180, 270}. */ +struct ternary_lane_rotate_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,v0,su64,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (3, 2); + } + + bool + check (function_checker &c) const OVERRIDE + { + return (c.require_immediate_lane_index (3, 2) + && c.require_immediate_one_of (4, 0, 90, 180, 270)); + } +}; +SHAPE (ternary_lane_rotate) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 32 / sizeof (_t) - 1]. */ +struct ternary_long_lane_def + : public ternary_resize2_lane_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vh0,vh0,su64", group, MODE_none); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_lane_index (3); + } +}; +SHAPE (ternary_long_lane) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, sv_t, _t) + + i.e. a version of the standard ternary shape ternary_opt_n in which + the element type of the last two arguments is the half-sized + equivalent of . */ +struct ternary_long_opt_n_def + : public ternary_resize2_opt_n_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vh0,vh0", group, MODE_none); + build_all (b, "v0,v0,vh0,sh0", group, MODE_n); + } +}; +SHAPE (ternary_long_opt_n) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, sv_t, _t) + + i.e. the standard shape for ternary operations that operate on + uniform types. */ +struct ternary_opt_n_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,v0", group, MODE_none); + build_all (b, "v0,v0,v0,s0", group, MODE_n); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform_opt_n (3); + } +}; +SHAPE (ternary_opt_n) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 16 / sizeof (_t) - 1]. 
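Illustrative sketch, not part of the patch: a minimal example of ternary_opt_n, assuming svmla is the canonical user; the trailing operand may be a vector or a scalar:

  #include <arm_sve.h>

  svfloat32_t
  fma_examples (svbool_t pg, svfloat32_t acc, svfloat32_t a, svfloat32_t b)
  {
    acc = svmla_x (pg, acc, a, b);      /* svmla_f32_x */
    return svmla_x (pg, acc, a, 2.0f);  /* svmla_n_f32_x */
  }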
*/ +struct ternary_qq_lane_def : public ternary_qq_lane_base<> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); + } +}; +SHAPE (ternary_qq_lane) + +/* svbool_t svfoo[_](sv_t, sv_t, sv_t, + uint64_t) + + where the final argument is an integer constant expression in + {0, 90, 180, 270}. */ +struct ternary_qq_lane_rotate_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vq0,vq0,su64,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (5, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, + r.QUARTER_SIZE) + || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, + r.QUARTER_SIZE) + || !r.require_integer_immediate (i + 3) + || !r.require_integer_immediate (i + 4)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const OVERRIDE + { + return (c.require_immediate_lane_index (3, 4) + && c.require_immediate_one_of (4, 0, 90, 180, 270)); + } +}; +SHAPE (ternary_qq_lane_rotate) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, sv_t, _t) + + i.e. a version of the standard ternary shape ternary_opt_n in which + the element type of the last two arguments is the quarter-sized + equivalent of . */ +struct ternary_qq_opt_n_def + : public ternary_resize2_opt_n_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vq0,vq0", group, MODE_none); + build_all (b, "v0,v0,vq0,sq0", group, MODE_n); + } +}; +SHAPE (ternary_qq_opt_n) + +/* svbool_t svfoo[_](sv_t, sv_t, sv_t, + uint64_t) + + where the final argument is an integer constant expression in + {0, 90, 180, 270}. */ +struct ternary_qq_rotate_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (4, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, + r.QUARTER_SIZE) + || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, + r.QUARTER_SIZE) + || !r.require_integer_immediate (i + 3)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_one_of (3, 0, 90, 180, 270); + } +}; +SHAPE (ternary_qq_rotate) + +/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t) + + where the final argument is an integer constant expression in + {0, 90, 180, 270}. 
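Illustrative sketch, not part of the patch: the _qq shapes pair a full-width accumulator with quarter-width multiplicands. A minimal example, assuming svdot and svdot_lane use ternary_qq_opt_n and ternary_qq_lane:

  #include <arm_sve.h>

  svint32_t
  dot_examples (svint32_t acc, svint8_t a, svint8_t b)
  {
    acc = svdot (acc, a, b);           /* svdot_s32 */
    return svdot_lane (acc, a, b, 3);  /* lane index in [0, 3] for s32 */
  }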
*/ +struct ternary_rotate_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (3, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_one_of (3, 0, 90, 180, 270); + } +}; +SHAPE (ternary_rotate) + +/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [0, sizeof (_t) * 8 - 1]. */ +struct ternary_shift_left_imm_def : public ternary_shift_imm_base +{ + bool + check (function_checker &c) const OVERRIDE + { + unsigned int bits = c.type_suffix (0).element_bits; + return c.require_immediate_range (2, 0, bits - 1); + } +}; +SHAPE (ternary_shift_left_imm) + +/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [1, sizeof (_t) * 8]. */ +struct ternary_shift_right_imm_def : public ternary_shift_imm_base +{ + bool + check (function_checker &c) const OVERRIDE + { + unsigned int bits = c.type_suffix (0).element_bits; + return c.require_immediate_range (2, 1, bits); + } +}; +SHAPE (ternary_shift_right_imm) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t). */ +struct ternary_uint_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,vu0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || !r.require_matching_vector_type (i + 1, type) + || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (ternary_uint) + +/* sv_t svfoo[_t0](sv_t, svu_t, + sv_t). */ +struct ternary_uintq_intq_def + : public ternary_resize2_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none); + } +}; +SHAPE (ternary_uintq_intq) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, + uint64_t) + + where the final argument is an integer constant expression in the range + [0, 16 / sizeof (_t) - 1]. */ +struct ternary_uintq_intq_lane_def + : public ternary_qq_lane_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vqu0,vqs0,su64", group, MODE_none); + } +}; +SHAPE (ternary_uintq_intq_lane) + +/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) + sv_t svfoo[_n_t0](sv_t, sv_t, + _t). */ +struct ternary_uintq_intq_opt_n_def + : public ternary_resize2_opt_n_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none); + build_all (b, "v0,v0,vqu0,sqs0", group, MODE_n); + } +}; +SHAPE (ternary_uintq_intq_opt_n) + +/* svbool_t svfoo[_](sv_t, sv_t, uint64_t) + + where the final argument is an integer constant expression in the + range [0, 7]. 
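Illustrative sketch, not part of the patch: a minimal example of ternary_rotate, assuming svcmla is one of its users; the rotation must be one of 0, 90, 180 or 270:

  #include <arm_sve.h>

  svfloat32_t
  complex_mla (svbool_t pg, svfloat32_t acc, svfloat32_t a, svfloat32_t b)
  {
    acc = svcmla_x (pg, acc, a, b, 0);
    return svcmla_x (pg, acc, a, b, 90);
  }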
*/ +struct tmad_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_uniform (2, 1); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_range (2, 0, 7); + } +}; +SHAPE (tmad) + +/* sv_t svfoo[_t0](sv_t) + + i.e. the standard shape for unary operations that operate on + uniform types. */ +struct unary_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_unary (); + } +}; +SHAPE (unary) + +/* sv_t svfoo_t0[_t1](sv_t) + + where the target type must be specified explicitly but the source + type can be inferred. */ +struct unary_convert_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v1", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_unary (r.type_suffix (0).tclass, + r.type_suffix (0).element_bits); + } +}; +SHAPE (unary_convert) + +/* sv_t svfoo_t0[_t1](sv_t, sv_t) + + This is a version of unary_convert in which the even-indexed + elements are passed in as a first parameter, before any governing + predicate. */ +struct unary_convert_narrowt_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v1", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_unary (r.type_suffix (0).tclass, + r.type_suffix (0).element_bits, true); + } +}; +SHAPE (unary_convert_narrowt) + +/* sv_t svfoo[_t0](sv_t). */ +struct unary_long_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,vh0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type, result_type; + if (!r.check_gp_argument (1, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) + return res; + + return r.report_no_such_form (type); + } +}; +SHAPE (unary_long) + +/* sv_t svfoo[_n]_t0(_t). */ +struct unary_n_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + /* The "_n" suffix is optional; the full name has it, but the short + name doesn't. */ + build_all (b, "v0,s0", group, MODE_n, true); + } + + tree + resolve (function_resolver &) const OVERRIDE + { + /* The short forms just make "_n" implicit, so no resolution is needed. */ + gcc_unreachable (); + } +}; +SHAPE (unary_n) + +/* sv_t svfoo[_t0](sv_t). */ +typedef unary_narrowb_base<> unary_narrowb_def; +SHAPE (unary_narrowb) + +/* sv_t svfoo[_t0](sv_t, sv_t). 
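For unary_long above, the single data argument is the half-width equivalent of the result, and the resolver looks up the double-width form after inferring the argument's type. A small sketch (assuming the usual arm_sve.h prototypes; svmovlb is bound to unary_long later in this patch, and the wrapper name is illustrative):

    #include <arm_sve.h>

    /* Widening move from the even (bottom) elements: the s8 argument
       determines the s16 result form.  */
    svint16_t
    movlb_example (svint8_t x)
    {
      return svmovlb (x);              /* resolves to svmovlb_s16 */
    }

An svint64_t argument cannot resolve this way, since there is no 128-bit element type to widen into.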
*/ +typedef unary_narrowt_base<> unary_narrowt_def; +SHAPE (unary_narrowt) + +/* sv_t svfoo[_t0](sv_t). */ +typedef unary_narrowb_base unary_narrowb_to_uint_def; +SHAPE (unary_narrowb_to_uint) + +/* sv_t svfoo[_t0](sv_t, sv_t). */ +typedef unary_narrowt_base unary_narrowt_to_uint_def; +SHAPE (unary_narrowt_to_uint) + +/* svbool_t svfoo(svbool_t). */ +struct unary_pred_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + build_all (b, "v0,v0", group, MODE_none); + } +}; +SHAPE (unary_pred) + +/* sv_t svfoo[_t0](sv_t) + + i.e. a version of "unary" in which the returned vector contains + signed integers. */ +struct unary_to_int_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vs0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_unary (TYPE_signed); + } +}; +SHAPE (unary_to_int) + +/* sv_t svfoo[_t0](sv_t) + + i.e. a version of "unary" in which the returned vector contains + unsigned integers. */ +struct unary_to_uint_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "vu0,v0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + return r.resolve_unary (TYPE_unsigned); + } +}; +SHAPE (unary_to_uint) + +/* sv_t svfoo[_t0](sv_t) + + where always belongs a certain type class, and where + therefore uniquely determines . */ +struct unary_uint_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,vu0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (1, i, nargs) + || (type = r.infer_unsigned_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + /* Search for a valid suffix with the same number of bits as TYPE. */ + unsigned int element_bits = type_suffixes[type].element_bits; + if (type_suffixes[type].unsigned_p) + for (unsigned int j = 0; j < NUM_TYPE_SUFFIXES; ++j) + if (type_suffixes[j].element_bits == element_bits) + if (tree res = r.lookup_form (r.mode_suffix_id, + type_suffix_index (j))) + return res; + + return r.report_no_such_form (type); + } +}; +SHAPE (unary_uint) + +/* sv_t svfoo[_](sv_t) + + i.e. a version of "unary" in which the source elements are half the + size of the destination elements, but have the same type class. */ +struct unary_widen_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,vh0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (1, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + /* There is only a single form for predicates. 
*/ + if (type == TYPE_SUFFIX_b) + return r.resolve_to (r.mode_suffix_id, type); + + if (type_suffixes[type].integer_p + && type_suffixes[type].element_bits < 64) + { + type_suffix_index wide_suffix + = find_type_suffix (type_suffixes[type].tclass, + type_suffixes[type].element_bits * 2); + if (tree res = r.lookup_form (r.mode_suffix_id, wide_suffix)) + return res; + } + + return r.report_no_such_form (type); + } +}; +SHAPE (unary_widen) + +} diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h new file mode 100644 index 0000000000000..2e2ee26f76e06 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -0,0 +1,191 @@ +/* ACLE support for AArch64 SVE (function shapes) + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_SVE_BUILTINS_SHAPES_H +#define GCC_AARCH64_SVE_BUILTINS_SHAPES_H + +namespace aarch64_sve +{ + /* The naming convention is: + + - to use the name of the function if the rules are very specific to + a particular function (e.g. svext, for which the range of the + final immediate value is in no way generic). + + - to use names like "unary" etc. if the rules are somewhat generic, + especially if there are no ranges involved. + + When using generic names, the handling of the final vector argument + can be modified as follows: + + - an "_n" suffix changes the argument from a vector to a scalar. + + - an "_opt_n" suffix says that there are two forms of each function: + one in which the argument is the usual vector, and one in which it + is replaced by a scalar. + + - "_int" and "_uint" replace the argument's element type with a + signed or unsigned integer of the same width. The suffixes above + then indicate whether this final argument is or might be a scalar. + + - "_int64" and "_uint64" similarly replace the argument's element type + with int64_t or uint64_t. + + - "_wide" replaces the argument's element type with a 64-bit integer + of the same signedness. This only makes sense for integer elements. + + - "_lane" indicates that the argument is indexed by a constant lane + number, provided as an immediately-following argument of type uint64_t. + + Also: + + - "inherent" means that the function takes no arguments. + + - "_rotate" means that the final argument is a rotation amount + (0, 90, 180 or 270). + + - "_scalar" indicates that all data arguments are scalars rather + than vectors. + + - in gather/scatter addresses, "sv" stands for "scalar base, + vector displacement" while "vs" stands for "vector base, + scalar displacement". + + - "_pred" indicates that the function takes an svbool_t argument + that does not act as a governing predicate.. 
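As a worked example of the naming convention described above: the SVE2 svqshl entry later in this patch uses the shape binary_int_opt_n, i.e. a binary operation whose second data argument is a signed integer vector of the same element width, with an alternative "_n" form that takes a scalar instead. A usage sketch (assuming the usual arm_sve.h prototypes; the wrapper name is illustrative):

    #include <arm_sve.h>

    svuint32_t
    qshl_examples (svbool_t pg, svuint32_t x, svint32_t amounts)
    {
      /* "binary": two data arguments after the governing predicate.
         "_int": the shift amounts are a signed vector even for unsigned data.  */
      svuint32_t a = svqshl_x (pg, x, amounts);

      /* "_opt_n": the same operation also exists with a scalar final argument.  */
      svuint32_t b = svqshl_n_u32_x (pg, x, -3);

      return svorr_x (pg, a, b);
    }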
*/ + namespace shapes + { + extern const function_shape *const adr_index; + extern const function_shape *const adr_offset; + extern const function_shape *const binary; + extern const function_shape *const binary_int_opt_n; + extern const function_shape *const binary_lane; + extern const function_shape *const binary_long_lane; + extern const function_shape *const binary_long_opt_n; + extern const function_shape *const binary_n; + extern const function_shape *const binary_narrowb_opt_n; + extern const function_shape *const binary_narrowt_opt_n; + extern const function_shape *const binary_opt_n; + extern const function_shape *const binary_pred; + extern const function_shape *const binary_rotate; + extern const function_shape *const binary_scalar; + extern const function_shape *const binary_to_uint; + extern const function_shape *const binary_uint; + extern const function_shape *const binary_uint_n; + extern const function_shape *const binary_uint_opt_n; + extern const function_shape *const binary_uint64_n; + extern const function_shape *const binary_uint64_opt_n; + extern const function_shape *const binary_wide; + extern const function_shape *const binary_wide_opt_n; + extern const function_shape *const clast; + extern const function_shape *const compare; + extern const function_shape *const compare_opt_n; + extern const function_shape *const compare_ptr; + extern const function_shape *const compare_scalar; + extern const function_shape *const compare_wide_opt_n; + extern const function_shape *const count_inherent; + extern const function_shape *const count_pat; + extern const function_shape *const count_pred; + extern const function_shape *const count_vector; + extern const function_shape *const create; + extern const function_shape *const dupq; + extern const function_shape *const ext; + extern const function_shape *const fold_left; + extern const function_shape *const get; + extern const function_shape *const inc_dec; + extern const function_shape *const inc_dec_pat; + extern const function_shape *const inc_dec_pred; + extern const function_shape *const inc_dec_pred_scalar; + extern const function_shape *const inherent; + extern const function_shape *const inherent_b; + extern const function_shape *const load; + extern const function_shape *const load_ext; + extern const function_shape *const load_ext_gather_index; + extern const function_shape *const load_ext_gather_index_restricted; + extern const function_shape *const load_ext_gather_offset; + extern const function_shape *const load_ext_gather_offset_restricted; + extern const function_shape *const load_gather_sv; + extern const function_shape *const load_gather_sv_restricted; + extern const function_shape *const load_gather_vs; + extern const function_shape *const load_replicate; + extern const function_shape *const mmla; + extern const function_shape *const pattern_pred; + extern const function_shape *const prefetch; + extern const function_shape *const prefetch_gather_index; + extern const function_shape *const prefetch_gather_offset; + extern const function_shape *const ptest; + extern const function_shape *const rdffr; + extern const function_shape *const reduction; + extern const function_shape *const reduction_wide; + extern const function_shape *const set; + extern const function_shape *const setffr; + extern const function_shape *const shift_left_imm_long; + extern const function_shape *const shift_left_imm_to_uint; + extern const function_shape *const shift_right_imm; + extern const function_shape *const 
shift_right_imm_narrowb; + extern const function_shape *const shift_right_imm_narrowt; + extern const function_shape *const shift_right_imm_narrowb_to_uint; + extern const function_shape *const shift_right_imm_narrowt_to_uint; + extern const function_shape *const store; + extern const function_shape *const store_scatter_index; + extern const function_shape *const store_scatter_index_restricted; + extern const function_shape *const store_scatter_offset; + extern const function_shape *const store_scatter_offset_restricted; + extern const function_shape *const tbl_tuple; + extern const function_shape *const ternary_bfloat; + extern const function_shape *const ternary_bfloat_lane; + extern const function_shape *const ternary_bfloat_lanex2; + extern const function_shape *const ternary_bfloat_opt_n; + extern const function_shape *const ternary_intq_uintq_lane; + extern const function_shape *const ternary_intq_uintq_opt_n; + extern const function_shape *const ternary_lane; + extern const function_shape *const ternary_lane_rotate; + extern const function_shape *const ternary_long_lane; + extern const function_shape *const ternary_long_opt_n; + extern const function_shape *const ternary_opt_n; + extern const function_shape *const ternary_qq_lane; + extern const function_shape *const ternary_qq_lane_rotate; + extern const function_shape *const ternary_qq_opt_n; + extern const function_shape *const ternary_qq_rotate; + extern const function_shape *const ternary_rotate; + extern const function_shape *const ternary_shift_left_imm; + extern const function_shape *const ternary_shift_right_imm; + extern const function_shape *const ternary_uint; + extern const function_shape *const ternary_uintq_intq; + extern const function_shape *const ternary_uintq_intq_lane; + extern const function_shape *const ternary_uintq_intq_opt_n; + extern const function_shape *const tmad; + extern const function_shape *const unary; + extern const function_shape *const unary_convert; + extern const function_shape *const unary_convert_narrowt; + extern const function_shape *const unary_long; + extern const function_shape *const unary_n; + extern const function_shape *const unary_narrowb; + extern const function_shape *const unary_narrowt; + extern const function_shape *const unary_narrowb_to_uint; + extern const function_shape *const unary_narrowt_to_uint; + extern const function_shape *const unary_pred; + extern const function_shape *const unary_to_int; + extern const function_shape *const unary_to_uint; + extern const function_shape *const unary_uint; + extern const function_shape *const unary_widen; + } +} + +#endif diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc new file mode 100644 index 0000000000000..4b299860d1622 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -0,0 +1,655 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE2 intrinsics) + Copyright (C) 2020-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "memmodel.h" +#include "insn-codes.h" +#include "optabs.h" +#include "recog.h" +#include "expr.h" +#include "basic-block.h" +#include "function.h" +#include "fold-const.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimplify.h" +#include "explow.h" +#include "emit-rtl.h" +#include "tree-vector-builder.h" +#include "rtx-vector-builder.h" +#include "vec-perm-indices.h" +#include "aarch64-sve-builtins.h" +#include "aarch64-sve-builtins-shapes.h" +#include "aarch64-sve-builtins-base.h" +#include "aarch64-sve-builtins-sve2.h" +#include "aarch64-sve-builtins-functions.h" + +using namespace aarch64_sve; + +namespace { + +/* Return the UNSPEC_CDOT* unspec for rotation amount ROT. */ +static int +unspec_cdot (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_CDOT; + case 90: return UNSPEC_CDOT90; + case 180: return UNSPEC_CDOT180; + case 270: return UNSPEC_CDOT270; + default: gcc_unreachable (); + } +} + +/* Return the UNSPEC_SQRDCMLAH* unspec for rotation amount ROT. */ +static int +unspec_sqrdcmlah (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_SQRDCMLAH; + case 90: return UNSPEC_SQRDCMLAH90; + case 180: return UNSPEC_SQRDCMLAH180; + case 270: return UNSPEC_SQRDCMLAH270; + default: gcc_unreachable (); + } +} + +class svaba_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX; + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve2_aba (max_code, mode)); + } +}; + +class svcdot_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + return e.use_exact_insn (code_for_aarch64_sve (unspec_cdot (rot), + e.vector_mode (0))); + } +}; + +class svcdot_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + return e.use_exact_insn (code_for_aarch64_lane (unspec_cdot (rot), + e.vector_mode (0))); + } +}; + +class svldnt1_gather_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1, false); + machine_mode mem_mode = e.memory_vector_mode (); + return e.use_exact_insn (code_for_aarch64_gather_ldnt (mem_mode)); + } +}; + +/* Implements extending forms of svldnt1_gather. */ +class svldnt1_gather_extend_impl : public extending_load +{ +public: + CONSTEXPR svldnt1_gather_extend_impl (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1, false); + /* Add a constant predicate for the extension rtx. 
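The svaba expander above keys the instruction pattern purely off element signedness (UMAX versus SMAX select the unsigned or signed absolute-difference-accumulate pattern), while the svcdot and svcdot_lane expanders pop the trailing rotation immediate and translate it with unspec_cdot. At the source level a single overloaded name covers all of the element types; a small usage sketch (assuming the usual arm_sve.h prototypes; the wrapper names are illustrative):

    #include <arm_sve.h>

    /* Absolute difference and accumulate; predication is "none", so there is
       no governing predicate argument.  */
    svuint8_t
    aba_u8 (svuint8_t acc, svuint8_t a, svuint8_t b)
    {
      return svaba (acc, a, b);        /* unsigned elements -> the UMAX-based pattern */
    }

    svint8_t
    aba_s8 (svint8_t acc, svint8_t a, svint8_t b)
    {
      return svaba (acc, a, b);        /* signed elements -> the SMAX-based pattern */
    }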
*/ + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + insn_code icode = code_for_aarch64_gather_ldnt (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +/* Implements both svmatch and svnmatch; the unspec parameter decides + between them. */ +class svmatch_svnmatch_impl : public function_base +{ +public: + CONSTEXPR svmatch_svnmatch_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* These are UNSPEC_PRED_Z operations and so need a hint operand. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + return e.use_exact_insn (code_for_aarch64_pred (m_unspec, + e.vector_mode (0))); + } + + int m_unspec; +}; + +/* Implements both svmovlb and svmovlt; the unspec parameters decide + between them. */ +class svmovl_lb_impl : public unspec_based_function_base +{ +public: + CONSTEXPR svmovl_lb_impl (int unspec_for_sint, int unspec_for_uint, + int unspec_for_fp) + : unspec_based_function_base (unspec_for_sint, unspec_for_uint, + unspec_for_fp) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.args.quick_push (const0_rtx); + return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint, + m_unspec_for_fp); + } +}; + +class svqcadd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + if (rot == 90) + return e.map_to_unspecs (UNSPEC_SQCADD90, -1, -1); + if (rot == 270) + return e.map_to_unspecs (UNSPEC_SQCADD270, -1, -1); + gcc_unreachable (); + } +}; + +class svqrdcmlah_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + return e.use_exact_insn (code_for_aarch64_sve (unspec_sqrdcmlah (rot), + e.vector_mode (0))); + } +}; + +class svqrdcmlah_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + return e.use_exact_insn (code_for_aarch64_lane (unspec_sqrdcmlah (rot), + e.vector_mode (0))); + } +}; + +class svqrshl_impl : public unspec_based_function +{ +public: + CONSTEXPR svqrshl_impl () + : unspec_based_function (UNSPEC_SQRSHL, UNSPEC_UQRSHL, -1) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2))) + { + if (wi::to_widest (amount) >= 0) + { + /* The rounding has no effect, and [SU]QSHL has immediate forms + that we can use for sensible shift amounts. */ + function_instance instance ("svqshl", functions::svqshl, + shapes::binary_int_opt_n, MODE_n, + f.type_suffix_ids, f.pred); + return f.redirect_call (instance); + } + else + { + /* The saturation has no effect, and [SU]RSHL has immediate forms + that we can use for sensible shift amounts. 
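The svqcadd expander above only has to handle rotations of 90 and 270 because the frontend checking for the binary_rotate shape (which svqcadd is bound to later in this patch) has already rejected anything else; that is why a plain gcc_unreachable is safe for other values. A usage sketch (assuming the usual arm_sve.h prototypes; the wrapper name is illustrative):

    #include <arm_sve.h>

    /* Saturating complex add: only rotations of 90 and 270 exist.  */
    svint16_t
    qcadd_example (svint16_t a, svint16_t b)
    {
      return svqcadd (a, b, 270);      /* resolves to svqcadd_s16 */
    }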
*/ + function_instance instance ("svrshl", functions::svrshl, + shapes::binary_int_opt_n, MODE_n, + f.type_suffix_ids, f.pred); + return f.redirect_call (instance); + } + } + return NULL; + } +}; + +class svqshl_impl : public unspec_based_function +{ +public: + CONSTEXPR svqshl_impl () + : unspec_based_function (UNSPEC_SQSHL, UNSPEC_UQSHL, -1) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2))) + { + int element_bits = f.type_suffix (0).element_bits; + if (wi::to_widest (amount) >= -element_bits + && wi::to_widest (amount) < 0) + { + /* The saturation has no effect for right shifts, so we can + use the immediate form of ASR or LSR. */ + amount = wide_int_to_tree (TREE_TYPE (amount), + -wi::to_wide (amount)); + function_instance instance ("svasr", functions::svasr, + shapes::binary_uint_opt_n, MODE_n, + f.type_suffix_ids, f.pred); + if (f.type_suffix (0).unsigned_p) + { + instance.base_name = "svlsr"; + instance.base = functions::svlsr; + } + gcall *call = as_a (f.redirect_call (instance)); + gimple_call_set_arg (call, 2, amount); + return call; + } + } + return NULL; + } +}; + +class svrshl_impl : public unspec_based_function +{ +public: + CONSTEXPR svrshl_impl () + : unspec_based_function (UNSPEC_SRSHL, UNSPEC_URSHL, -1) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2))) + { + if (wi::to_widest (amount) >= 0) + { + /* The rounding has no effect, and LSL has immediate forms + that we can use for sensible shift amounts. */ + function_instance instance ("svlsl", functions::svlsl, + shapes::binary_uint_opt_n, MODE_n, + f.type_suffix_ids, f.pred); + gcall *call = as_a (f.redirect_call (instance)); + gimple_call_set_arg (call, 2, amount); + return call; + } + int element_bits = f.type_suffix (0).element_bits; + if (wi::to_widest (amount) >= -element_bits) + { + /* The shift amount is in range of [SU]RSHR. */ + amount = wide_int_to_tree (TREE_TYPE (amount), + -wi::to_wide (amount)); + function_instance instance ("svrshr", functions::svrshr, + shapes::shift_right_imm, MODE_n, + f.type_suffix_ids, f.pred); + gcall *call = as_a (f.redirect_call (instance)); + gimple_call_set_arg (call, 2, amount); + return call; + } + } + return NULL; + } +}; + +class svsqadd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + if (e.pred == PRED_x + && aarch64_sve_sqadd_sqsub_immediate_p (mode, e.args[2], false)) + return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1); + return e.map_to_unspecs (-1, UNSPEC_USQADD, -1); + } +}; + +class svsra_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + rtx_code shift_code = e.type_suffix (0).unsigned_p ? LSHIFTRT : ASHIFTRT; + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_add (shift_code, mode)); + } +}; + +class svstnt1_scatter_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1, false); + machine_mode mem_mode = e.memory_vector_mode (); + return e.use_exact_insn (code_for_aarch64_scatter_stnt (mem_mode)); + } +}; + +/* Implements truncating forms of svstnt1_scatter. 
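The three fold routines above (for svqrshl, svqshl and svrshl) all rely on the same observation: once the shift amount is a known uniform constant, a more specific function with an immediate form computes the same value, so the call can be redirected to it. A source-level illustration of the equivalences being exploited (the rewrites actually happen on gimple via redirect_call; prototypes are the usual arm_sve.h ones and the wrapper name is illustrative):

    #include <arm_sve.h>

    void
    shift_fold_examples (svbool_t pg, svint32_t x)
    {
      /* Non-negative amount: rounding can never apply, so svqrshl behaves
         like svqshl.  */
      svint32_t a = svqrshl_x (pg, x, svdup_s32 (3));

      /* Negative amount: saturation can never apply to a right shift, so
         svqshl behaves like svasr (or svlsr for unsigned data) by the
         negated amount.  */
      svint32_t b = svqshl_x (pg, x, svdup_s32 (-4));

      /* svrshl with a negative constant amount becomes svrshr by the
         negated amount.  */
      svint32_t c = svrshl_x (pg, x, svdup_s32 (-2));

      (void) a; (void) b; (void) c;
    }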
*/ +class svstnt1_scatter_truncate_impl : public truncating_store +{ +public: + CONSTEXPR svstnt1_scatter_truncate_impl (scalar_int_mode to_mode) + : truncating_store (to_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1, false); + insn_code icode = code_for_aarch64_scatter_stnt (e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +class svtbl2_impl : public quiet +{ +public: + CONSTEXPR svtbl2_impl () : quiet (2) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_sve2_tbl2 (e.vector_mode (0))); + } +}; + +class svuqadd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + if (e.pred == PRED_x + && aarch64_sve_arith_immediate_p (mode, e.args[2], false)) + return e.use_unpred_insn (code_for_aarch64_sve_suqadd_const (mode)); + return e.map_to_unspecs (UNSPEC_SUQADD, -1, -1); + } +}; + +/* Implements both svwhilerw and svwhilewr; the unspec parameter decides + between them. */ +class svwhilerw_svwhilewr_impl : public full_width_access +{ +public: + CONSTEXPR svwhilerw_svwhilewr_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + for (unsigned int i = 0; i < 2; ++i) + e.args[i] = e.convert_to_pmode (e.args[i]); + return e.use_exact_insn (code_for_while (m_unspec, Pmode, e.gp_mode (0))); + } + + int m_unspec; +}; + +} /* end anonymous namespace */ + +namespace aarch64_sve { + +FUNCTION (svaba, svaba_impl,) +FUNCTION (svabalb, unspec_based_add_function, (UNSPEC_SABDLB, + UNSPEC_UABDLB, -1)) +FUNCTION (svabalt, unspec_based_add_function, (UNSPEC_SABDLT, + UNSPEC_UABDLT, -1)) +FUNCTION (svadclb, unspec_based_function, (-1, UNSPEC_ADCLB, -1)) +FUNCTION (svadclt, unspec_based_function, (-1, UNSPEC_ADCLT, -1)) +FUNCTION (svaddhnb, unspec_based_function, (UNSPEC_ADDHNB, UNSPEC_ADDHNB, -1)) +FUNCTION (svaddhnt, unspec_based_function, (UNSPEC_ADDHNT, UNSPEC_ADDHNT, -1)) +FUNCTION (svabdlb, unspec_based_function, (UNSPEC_SABDLB, UNSPEC_UABDLB, -1)) +FUNCTION (svabdlt, unspec_based_function, (UNSPEC_SABDLT, UNSPEC_UABDLT, -1)) +FUNCTION (svadalp, unspec_based_function, (UNSPEC_SADALP, UNSPEC_UADALP, -1)) +FUNCTION (svaddlb, unspec_based_function, (UNSPEC_SADDLB, UNSPEC_UADDLB, -1)) +FUNCTION (svaddlbt, unspec_based_function, (UNSPEC_SADDLBT, -1, -1)) +FUNCTION (svaddlt, unspec_based_function, (UNSPEC_SADDLT, UNSPEC_UADDLT, -1)) +FUNCTION (svaddwb, unspec_based_function, (UNSPEC_SADDWB, UNSPEC_UADDWB, -1)) +FUNCTION (svaddwt, unspec_based_function, (UNSPEC_SADDWT, UNSPEC_UADDWT, -1)) +FUNCTION (svaddp, unspec_based_pred_function, (UNSPEC_ADDP, UNSPEC_ADDP, + UNSPEC_FADDP)) +FUNCTION (svaesd, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesd)) +FUNCTION (svaese, fixed_insn_function, (CODE_FOR_aarch64_sve2_aese)) +FUNCTION (svaesimc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesimc)) +FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc)) +FUNCTION (svbcax, CODE_FOR_MODE0 (aarch64_sve2_bcax),) +FUNCTION (svbdep, unspec_based_function, (UNSPEC_BDEP, UNSPEC_BDEP, -1)) +FUNCTION (svbext, unspec_based_function, (UNSPEC_BEXT, UNSPEC_BEXT, -1)) +FUNCTION (svbgrp, unspec_based_function, (UNSPEC_BGRP, UNSPEC_BGRP, -1)) +FUNCTION (svbsl, CODE_FOR_MODE0 (aarch64_sve2_bsl),) +FUNCTION (svbsl1n, CODE_FOR_MODE0 (aarch64_sve2_bsl1n),) +FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),) +FUNCTION (svcdot, 
svcdot_impl,) +FUNCTION (svcdot_lane, svcdot_lane_impl,) +FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT)) +FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX)) +FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),) +FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),) +FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1)) +FUNCTION (sveortb, unspec_based_function, (UNSPEC_EORTB, UNSPEC_EORTB, -1)) +FUNCTION (svhadd, unspec_based_function, (UNSPEC_SHADD, UNSPEC_UHADD, -1)) +FUNCTION (svhsub, unspec_based_function, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1)) +FUNCTION (svhistcnt, CODE_FOR_MODE0 (aarch64_sve2_histcnt),) +FUNCTION (svhistseg, CODE_FOR_MODE0 (aarch64_sve2_histseg),) +FUNCTION (svhsubr, unspec_based_function_rotated, (UNSPEC_SHSUB, + UNSPEC_UHSUB, -1)) +FUNCTION (svldnt1_gather, svldnt1_gather_impl,) +FUNCTION (svldnt1sb_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_s8)) +FUNCTION (svldnt1sh_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_s16)) +FUNCTION (svldnt1sw_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_s32)) +FUNCTION (svldnt1ub_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u8)) +FUNCTION (svldnt1uh_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u16)) +FUNCTION (svldnt1uw_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u32)) +FUNCTION (svlogb, unspec_based_function, (-1, -1, UNSPEC_COND_FLOGB)) +FUNCTION (svmatch, svmatch_svnmatch_impl, (UNSPEC_MATCH)) +FUNCTION (svmaxp, unspec_based_pred_function, (UNSPEC_SMAXP, UNSPEC_UMAXP, + UNSPEC_FMAXP)) +FUNCTION (svmaxnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMAXNMP)) +FUNCTION (svminp, unspec_based_pred_function, (UNSPEC_SMINP, UNSPEC_UMINP, + UNSPEC_FMINP)) +FUNCTION (svminnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMINNMP)) +FUNCTION (svmlalb, unspec_based_mla_function, (UNSPEC_SMULLB, + UNSPEC_UMULLB, UNSPEC_FMLALB)) +FUNCTION (svmlalb_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLB, + UNSPEC_UMULLB, + UNSPEC_FMLALB)) +FUNCTION (svmlalt, unspec_based_mla_function, (UNSPEC_SMULLT, + UNSPEC_UMULLT, UNSPEC_FMLALT)) +FUNCTION (svmlalt_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLT, + UNSPEC_UMULLT, + UNSPEC_FMLALT)) +FUNCTION (svmlslb, unspec_based_mls_function, (UNSPEC_SMULLB, + UNSPEC_UMULLB, UNSPEC_FMLSLB)) +FUNCTION (svmlslb_lane, unspec_based_mls_lane_function, (UNSPEC_SMULLB, + UNSPEC_UMULLB, + UNSPEC_FMLSLB)) +FUNCTION (svmlslt, unspec_based_mls_function, (UNSPEC_SMULLT, + UNSPEC_UMULLT, UNSPEC_FMLSLT)) +FUNCTION (svmlslt_lane, unspec_based_mls_lane_function, (UNSPEC_SMULLT, + UNSPEC_UMULLT, + UNSPEC_FMLSLT)) +FUNCTION (svmovlb, svmovl_lb_impl, (UNSPEC_SSHLLB, UNSPEC_USHLLB, -1)) +FUNCTION (svmovlt, svmovl_lb_impl, (UNSPEC_SSHLLT, UNSPEC_USHLLT, -1)) +FUNCTION (svmullb, unspec_based_function, (UNSPEC_SMULLB, UNSPEC_UMULLB, -1)) +FUNCTION (svmullb_lane, unspec_based_lane_function, (UNSPEC_SMULLB, + UNSPEC_UMULLB, -1)) +FUNCTION (svmullt, unspec_based_function, (UNSPEC_SMULLT, UNSPEC_UMULLT, -1)) +FUNCTION (svmullt_lane, unspec_based_lane_function, (UNSPEC_SMULLT, + UNSPEC_UMULLT, -1)) +FUNCTION (svnbsl, CODE_FOR_MODE0 (aarch64_sve2_nbsl),) +FUNCTION (svnmatch, svmatch_svnmatch_impl, (UNSPEC_NMATCH)) +FUNCTION (svpmul, CODE_FOR_MODE0 (aarch64_sve2_pmul),) +FUNCTION (svpmullb, unspec_based_function, (-1, UNSPEC_PMULLB, -1)) +FUNCTION (svpmullb_pair, unspec_based_function, (-1, UNSPEC_PMULLB_PAIR, -1)) +FUNCTION (svpmullt, unspec_based_function, (-1, UNSPEC_PMULLT, -1)) +FUNCTION (svpmullt_pair, unspec_based_function, (-1, 
UNSPEC_PMULLT_PAIR, -1)) +FUNCTION (svqabs, rtx_code_function, (SS_ABS, UNKNOWN, UNKNOWN)) +FUNCTION (svqcadd, svqcadd_impl,) +FUNCTION (svqdmlalb, unspec_based_qadd_function, (UNSPEC_SQDMULLB, -1, -1)) +FUNCTION (svqdmlalb_lane, unspec_based_qadd_lane_function, (UNSPEC_SQDMULLB, + -1, -1)) +FUNCTION (svqdmlalbt, unspec_based_qadd_function, (UNSPEC_SQDMULLBT, -1, -1)) +FUNCTION (svqdmlalt, unspec_based_qadd_function, (UNSPEC_SQDMULLT, -1, -1)) +FUNCTION (svqdmlalt_lane, unspec_based_qadd_lane_function, (UNSPEC_SQDMULLT, + -1, -1)) +FUNCTION (svqdmlslb, unspec_based_qsub_function, (UNSPEC_SQDMULLB, -1, -1)) +FUNCTION (svqdmlslb_lane, unspec_based_qsub_lane_function, (UNSPEC_SQDMULLB, + -1, -1)) +FUNCTION (svqdmlslbt, unspec_based_qsub_function, (UNSPEC_SQDMULLBT, -1, -1)) +FUNCTION (svqdmlslt, unspec_based_qsub_function, (UNSPEC_SQDMULLT, -1, -1)) +FUNCTION (svqdmlslt_lane, unspec_based_qsub_lane_function, (UNSPEC_SQDMULLT, + -1, -1)) +FUNCTION (svqdmulh, unspec_based_function, (UNSPEC_SQDMULH, -1, -1)) +FUNCTION (svqdmulh_lane, unspec_based_lane_function, (UNSPEC_SQDMULH, -1, -1)) +FUNCTION (svqdmullb, unspec_based_function, (UNSPEC_SQDMULLB, -1, -1)) +FUNCTION (svqdmullb_lane, unspec_based_lane_function, (UNSPEC_SQDMULLB, + -1, -1)) +FUNCTION (svqdmullt, unspec_based_function, (UNSPEC_SQDMULLT, -1, -1)) +FUNCTION (svqdmullt_lane, unspec_based_lane_function, (UNSPEC_SQDMULLT, + -1, -1)) +FUNCTION (svqneg, rtx_code_function, (SS_NEG, UNKNOWN, UNKNOWN)) +FUNCTION (svqrdcmlah, svqrdcmlah_impl,) +FUNCTION (svqrdcmlah_lane, svqrdcmlah_lane_impl,) +FUNCTION (svqrdmulh, unspec_based_function, (UNSPEC_SQRDMULH, -1, -1)) +FUNCTION (svqrdmulh_lane, unspec_based_lane_function, (UNSPEC_SQRDMULH, + -1, -1)) +FUNCTION (svqrdmlah, unspec_based_function, (UNSPEC_SQRDMLAH, -1, -1)) +FUNCTION (svqrdmlah_lane, unspec_based_lane_function, (UNSPEC_SQRDMLAH, + -1, -1)) +FUNCTION (svqrdmlsh, unspec_based_function, (UNSPEC_SQRDMLSH, -1, -1)) +FUNCTION (svqrdmlsh_lane, unspec_based_lane_function, (UNSPEC_SQRDMLSH, + -1, -1)) +FUNCTION (svqrshl, svqrshl_impl,) +FUNCTION (svqrshrnb, unspec_based_function, (UNSPEC_SQRSHRNB, + UNSPEC_UQRSHRNB, -1)) +FUNCTION (svqrshrnt, unspec_based_function, (UNSPEC_SQRSHRNT, + UNSPEC_UQRSHRNT, -1)) +FUNCTION (svqrshrunb, unspec_based_function, (UNSPEC_SQRSHRUNB, -1, -1)) +FUNCTION (svqrshrunt, unspec_based_function, (UNSPEC_SQRSHRUNT, -1, -1)) +FUNCTION (svqshl, svqshl_impl,) +FUNCTION (svqshlu, unspec_based_function, (UNSPEC_SQSHLU, -1, -1)) +FUNCTION (svqshrnb, unspec_based_function, (UNSPEC_SQSHRNB, + UNSPEC_UQSHRNB, -1)) +FUNCTION (svqshrnt, unspec_based_function, (UNSPEC_SQSHRNT, + UNSPEC_UQSHRNT, -1)) +FUNCTION (svqshrunb, unspec_based_function, (UNSPEC_SQSHRUNB, -1, -1)) +FUNCTION (svqshrunt, unspec_based_function, (UNSPEC_SQSHRUNT, -1, -1)) +FUNCTION (svqsubr, rtx_code_function_rotated, (SS_MINUS, US_MINUS, -1)) +FUNCTION (svqxtnb, unspec_based_function, (UNSPEC_SQXTNB, UNSPEC_UQXTNB, -1)) +FUNCTION (svqxtnt, unspec_based_function, (UNSPEC_SQXTNT, UNSPEC_UQXTNT, -1)) +FUNCTION (svqxtunb, unspec_based_function, (UNSPEC_SQXTUNB, -1, -1)) +FUNCTION (svqxtunt, unspec_based_function, (UNSPEC_SQXTUNT, -1, -1)) +FUNCTION (svraddhnb, unspec_based_function, (UNSPEC_RADDHNB, + UNSPEC_RADDHNB, -1)) +FUNCTION (svraddhnt, unspec_based_function, (UNSPEC_RADDHNT, + UNSPEC_RADDHNT, -1)) +FUNCTION (svrax1, fixed_insn_function, (CODE_FOR_aarch64_sve2_rax1)) +FUNCTION (svrhadd, unspec_based_function, (UNSPEC_SRHADD, UNSPEC_URHADD, -1)) +FUNCTION (svrshl, svrshl_impl,) +FUNCTION (svrshr, 
unspec_based_function, (UNSPEC_SRSHR, UNSPEC_URSHR, -1)) +FUNCTION (svrshrnb, unspec_based_function, (UNSPEC_RSHRNB, UNSPEC_RSHRNB, -1)) +FUNCTION (svrshrnt, unspec_based_function, (UNSPEC_RSHRNT, UNSPEC_RSHRNT, -1)) +FUNCTION (svrsra, unspec_based_add_function, (UNSPEC_SRSHR, UNSPEC_URSHR, -1)) +FUNCTION (svrsubhnb, unspec_based_function, (UNSPEC_RSUBHNB, + UNSPEC_RSUBHNB, -1)) +FUNCTION (svrsubhnt, unspec_based_function, (UNSPEC_RSUBHNT, + UNSPEC_RSUBHNT, -1)) +FUNCTION (svsbclb, unspec_based_function, (-1, UNSPEC_SBCLB, -1)) +FUNCTION (svsbclt, unspec_based_function, (-1, UNSPEC_SBCLT, -1)) +FUNCTION (svshllb, unspec_based_function, (UNSPEC_SSHLLB, UNSPEC_USHLLB, -1)) +FUNCTION (svshllt, unspec_based_function, (UNSPEC_SSHLLT, UNSPEC_USHLLT, -1)) +FUNCTION (svshrnb, unspec_based_function, (UNSPEC_SHRNB, UNSPEC_SHRNB, -1)) +FUNCTION (svshrnt, unspec_based_function, (UNSPEC_SHRNT, UNSPEC_SHRNT, -1)) +FUNCTION (svsli, unspec_based_function, (UNSPEC_SLI, UNSPEC_SLI, -1)) +FUNCTION (svsm4e, fixed_insn_function, (CODE_FOR_aarch64_sve2_sm4e)) +FUNCTION (svsm4ekey, fixed_insn_function, (CODE_FOR_aarch64_sve2_sm4ekey)) +FUNCTION (svsqadd, svsqadd_impl,) +FUNCTION (svsra, svsra_impl,) +FUNCTION (svsri, unspec_based_function, (UNSPEC_SRI, UNSPEC_SRI, -1)) +FUNCTION (svstnt1_scatter, svstnt1_scatter_impl,) +FUNCTION (svstnt1b_scatter, svstnt1_scatter_truncate_impl, (QImode)) +FUNCTION (svstnt1h_scatter, svstnt1_scatter_truncate_impl, (HImode)) +FUNCTION (svstnt1w_scatter, svstnt1_scatter_truncate_impl, (SImode)) +FUNCTION (svsubhnb, unspec_based_function, (UNSPEC_SUBHNB, UNSPEC_SUBHNB, -1)) +FUNCTION (svsubhnt, unspec_based_function, (UNSPEC_SUBHNT, UNSPEC_SUBHNT, -1)) +FUNCTION (svsublb, unspec_based_function, (UNSPEC_SSUBLB, UNSPEC_USUBLB, -1)) +FUNCTION (svsublbt, unspec_based_function, (UNSPEC_SSUBLBT, -1, -1)) +FUNCTION (svsublt, unspec_based_function, (UNSPEC_SSUBLT, UNSPEC_USUBLT, -1)) +FUNCTION (svsubltb, unspec_based_function, (UNSPEC_SSUBLTB, -1, -1)) +FUNCTION (svsubwb, unspec_based_function, (UNSPEC_SSUBWB, UNSPEC_USUBWB, -1)) +FUNCTION (svsubwt, unspec_based_function, (UNSPEC_SSUBWT, UNSPEC_USUBWT, -1)) +FUNCTION (svtbl2, svtbl2_impl,) +FUNCTION (svtbx, CODE_FOR_MODE0 (aarch64_sve2_tbx),) +FUNCTION (svuqadd, svuqadd_impl,) +FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS)) +FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI)) +FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW)) +FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR)) +FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),) + +} /* end namespace aarch64_sve */ diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def new file mode 100644 index 0000000000000..bed792df41574 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -0,0 +1,214 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) + Copyright (C) 2020-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
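For reference, each FUNCTION (name, class, args) line above instantiates one singleton implementation object and exposes it as functions::name, which is what the extern declarations in aarch64-sve-builtins-sve2.h refer to. The sketch below shows the idea only; the real macro is defined with the shared builtin infrastructure elsewhere in the series, and the exact spelling here is an assumption:

    /* Illustrative sketch, not the actual GCC definition.  */
    #define FUNCTION(NAME, CLASS, ARGS) \
      namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \
      namespace functions { const function_base *const NAME = &NAME##_obj; }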
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2 +DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svadalp, binary_wide, hsd_integer, mxz) +DEF_SVE_FUNCTION (svadclb, ternary_opt_n, sd_unsigned, none) +DEF_SVE_FUNCTION (svadclt, ternary_opt_n, sd_unsigned, none) +DEF_SVE_FUNCTION (svaddhnb, binary_narrowb_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svaddhnt, binary_narrowt_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svabdlb, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svabdlt, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svaddlb, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svaddlbt, binary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svaddlt, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svaddp, binary, all_arith, mx) +DEF_SVE_FUNCTION (svaddwb, binary_wide_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svaddwt, binary_wide_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svbcax, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svbsl, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svbsl1n, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svbsl2n, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svcadd, binary_rotate, all_integer, none) +DEF_SVE_FUNCTION (svcdot, ternary_qq_rotate, sd_signed, none) +DEF_SVE_FUNCTION (svcdot_lane, ternary_qq_lane_rotate, sd_signed, none) +DEF_SVE_FUNCTION (svcmla, ternary_rotate, all_integer, none) +DEF_SVE_FUNCTION (svcmla_lane, ternary_lane_rotate, hs_integer, none) +DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, mx) +DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, mx) +DEF_SVE_FUNCTION (svcvtx, unary_convert, cvt_narrow_s, mxz) +DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, mx) +DEF_SVE_FUNCTION (sveor3, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (sveorbt, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (sveortb, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svhadd, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svhistcnt, binary_to_uint, sd_integer, z) +DEF_SVE_FUNCTION (svhistseg, binary_to_uint, b_integer, none) +DEF_SVE_FUNCTION (svhsub, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svhsubr, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svldnt1_gather, load_gather_sv_restricted, sd_data, implicit) +DEF_SVE_FUNCTION (svldnt1_gather, load_gather_vs, sd_data, implicit) +DEF_SVE_FUNCTION (svldnt1sb_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_index_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_offset_restricted, d_integer, implicit) +DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_index_restricted, d_integer, implicit) +DEF_SVE_FUNCTION (svldnt1ub_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_index_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_offset_restricted, d_integer, implicit) +DEF_SVE_FUNCTION (svldnt1uw_gather, 
load_ext_gather_index_restricted, d_integer, implicit) +DEF_SVE_FUNCTION (svlogb, unary_to_int, all_float, mxz) +DEF_SVE_FUNCTION (svmatch, compare, bh_integer, implicit) +DEF_SVE_FUNCTION (svmaxp, binary, all_arith, mx) +DEF_SVE_FUNCTION (svmaxnmp, binary, all_float, mx) +DEF_SVE_FUNCTION (svmla_lane, ternary_lane, hsd_integer, none) +DEF_SVE_FUNCTION (svmlalb, ternary_long_opt_n, s_float_hsd_integer, none) +DEF_SVE_FUNCTION (svmlalb_lane, ternary_long_lane, s_float_sd_integer, none) +DEF_SVE_FUNCTION (svmlalt, ternary_long_opt_n, s_float_hsd_integer, none) +DEF_SVE_FUNCTION (svmlalt_lane, ternary_long_lane, s_float_sd_integer, none) +DEF_SVE_FUNCTION (svmls_lane, ternary_lane, hsd_integer, none) +DEF_SVE_FUNCTION (svmlslb, ternary_long_opt_n, s_float_hsd_integer, none) +DEF_SVE_FUNCTION (svmlslb_lane, ternary_long_lane, s_float_sd_integer, none) +DEF_SVE_FUNCTION (svmlslt, ternary_long_opt_n, s_float_hsd_integer, none) +DEF_SVE_FUNCTION (svmlslt_lane, ternary_long_lane, s_float_sd_integer, none) +DEF_SVE_FUNCTION (svminp, binary, all_arith, mx) +DEF_SVE_FUNCTION (svminnmp, binary, all_float, mx) +DEF_SVE_FUNCTION (svmovlb, unary_long, hsd_integer, none) +DEF_SVE_FUNCTION (svmovlt, unary_long, hsd_integer, none) +DEF_SVE_FUNCTION (svmul_lane, binary_lane, hsd_integer, none) +DEF_SVE_FUNCTION (svmullb, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svmullb_lane, binary_long_lane, sd_integer, none) +DEF_SVE_FUNCTION (svmullt, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svmullt_lane, binary_long_lane, sd_integer, none) +DEF_SVE_FUNCTION (svnbsl, ternary_opt_n, all_integer, none) +DEF_SVE_FUNCTION (svnmatch, compare, bh_integer, implicit) +DEF_SVE_FUNCTION (svpmul, binary_opt_n, b_unsigned, none) +DEF_SVE_FUNCTION (svpmullb, binary_long_opt_n, hd_unsigned, none) +DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, bs_unsigned, none) +DEF_SVE_FUNCTION (svpmullt, binary_long_opt_n, hd_unsigned, none) +DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, bs_unsigned, none) +DEF_SVE_FUNCTION (svqabs, unary, all_signed, mxz) +DEF_SVE_FUNCTION (svqadd, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svqcadd, binary_rotate, all_signed, none) +DEF_SVE_FUNCTION (svqdmlalb, ternary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmlalb_lane, ternary_long_lane, sd_signed, none) +DEF_SVE_FUNCTION (svqdmlalbt, ternary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmlalt, ternary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmlalt_lane, ternary_long_lane, sd_signed, none) +DEF_SVE_FUNCTION (svqdmlslb, ternary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmlslb_lane, ternary_long_lane, sd_signed, none) +DEF_SVE_FUNCTION (svqdmlslbt, ternary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmlslt, ternary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmlslt_lane, ternary_long_lane, sd_signed, none) +DEF_SVE_FUNCTION (svqdmulh, binary_opt_n, all_signed, none) +DEF_SVE_FUNCTION (svqdmulh_lane, binary_lane, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmullb, binary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmullb_lane, binary_long_lane, sd_signed, none) +DEF_SVE_FUNCTION (svqdmullt, binary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svqdmullt_lane, binary_long_lane, sd_signed, none) +DEF_SVE_FUNCTION (svqneg, unary, all_signed, mxz) +DEF_SVE_FUNCTION (svqrdmulh, binary_opt_n, all_signed, none) +DEF_SVE_FUNCTION (svqrdmulh_lane, binary_lane, hsd_signed, none) +DEF_SVE_FUNCTION (svqrdmlah, ternary_opt_n, all_signed, none) +DEF_SVE_FUNCTION 
(svqrdmlah_lane, ternary_lane, hsd_signed, none) +DEF_SVE_FUNCTION (svqrdmlsh, ternary_opt_n, all_signed, none) +DEF_SVE_FUNCTION (svqrdmlsh_lane, ternary_lane, hsd_signed, none) +DEF_SVE_FUNCTION (svqrdcmlah, ternary_rotate, all_signed, none) +DEF_SVE_FUNCTION (svqrdcmlah_lane, ternary_lane_rotate, hs_signed, none) +DEF_SVE_FUNCTION (svqrshrnb, shift_right_imm_narrowb, hsd_integer, none) +DEF_SVE_FUNCTION (svqrshrnt, shift_right_imm_narrowt, hsd_integer, none) +DEF_SVE_FUNCTION (svqrshrunb, shift_right_imm_narrowb_to_uint, hsd_signed, none) +DEF_SVE_FUNCTION (svqrshrunt, shift_right_imm_narrowt_to_uint, hsd_signed, none) +DEF_SVE_FUNCTION (svqshl, binary_int_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svqshlu, shift_left_imm_to_uint, all_signed, mxz) +DEF_SVE_FUNCTION (svqshrnb, shift_right_imm_narrowb, hsd_integer, none) +DEF_SVE_FUNCTION (svqshrnt, shift_right_imm_narrowt, hsd_integer, none) +DEF_SVE_FUNCTION (svqshrunb, shift_right_imm_narrowb_to_uint, hsd_signed, none) +DEF_SVE_FUNCTION (svqshrunt, shift_right_imm_narrowt_to_uint, hsd_signed, none) +DEF_SVE_FUNCTION (svqrshl, binary_int_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svqsub, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svqsubr, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svqxtnb, unary_narrowb, hsd_integer, none) +DEF_SVE_FUNCTION (svqxtnt, unary_narrowt, hsd_integer, none) +DEF_SVE_FUNCTION (svqxtunb, unary_narrowb_to_uint, hsd_signed, none) +DEF_SVE_FUNCTION (svqxtunt, unary_narrowt_to_uint, hsd_signed, none) +DEF_SVE_FUNCTION (svraddhnb, binary_narrowb_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svraddhnt, binary_narrowt_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svrecpe, unary, s_unsigned, mxz) +DEF_SVE_FUNCTION (svrhadd, binary_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svrsqrte, unary, s_unsigned, mxz) +DEF_SVE_FUNCTION (svrshl, binary_int_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svrshr, shift_right_imm, all_integer, mxz) +DEF_SVE_FUNCTION (svrshrnb, shift_right_imm_narrowb, hsd_integer, none) +DEF_SVE_FUNCTION (svrshrnt, shift_right_imm_narrowt, hsd_integer, none) +DEF_SVE_FUNCTION (svrsra, ternary_shift_right_imm, all_integer, none) +DEF_SVE_FUNCTION (svrsubhnb, binary_narrowb_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svrsubhnt, binary_narrowt_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svsbclb, ternary_opt_n, sd_unsigned, none) +DEF_SVE_FUNCTION (svsbclt, ternary_opt_n, sd_unsigned, none) +DEF_SVE_FUNCTION (svshllb, shift_left_imm_long, hsd_integer, none) +DEF_SVE_FUNCTION (svshllt, shift_left_imm_long, hsd_integer, none) +DEF_SVE_FUNCTION (svshrnb, shift_right_imm_narrowb, hsd_integer, none) +DEF_SVE_FUNCTION (svshrnt, shift_right_imm_narrowt, hsd_integer, none) +DEF_SVE_FUNCTION (svsli, ternary_shift_left_imm, all_integer, none) +DEF_SVE_FUNCTION (svsqadd, binary_int_opt_n, all_unsigned, mxz) +DEF_SVE_FUNCTION (svsra, ternary_shift_right_imm, all_integer, none) +DEF_SVE_FUNCTION (svsri, ternary_shift_right_imm, all_integer, none) +DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_index_restricted, sd_data, implicit) +DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_offset_restricted, sd_data, implicit) +DEF_SVE_FUNCTION (svstnt1b_scatter, store_scatter_offset_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_index_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_offset_restricted, sd_integer, implicit) +DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_index_restricted, d_integer, implicit) +DEF_SVE_FUNCTION 
(svstnt1w_scatter, store_scatter_offset_restricted, d_integer, implicit) +DEF_SVE_FUNCTION (svsubhnb, binary_narrowb_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svsubhnt, binary_narrowt_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svsublb, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svsublbt, binary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svsublt, binary_long_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svsubltb, binary_long_opt_n, hsd_signed, none) +DEF_SVE_FUNCTION (svsubwb, binary_wide_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svsubwt, binary_wide_opt_n, hsd_integer, none) +DEF_SVE_FUNCTION (svtbl2, tbl_tuple, all_data, none) +DEF_SVE_FUNCTION (svtbx, ternary_uint, all_data, none) +DEF_SVE_FUNCTION (svuqadd, binary_uint_opt_n, all_signed, mxz) +DEF_SVE_FUNCTION (svwhilege, compare_scalar, while, none) +DEF_SVE_FUNCTION (svwhilegt, compare_scalar, while, none) +DEF_SVE_FUNCTION (svwhilerw, compare_ptr, all_data, none) +DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none) +DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES) +DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none) +DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none) +DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none) +DEF_SVE_FUNCTION (svaesimc, unary, b_unsigned, none) +DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none) +DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM) +DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none) +DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none) +DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3) +DEF_SVE_FUNCTION (svrax1, binary, d_integer, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4) +DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) +DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h new file mode 100644 index 0000000000000..00bf17c403db6 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -0,0 +1,190 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) + Copyright (C) 2020-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
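Each DEF_SVE_FUNCTION line above names an implementation, its shape, the set of type suffixes to instantiate and the supported predication forms, with REQUIRED_EXTENSIONS gating whole groups such as the AES, BITPERM, SHA3 and SM4 blocks. From the user's side that gating shows up through the -march extension flags and the ACLE feature-test macros; a usage sketch (assuming the standard ACLE macro name, compiled with something like -march=armv8.2-a+sve2+sve2-aes; the wrapper name is illustrative):

    #include <arm_sve.h>

    /* svaesd sits in the (SVE2 | SVE2_AES) group; "binary, b_unsigned, none"
       means two data arguments, 8-bit unsigned elements only and no
       governing predicate.  */
    #if defined (__ARM_FEATURE_SVE2_AES)
    svuint8_t
    aesd_example (svuint8_t state, svuint8_t round_key)
    {
      return svaesd (state, round_key);   /* resolves to svaesd_u8 */
    }
    #endif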
*/ + +#ifndef GCC_AARCH64_SVE_BUILTINS_SVE2_H +#define GCC_AARCH64_SVE_BUILTINS_SVE2_H + +namespace aarch64_sve +{ + namespace functions + { + extern const function_base *const svaba; + extern const function_base *const svabalb; + extern const function_base *const svabalt; + extern const function_base *const svabdlb; + extern const function_base *const svabdlt; + extern const function_base *const svadalp; + extern const function_base *const svadclb; + extern const function_base *const svadclt; + extern const function_base *const svaddhnb; + extern const function_base *const svaddhnt; + extern const function_base *const svaddlb; + extern const function_base *const svaddlbt; + extern const function_base *const svaddlt; + extern const function_base *const svaddp; + extern const function_base *const svaddwb; + extern const function_base *const svaddwt; + extern const function_base *const svaesd; + extern const function_base *const svaese; + extern const function_base *const svaesimc; + extern const function_base *const svaesmc; + extern const function_base *const svbcax; + extern const function_base *const svbdep; + extern const function_base *const svbext; + extern const function_base *const svbgrp; + extern const function_base *const svbsl; + extern const function_base *const svbsl1n; + extern const function_base *const svbsl2n; + extern const function_base *const svcdot; + extern const function_base *const svcdot_lane; + extern const function_base *const svcvtlt; + extern const function_base *const svcvtx; + extern const function_base *const svcvtxnt; + extern const function_base *const sveor3; + extern const function_base *const sveorbt; + extern const function_base *const sveortb; + extern const function_base *const svhadd; + extern const function_base *const svhistcnt; + extern const function_base *const svhistseg; + extern const function_base *const svhsub; + extern const function_base *const svhsubr; + extern const function_base *const svldnt1_gather; + extern const function_base *const svldnt1sb_gather; + extern const function_base *const svldnt1sh_gather; + extern const function_base *const svldnt1sw_gather; + extern const function_base *const svldnt1ub_gather; + extern const function_base *const svldnt1uh_gather; + extern const function_base *const svldnt1uw_gather; + extern const function_base *const svlogb; + extern const function_base *const svmatch; + extern const function_base *const svmaxp; + extern const function_base *const svmaxnmp; + extern const function_base *const svmlalb; + extern const function_base *const svmlalb_lane; + extern const function_base *const svmlalt; + extern const function_base *const svmlalt_lane; + extern const function_base *const svmlslb; + extern const function_base *const svmlslb_lane; + extern const function_base *const svmlslt; + extern const function_base *const svmlslt_lane; + extern const function_base *const svminp; + extern const function_base *const svminnmp; + extern const function_base *const svmovlb; + extern const function_base *const svmovlt; + extern const function_base *const svmullb; + extern const function_base *const svmullb_lane; + extern const function_base *const svmullt; + extern const function_base *const svmullt_lane; + extern const function_base *const svnbsl; + extern const function_base *const svnmatch; + extern const function_base *const svpmul; + extern const function_base *const svpmullb; + extern const function_base *const svpmullb_pair; + extern const function_base *const svpmullt; + extern const function_base 
*const svpmullt_pair; + extern const function_base *const svqabs; + extern const function_base *const svqcadd; + extern const function_base *const svqdmlalb; + extern const function_base *const svqdmlalb_lane; + extern const function_base *const svqdmlalbt; + extern const function_base *const svqdmlalt; + extern const function_base *const svqdmlalt_lane; + extern const function_base *const svqdmlslb; + extern const function_base *const svqdmlslb_lane; + extern const function_base *const svqdmlslbt; + extern const function_base *const svqdmlslt; + extern const function_base *const svqdmlslt_lane; + extern const function_base *const svqdmulh; + extern const function_base *const svqdmulh_lane; + extern const function_base *const svqdmullb; + extern const function_base *const svqdmullb_lane; + extern const function_base *const svqdmullt; + extern const function_base *const svqdmullt_lane; + extern const function_base *const svqneg; + extern const function_base *const svqrdcmlah; + extern const function_base *const svqrdcmlah_lane; + extern const function_base *const svqrdmulh; + extern const function_base *const svqrdmulh_lane; + extern const function_base *const svqrdmlah; + extern const function_base *const svqrdmlah_lane; + extern const function_base *const svqrdmlsh; + extern const function_base *const svqrdmlsh_lane; + extern const function_base *const svqrshl; + extern const function_base *const svqrshrnb; + extern const function_base *const svqrshrnt; + extern const function_base *const svqrshrunb; + extern const function_base *const svqrshrunt; + extern const function_base *const svqshl; + extern const function_base *const svqshlu; + extern const function_base *const svqshrnb; + extern const function_base *const svqshrnt; + extern const function_base *const svqshrunb; + extern const function_base *const svqshrunt; + extern const function_base *const svqsubr; + extern const function_base *const svqxtnb; + extern const function_base *const svqxtnt; + extern const function_base *const svqxtunb; + extern const function_base *const svqxtunt; + extern const function_base *const svraddhnb; + extern const function_base *const svraddhnt; + extern const function_base *const svrax1; + extern const function_base *const svrhadd; + extern const function_base *const svrshl; + extern const function_base *const svrshr; + extern const function_base *const svrshrnb; + extern const function_base *const svrshrnt; + extern const function_base *const svrsra; + extern const function_base *const svrsubhnb; + extern const function_base *const svrsubhnt; + extern const function_base *const svsbclb; + extern const function_base *const svsbclt; + extern const function_base *const svshllb; + extern const function_base *const svshllt; + extern const function_base *const svshrnb; + extern const function_base *const svshrnt; + extern const function_base *const svsli; + extern const function_base *const svsm4e; + extern const function_base *const svsm4ekey; + extern const function_base *const svsqadd; + extern const function_base *const svsra; + extern const function_base *const svsri; + extern const function_base *const svstnt1_scatter; + extern const function_base *const svstnt1b_scatter; + extern const function_base *const svstnt1h_scatter; + extern const function_base *const svstnt1w_scatter; + extern const function_base *const svsubhnb; + extern const function_base *const svsubhnt; + extern const function_base *const svsublb; + extern const function_base *const svsublbt; + extern const function_base *const 
svsublt; + extern const function_base *const svsubltb; + extern const function_base *const svsubwb; + extern const function_base *const svsubwt; + extern const function_base *const svtbl2; + extern const function_base *const svtbx; + extern const function_base *const svuqadd; + extern const function_base *const svwhilege; + extern const function_base *const svwhilegt; + extern const function_base *const svwhilerw; + extern const function_base *const svwhilewr; + extern const function_base *const svxar; + } +} + +#endif diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc new file mode 100644 index 0000000000000..f44f81f13754b --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -0,0 +1,3919 @@ +/* ACLE support for AArch64 SVE + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "memmodel.h" +#include "insn-codes.h" +#include "optabs.h" +#include "recog.h" +#include "diagnostic.h" +#include "expr.h" +#include "basic-block.h" +#include "function.h" +#include "fold-const.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimplify.h" +#include "explow.h" +#include "emit-rtl.h" +#include "tree-vector-builder.h" +#include "stor-layout.h" +#include "regs.h" +#include "alias.h" +#include "gimple-fold.h" +#include "langhooks.h" +#include "stringpool.h" +#include "attribs.h" +#include "aarch64-sve-builtins.h" +#include "aarch64-sve-builtins-base.h" +#include "aarch64-sve-builtins-sve2.h" +#include "aarch64-sve-builtins-shapes.h" + +namespace aarch64_sve { + +/* Static information about each single-predicate or single-vector + ABI and ACLE type. */ +struct vector_type_info +{ + /* The name of the type as declared by arm_sve.h. */ + const char *acle_name; + + /* The name of the type specified in AAPCS64. The type is always + available under this name, even when arm_sve.h isn't included. */ + const char *abi_name; + + /* The C++ mangling of ABI_NAME. */ + const char *mangled_name; +}; + +/* Describes a function decl. */ +class GTY(()) registered_function +{ +public: + /* The ACLE function that the decl represents. */ + function_instance instance GTY ((skip)); + + /* The decl itself. */ + tree decl; + + /* The architecture extensions that the function requires, as a set of + AARCH64_FL_* flags. */ + uint64_t required_extensions; + + /* True if the decl represents an overloaded function that needs to be + resolved by function_resolver. */ + bool overloaded_p; +}; + +/* Hash traits for registered_function. 
*/ +struct registered_function_hasher : nofree_ptr_hash +{ + typedef function_instance compare_type; + + static hashval_t hash (value_type); + static bool equal (value_type, const compare_type &); +}; + +/* Information about each single-predicate or single-vector type. */ +static CONSTEXPR const vector_type_info vector_types[] = { +#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ + { #ACLE_NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME }, +#include "aarch64-sve-builtins.def" +}; + +/* The function name suffix associated with each predication type. */ +static const char *const pred_suffixes[NUM_PREDS + 1] = { + "", + "", + "_m", + "_x", + "_z", + "" +}; + +/* Static information about each mode_suffix_index. */ +CONSTEXPR const mode_suffix_info mode_suffixes[] = { +#define VECTOR_TYPE_none NUM_VECTOR_TYPES +#define DEF_SVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) \ + { "_" #NAME, VECTOR_TYPE_##BASE, VECTOR_TYPE_##DISPLACEMENT, UNITS_##UNITS }, +#include "aarch64-sve-builtins.def" +#undef VECTOR_TYPE_none + { "", NUM_VECTOR_TYPES, NUM_VECTOR_TYPES, UNITS_none } +}; + +/* Static information about each type_suffix_index. */ +CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { +#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ + { "_" #NAME, \ + VECTOR_TYPE_##ACLE_TYPE, \ + TYPE_##CLASS, \ + BITS, \ + BITS / BITS_PER_UNIT, \ + TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \ + TYPE_##CLASS == TYPE_unsigned, \ + TYPE_##CLASS == TYPE_float, \ + TYPE_##CLASS == TYPE_bool, \ + 0, \ + MODE }, +#include "aarch64-sve-builtins.def" + { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false, + 0, VOIDmode } +}; + +/* Define a TYPES_ macro for each combination of type + suffixes that an ACLE function can have, where is the + name used in DEF_SVE_FUNCTION entries. + + Use S (T) for single type suffix T and D (T1, T2) for a pair of type + suffixes T1 and T2. Use commas to separate the suffixes. + + Although the order shouldn't matter, the convention is to sort the + suffixes lexicographically after dividing suffixes into a type + class ("b", "f", etc.) and a numerical bit count. */ + +/* _b8 _b16 _b32 _b64. */ +#define TYPES_all_pred(S, D) \ + S (b8), S (b16), S (b32), S (b64) + +/* _f16 _f32 _f64. */ +#define TYPES_all_float(S, D) \ + S (f16), S (f32), S (f64) + +/* _s8 _s16 _s32 _s64. */ +#define TYPES_all_signed(S, D) \ + S (s8), S (s16), S (s32), S (s64) + +/* _f16 _f32 _f64 + _s8 _s16 _s32 _s64. */ +#define TYPES_all_float_and_signed(S, D) \ + TYPES_all_float (S, D), TYPES_all_signed (S, D) + +/* _u8 _u16 _u32 _u64. */ +#define TYPES_all_unsigned(S, D) \ + S (u8), S (u16), S (u32), S (u64) + +/* _s8 _s16 _s32 _s64 + _u8 _u16 _u32 _u64. */ +#define TYPES_all_integer(S, D) \ + TYPES_all_signed (S, D), TYPES_all_unsigned (S, D) + +/* _f16 _f32 _f64 + _s8 _s16 _s32 _s64 + _u8 _u16 _u32 _u64. */ +#define TYPES_all_arith(S, D) \ + TYPES_all_float (S, D), TYPES_all_integer (S, D) + +/* _bf16 + _f16 _f32 _f64 + _s8 _s16 _s32 _s64 + _u8 _u16 _u32 _u64. */ +#define TYPES_all_data(S, D) \ + S (bf16), TYPES_all_arith (S, D) + +/* _b only. */ +#define TYPES_b(S, D) \ + S (b) + +/* _u8. */ +#define TYPES_b_unsigned(S, D) \ + S (u8) + +/* _s8 + _u8. */ +#define TYPES_b_integer(S, D) \ + S (s8), TYPES_b_unsigned (S, D) + +/* _s8 _s16 + _u8 _u16. */ +#define TYPES_bh_integer(S, D) \ + S (s8), S (s16), S (u8), S (u16) + +/* _u8 _u32. */ +#define TYPES_bs_unsigned(S, D) \ + S (u8), S (u32) + +/* _s8 _s16 _s32. 
*/ +#define TYPES_bhs_signed(S, D) \ + S (s8), S (s16), S (s32) + +/* _u8 _u16 _u32. */ +#define TYPES_bhs_unsigned(S, D) \ + S (u8), S (u16), S (u32) + +/* _s8 _s16 _s32 + _u8 _u16 _u32. */ +#define TYPES_bhs_integer(S, D) \ + TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D) + +/* _s16 + _u16. */ +#define TYPES_h_integer(S, D) \ + S (s16), S (u16) + +/* _s16 _s32. */ +#define TYPES_hs_signed(S, D) \ + S (s16), S (s32) + +/* _s16 _s32 + _u16 _u32. */ +#define TYPES_hs_integer(S, D) \ + TYPES_hs_signed (S, D), S (u16), S (u32) + +/* _f16 _f32. */ +#define TYPES_hs_float(S, D) \ + S (f16), S (f32) + +/* _u16 _u64. */ +#define TYPES_hd_unsigned(S, D) \ + S (u16), S (u64) + +/* _s16 _s32 _s64. */ +#define TYPES_hsd_signed(S, D) \ + S (s16), S (s32), S (s64) + +/* _s16 _s32 _s64 + _u16 _u32 _u64. */ +#define TYPES_hsd_integer(S, D) \ + TYPES_hsd_signed (S, D), S (u16), S (u32), S (u64) + +/* _f32. */ +#define TYPES_s_float(S, D) \ + S (f32) + +/* _f32 + _s16 _s32 _s64 + _u16 _u32 _u64. */ +#define TYPES_s_float_hsd_integer(S, D) \ + TYPES_s_float (S, D), TYPES_hsd_integer (S, D) + +/* _f32 + _s32 _s64 + _u32 _u64. */ +#define TYPES_s_float_sd_integer(S, D) \ + TYPES_s_float (S, D), TYPES_sd_integer (S, D) + +/* _s32. */ +#define TYPES_s_signed(S, D) \ + S (s32) + +/* _u32. */ +#define TYPES_s_unsigned(S, D) \ + S (u32) + +/* _s32 _u32. */ +#define TYPES_s_integer(S, D) \ + TYPES_s_signed (S, D), TYPES_s_unsigned (S, D) + +/* _s32 _s64. */ +#define TYPES_sd_signed(S, D) \ + S (s32), S (s64) + +/* _u32 _u64. */ +#define TYPES_sd_unsigned(S, D) \ + S (u32), S (u64) + +/* _s32 _s64 + _u32 _u64. */ +#define TYPES_sd_integer(S, D) \ + TYPES_sd_signed (S, D), TYPES_sd_unsigned (S, D) + +/* _f32 _f64 + _s32 _s64 + _u32 _u64. */ +#define TYPES_sd_data(S, D) \ + S (f32), S (f64), TYPES_sd_integer (S, D) + +/* _f16 _f32 _f64 + _s32 _s64 + _u32 _u64. */ +#define TYPES_all_float_and_sd_integer(S, D) \ + TYPES_all_float (S, D), TYPES_sd_integer (S, D) + +/* _f64. */ +#define TYPES_d_float(S, D) \ + S (f64) + +/* _u64. */ +#define TYPES_d_unsigned(S, D) \ + S (u64) + +/* _s64 + _u64. */ +#define TYPES_d_integer(S, D) \ + S (s64), TYPES_d_unsigned (S, D) + +/* _f64 + _s64 + _u64. */ +#define TYPES_d_data(S, D) \ + TYPES_d_float (S, D), TYPES_d_integer (S, D) + +/* All the type combinations allowed by svcvt. */ +#define TYPES_cvt(S, D) \ + D (f16, f32), D (f16, f64), \ + D (f16, s16), D (f16, s32), D (f16, s64), \ + D (f16, u16), D (f16, u32), D (f16, u64), \ + \ + D (f32, f16), D (f32, f64), \ + D (f32, s32), D (f32, s64), \ + D (f32, u32), D (f32, u64), \ + \ + D (f64, f16), D (f64, f32), \ + D (f64, s32), D (f64, s64), \ + D (f64, u32), D (f64, u64), \ + \ + D (s16, f16), \ + D (s32, f16), D (s32, f32), D (s32, f64), \ + D (s64, f16), D (s64, f32), D (s64, f64), \ + \ + D (u16, f16), \ + D (u32, f16), D (u32, f32), D (u32, f64), \ + D (u64, f16), D (u64, f32), D (u64, f64) + +/* _bf16_f32. */ +#define TYPES_cvt_bfloat(S, D) \ + D (bf16, f32) + +/* _f32_f16 + _f64_f32. */ +#define TYPES_cvt_long(S, D) \ + D (f32, f16), D (f64, f32) + +/* _f16_f32. */ +#define TYPES_cvt_narrow_s(S, D) \ + D (f32, f64) + +/* _f16_f32 + _f32_f64. */ +#define TYPES_cvt_narrow(S, D) \ + D (f16, f32), TYPES_cvt_narrow_s (S, D) + +/* { _s32 _s64 } x { _b8 _b16 _b32 _b64 } + { _u32 _u64 }. 
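For illustration, the two macros that follow expand to the sixteen pairs D (s32, b8), D (s32, b16), D (s32, b32), D (s32, b64), and the corresponding pairs with s64, u32 and u64 as the first suffix.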
*/ +#define TYPES_inc_dec_n1(D, A) \ + D (A, b8), D (A, b16), D (A, b32), D (A, b64) +#define TYPES_inc_dec_n(S, D) \ + TYPES_inc_dec_n1 (D, s32), \ + TYPES_inc_dec_n1 (D, s64), \ + TYPES_inc_dec_n1 (D, u32), \ + TYPES_inc_dec_n1 (D, u64) + +/* { _bf16 } { _bf16 } + { _f16 _f32 _f64 } { _f16 _f32 _f64 } + { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 } + { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }. */ +#define TYPES_reinterpret1(D, A) \ + D (A, bf16), \ + D (A, f16), D (A, f32), D (A, f64), \ + D (A, s8), D (A, s16), D (A, s32), D (A, s64), \ + D (A, u8), D (A, u16), D (A, u32), D (A, u64) +#define TYPES_reinterpret(S, D) \ + TYPES_reinterpret1 (D, bf16), \ + TYPES_reinterpret1 (D, f16), \ + TYPES_reinterpret1 (D, f32), \ + TYPES_reinterpret1 (D, f64), \ + TYPES_reinterpret1 (D, s8), \ + TYPES_reinterpret1 (D, s16), \ + TYPES_reinterpret1 (D, s32), \ + TYPES_reinterpret1 (D, s64), \ + TYPES_reinterpret1 (D, u8), \ + TYPES_reinterpret1 (D, u16), \ + TYPES_reinterpret1 (D, u32), \ + TYPES_reinterpret1 (D, u64) + +/* { _b8 _b16 _b32 _b64 } x { _s32 _s64 } + { _u32 _u64 } */ +#define TYPES_while1(D, bn) \ + D (bn, s32), D (bn, s64), D (bn, u32), D (bn, u64) +#define TYPES_while(S, D) \ + TYPES_while1 (D, b8), \ + TYPES_while1 (D, b16), \ + TYPES_while1 (D, b32), \ + TYPES_while1 (D, b64) + +/* Describe a pair of type suffixes in which only the first is used. */ +#define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES } + +/* Describe a pair of type suffixes in which both are used. */ +#define DEF_DOUBLE_TYPE(X, Y) { TYPE_SUFFIX_ ## X, TYPE_SUFFIX_ ## Y } + +/* Create an array that can be used in aarch64-sve-builtins.def to + select the type suffixes in TYPES_<NAME>. */ +#define DEF_SVE_TYPES_ARRAY(NAME) \ + static const type_suffix_pair types_##NAME[] = { \ + TYPES_##NAME (DEF_VECTOR_TYPE, DEF_DOUBLE_TYPE), \ + { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } \ + } + +/* For functions that don't take any type suffixes. */ +static const type_suffix_pair types_none[] = { + { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES }, + { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } +}; + +/* Create an array for each TYPES_<combination> macro above. 
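For illustration, DEF_SVE_TYPES_ARRAY (d_unsigned) defines roughly: static const type_suffix_pair types_d_unsigned[] = { { TYPE_SUFFIX_u64, NUM_TYPE_SUFFIXES }, { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } };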
*/ +DEF_SVE_TYPES_ARRAY (all_pred); +DEF_SVE_TYPES_ARRAY (all_float); +DEF_SVE_TYPES_ARRAY (all_signed); +DEF_SVE_TYPES_ARRAY (all_float_and_signed); +DEF_SVE_TYPES_ARRAY (all_unsigned); +DEF_SVE_TYPES_ARRAY (all_integer); +DEF_SVE_TYPES_ARRAY (all_arith); +DEF_SVE_TYPES_ARRAY (all_data); +DEF_SVE_TYPES_ARRAY (b); +DEF_SVE_TYPES_ARRAY (b_unsigned); +DEF_SVE_TYPES_ARRAY (b_integer); +DEF_SVE_TYPES_ARRAY (bh_integer); +DEF_SVE_TYPES_ARRAY (bs_unsigned); +DEF_SVE_TYPES_ARRAY (bhs_signed); +DEF_SVE_TYPES_ARRAY (bhs_unsigned); +DEF_SVE_TYPES_ARRAY (bhs_integer); +DEF_SVE_TYPES_ARRAY (h_integer); +DEF_SVE_TYPES_ARRAY (hs_signed); +DEF_SVE_TYPES_ARRAY (hs_integer); +DEF_SVE_TYPES_ARRAY (hs_float); +DEF_SVE_TYPES_ARRAY (hd_unsigned); +DEF_SVE_TYPES_ARRAY (hsd_signed); +DEF_SVE_TYPES_ARRAY (hsd_integer); +DEF_SVE_TYPES_ARRAY (s_float); +DEF_SVE_TYPES_ARRAY (s_float_hsd_integer); +DEF_SVE_TYPES_ARRAY (s_float_sd_integer); +DEF_SVE_TYPES_ARRAY (s_signed); +DEF_SVE_TYPES_ARRAY (s_unsigned); +DEF_SVE_TYPES_ARRAY (s_integer); +DEF_SVE_TYPES_ARRAY (sd_signed); +DEF_SVE_TYPES_ARRAY (sd_unsigned); +DEF_SVE_TYPES_ARRAY (sd_integer); +DEF_SVE_TYPES_ARRAY (sd_data); +DEF_SVE_TYPES_ARRAY (all_float_and_sd_integer); +DEF_SVE_TYPES_ARRAY (d_float); +DEF_SVE_TYPES_ARRAY (d_unsigned); +DEF_SVE_TYPES_ARRAY (d_integer); +DEF_SVE_TYPES_ARRAY (d_data); +DEF_SVE_TYPES_ARRAY (cvt); +DEF_SVE_TYPES_ARRAY (cvt_bfloat); +DEF_SVE_TYPES_ARRAY (cvt_long); +DEF_SVE_TYPES_ARRAY (cvt_narrow_s); +DEF_SVE_TYPES_ARRAY (cvt_narrow); +DEF_SVE_TYPES_ARRAY (inc_dec_n); +DEF_SVE_TYPES_ARRAY (reinterpret); +DEF_SVE_TYPES_ARRAY (while); + +/* Used by functions that have no governing predicate. */ +static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; + +/* Used by functions that have a governing predicate but do not have an + explicit suffix. */ +static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS }; + +/* Used by functions that allow merging and "don't care" predication, + but are not suitable for predicated MOVPRFX. */ +static const predication_index preds_mx[] = { + PRED_m, PRED_x, NUM_PREDS +}; + +/* Used by functions that allow merging, zeroing and "don't care" + predication. */ +static const predication_index preds_mxz[] = { + PRED_m, PRED_x, PRED_z, NUM_PREDS +}; + +/* Used by functions that have the mxz predicated forms above, and in addition + have an unpredicated form. */ +static const predication_index preds_mxz_or_none[] = { + PRED_m, PRED_x, PRED_z, PRED_none, NUM_PREDS +}; + +/* Used by functions that allow merging and zeroing predication but have + no "_x" form. */ +static const predication_index preds_mz[] = { PRED_m, PRED_z, NUM_PREDS }; + +/* Used by functions that have an unpredicated form and a _z predicated + form. */ +static const predication_index preds_z_or_none[] = { + PRED_z, PRED_none, NUM_PREDS +}; + +/* Used by (mostly predicate) functions that only support "_z" predication. */ +static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; + +/* A list of all SVE ACLE functions. */ +static CONSTEXPR const function_group_info function_groups[] = { +#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ + { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \ + REQUIRED_EXTENSIONS | AARCH64_FL_SVE }, +#include "aarch64-sve-builtins.def" +}; + +/* The scalar type associated with each vector type. */ +GTY(()) tree scalar_types[NUM_VECTOR_TYPES]; + +/* The single-predicate and single-vector types, with their built-in + "__SV..._t" name. 
Allow an index of NUM_VECTOR_TYPES, which always + yields a null tree. */ +static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; + +/* Same, but with the arm_sve.h "sv..._t" name. */ +GTY(()) tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; + +/* The svpattern enum type. */ +GTY(()) tree acle_svpattern; + +/* The svprfop enum type. */ +GTY(()) tree acle_svprfop; + +/* The list of all registered function decls, indexed by code. */ +static GTY(()) vec<registered_function *, va_gc> *registered_functions; + +/* All registered function decls, hashed on the function_instance + that they implement. This is used for looking up implementations of + overloaded functions. */ +static hash_table<registered_function_hasher> *function_table; + +/* True if we've already complained about attempts to use functions + when the required extension is disabled. */ +static bool reported_missing_extension_p; + +/* True if we've already complained about attempts to use functions + which require registers that are missing. */ +static bool reported_missing_registers_p; + +/* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE vectors + and NUM_PR SVE predicates. MANGLED_NAME, if nonnull, is the ABI-defined + mangling of the type. ACLE_NAME is the name of the type. */ +static void +add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr, + const char *mangled_name, const char *acle_name) +{ + tree mangled_name_tree + = (mangled_name ? get_identifier (mangled_name) : NULL_TREE); + + tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE); + value = tree_cons (NULL_TREE, mangled_name_tree, value); + value = tree_cons (NULL_TREE, size_int (num_pr), value); + value = tree_cons (NULL_TREE, size_int (num_zr), value); + TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("SVE type"), value, + TYPE_ATTRIBUTES (type)); +} + +/* If TYPE is an ABI-defined SVE type, return its attribute descriptor, + otherwise return null. */ +static tree +lookup_sve_type_attribute (const_tree type) +{ + if (type == error_mark_node) + return NULL_TREE; + return lookup_attribute ("SVE type", TYPE_ATTRIBUTES (type)); +} + +/* Force TYPE to be a sizeless type. */ +static void +make_type_sizeless (tree type) +{ + TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("SVE sizeless type"), + NULL_TREE, TYPE_ATTRIBUTES (type)); +} + +/* Return true if TYPE is a sizeless type. */ +static bool +sizeless_type_p (const_tree type) +{ + if (type == error_mark_node) + return false; + return lookup_attribute ("SVE sizeless type", TYPE_ATTRIBUTES (type)); +} + +/* Return true if CANDIDATE is equivalent to MODEL_TYPE for overloading + purposes. */ +static bool +matches_type_p (const_tree model_type, const_tree candidate) +{ + if (VECTOR_TYPE_P (model_type)) + { + if (!VECTOR_TYPE_P (candidate) + || maybe_ne (TYPE_VECTOR_SUBPARTS (model_type), + TYPE_VECTOR_SUBPARTS (candidate)) + || TYPE_MODE (model_type) != TYPE_MODE (candidate)) + return false; + + model_type = TREE_TYPE (model_type); + candidate = TREE_TYPE (candidate); + } + return (candidate != error_mark_node + && TYPE_MAIN_VARIANT (model_type) == TYPE_MAIN_VARIANT (candidate)); +} + +/* If TYPE is a valid SVE element type, return the corresponding type + suffix, otherwise return NUM_TYPE_SUFFIXES. */ +static type_suffix_index +find_type_suffix_for_scalar_type (const_tree type) +{ + /* A linear search should be OK here, since the code isn't hot and + the number of types is only small. 
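(For example, an int32_t argument resolves here to TYPE_SUFFIX_s32 and a float64_t, i.e. double, argument to TYPE_SUFFIX_f64.)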
*/ + for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) + if (!type_suffixes[suffix_i].bool_p) + { + vector_type_index vector_i = type_suffixes[suffix_i].vector_type; + if (matches_type_p (scalar_types[vector_i], type)) + return type_suffix_index (suffix_i); + } + return NUM_TYPE_SUFFIXES; +} + +/* Report an error against LOCATION that the user has tried to use + function FNDECL when extension EXTENSION is disabled. */ +static void +report_missing_extension (location_t location, tree fndecl, + const char *extension) +{ + /* Avoid reporting a slew of messages for a single oversight. */ + if (reported_missing_extension_p) + return; + + error_at (location, "ACLE function %qD requires ISA extension %qs", + fndecl, extension); + inform (location, "you can enable %qs using the command-line" + " option %<-march%>, or by using the %<target%>" + " attribute or pragma", extension); + reported_missing_extension_p = true; +} + +/* Check whether the registers required by SVE function fndecl are available. + Report an error against LOCATION and return false if not. */ +static bool +check_required_registers (location_t location, tree fndecl) +{ + /* Avoid reporting a slew of messages for a single oversight. */ + if (reported_missing_registers_p) + return false; + + if (TARGET_GENERAL_REGS_ONLY) + { + /* SVE registers are not usable when -mgeneral-regs-only option + is specified. */ + error_at (location, + "ACLE function %qD is incompatible with the use of %qs", + fndecl, "-mgeneral-regs-only"); + reported_missing_registers_p = true; + return false; + } + + return true; +} + +/* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are + enabled, given that those extensions are required for function FNDECL. + Report an error against LOCATION if not. */ +static bool +check_required_extensions (location_t location, tree fndecl, + uint64_t required_extensions) +{ + uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags; + if (missing_extensions == 0) + return check_required_registers (location, fndecl); + + static const struct { uint64_t flag; const char *name; } extensions[] = { +#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ + SYNTHETIC, FEATURE_STRING) \ + { FLAG_CANONICAL, EXT_NAME }, +#include "aarch64-option-extensions.def" + }; + + for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i) + if (missing_extensions & extensions[i].flag) + { + report_missing_extension (location, fndecl, extensions[i].name); + return false; + } + gcc_unreachable (); +} + +/* Report that LOCATION has a call to FNDECL in which argument ARGNO + was not an integer constant expression. ARGNO counts from zero. */ +static void +report_non_ice (location_t location, tree fndecl, unsigned int argno) +{ + error_at (location, "argument %d of %qE must be an integer constant" + " expression", argno + 1, fndecl); +} + +/* Report that LOCATION has a call to FNDECL in which argument ARGNO has + the value ACTUAL, whereas the function requires a value in the range + [MIN, MAX]. ARGNO counts from zero. */ +static void +report_out_of_range (location_t location, tree fndecl, unsigned int argno, + HOST_WIDE_INT actual, HOST_WIDE_INT min, + HOST_WIDE_INT max) +{ + error_at (location, "passing %wd to argument %d of %qE, which expects" + " a value in the range [%wd, %wd]", actual, argno + 1, fndecl, + min, max); +} + +/* Report that LOCATION has a call to FNDECL in which argument ARGNO has + the value ACTUAL, whereas the function requires either VALUE0 or + VALUE1. 
ARGNO counts from zero. */ +static void +report_neither_nor (location_t location, tree fndecl, unsigned int argno, + HOST_WIDE_INT actual, HOST_WIDE_INT value0, + HOST_WIDE_INT value1) +{ + error_at (location, "passing %wd to argument %d of %qE, which expects" + " either %wd or %wd", actual, argno + 1, fndecl, value0, value1); +} + +/* Report that LOCATION has a call to FNDECL in which argument ARGNO has + the value ACTUAL, whereas the function requires one of VALUE0..3. + ARGNO counts from zero. */ +static void +report_not_one_of (location_t location, tree fndecl, unsigned int argno, + HOST_WIDE_INT actual, HOST_WIDE_INT value0, + HOST_WIDE_INT value1, HOST_WIDE_INT value2, + HOST_WIDE_INT value3) +{ + error_at (location, "passing %wd to argument %d of %qE, which expects" + " %wd, %wd, %wd or %wd", actual, argno + 1, fndecl, value0, value1, + value2, value3); +} + +/* Report that LOCATION has a call to FNDECL in which argument ARGNO has + the value ACTUAL, whereas the function requires a valid value of + enum type ENUMTYPE. ARGNO counts from zero. */ +static void +report_not_enum (location_t location, tree fndecl, unsigned int argno, + HOST_WIDE_INT actual, tree enumtype) +{ + error_at (location, "passing %wd to argument %d of %qE, which expects" + " a valid %qT value", actual, argno + 1, fndecl, enumtype); +} + +/* Return a hash code for a function_instance. */ +hashval_t +function_instance::hash () const +{ + inchash::hash h; + /* BASE uniquely determines BASE_NAME, so we don't need to hash both. */ + h.add_ptr (base); + h.add_ptr (shape); + h.add_int (mode_suffix_id); + h.add_int (type_suffix_ids[0]); + h.add_int (type_suffix_ids[1]); + h.add_int (pred); + return h.end (); +} + +/* Return a set of CP_* flags that describe what the function could do, + taking the command-line flags into account. */ +unsigned int +function_instance::call_properties () const +{ + unsigned int flags = base->call_properties (*this); + + /* -fno-trapping-math means that we can assume any FP exceptions + are not user-visible. */ + if (!flag_trapping_math) + flags &= ~CP_RAISE_FP_EXCEPTIONS; + + return flags; +} + +/* Return true if calls to the function could read some form of + global state. */ +bool +function_instance::reads_global_state_p () const +{ + unsigned int flags = call_properties (); + + /* Preserve any dependence on rounding mode, flush to zero mode, etc. + There is currently no way of turning this off; in particular, + -fno-rounding-math (which is the default) means that we should make + the usual assumptions about rounding mode, which for intrinsics means + acting as the instructions do. */ + if (flags & CP_READ_FPCR) + return true; + + /* Handle direct reads of global state. */ + return flags & (CP_READ_MEMORY | CP_READ_FFR); +} + +/* Return true if calls to the function could modify some form of + global state. */ +bool +function_instance::modifies_global_state_p () const +{ + unsigned int flags = call_properties (); + + /* Preserve any exception state written back to the FPCR, + unless -fno-trapping-math says this is unnecessary. */ + if (flags & CP_RAISE_FP_EXCEPTIONS) + return true; + + /* Treat prefetches as modifying global state, since that's the + only means we have of keeping them in their correct position. */ + if (flags & CP_PREFETCH_MEMORY) + return true; + + /* Handle direct modifications of global state. */ + return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR); +} + +/* Return true if calls to the function could raise a signal. 
*/ +bool +function_instance::could_trap_p () const +{ + unsigned int flags = call_properties (); + + /* Handle functions that could raise SIGFPE. */ + if (flags & CP_RAISE_FP_EXCEPTIONS) + return true; + + /* Handle functions that could raise SIGBUS or SIGSEGV. */ + if (flags & (CP_READ_MEMORY | CP_WRITE_MEMORY)) + return true; + + return false; +} + +inline hashval_t +registered_function_hasher::hash (value_type value) +{ + return value->instance.hash (); +} + +inline bool +registered_function_hasher::equal (value_type value, const compare_type &key) +{ + return value->instance == key; +} + +sve_switcher::sve_switcher () + : m_old_isa_flags (aarch64_isa_flags) +{ + /* Changing the ISA flags and have_regs_of_mode should be enough here. + We shouldn't need to pay the compile-time cost of a full target + switch. */ + aarch64_isa_flags = (AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16 + | AARCH64_FL_SVE); + + m_old_maximum_field_alignment = maximum_field_alignment; + maximum_field_alignment = 0; + + m_old_general_regs_only = TARGET_GENERAL_REGS_ONLY; + global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY; + + memcpy (m_old_have_regs_of_mode, have_regs_of_mode, + sizeof (have_regs_of_mode)); + for (int i = 0; i < NUM_MACHINE_MODES; ++i) + if (aarch64_sve_mode_p ((machine_mode) i)) + have_regs_of_mode[i] = true; +} + +sve_switcher::~sve_switcher () +{ + memcpy (have_regs_of_mode, m_old_have_regs_of_mode, + sizeof (have_regs_of_mode)); + if (m_old_general_regs_only) + global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY; + aarch64_isa_flags = m_old_isa_flags; + maximum_field_alignment = m_old_maximum_field_alignment; +} + +function_builder::function_builder () +{ + m_overload_type = build_function_type (void_type_node, void_list_node); + m_direct_overloads = lang_GNU_CXX (); + gcc_obstack_init (&m_string_obstack); +} + +function_builder::~function_builder () +{ + obstack_free (&m_string_obstack, NULL); +} + +/* Add NAME to the end of the function name being built. */ +void +function_builder::append_name (const char *name) +{ + obstack_grow (&m_string_obstack, name, strlen (name)); +} + +/* Zero-terminate and complete the function name being built. */ +char * +function_builder::finish_name () +{ + obstack_1grow (&m_string_obstack, 0); + return (char *) obstack_finish (&m_string_obstack); +} + +/* Return the overloaded or full function name for INSTANCE; OVERLOADED_P + selects which. Allocate the string on m_string_obstack; the caller + must use obstack_free to free it after use. */ +char * +function_builder::get_name (const function_instance &instance, + bool overloaded_p) +{ + append_name (instance.base_name); + if (overloaded_p) + switch (instance.displacement_units ()) + { + case UNITS_none: + break; + + case UNITS_bytes: + append_name ("_offset"); + break; + + case UNITS_elements: + append_name ("_index"); + break; + + case UNITS_vectors: + append_name ("_vnum"); + break; + } + else + append_name (instance.mode_suffix ().string); + for (unsigned int i = 0; i < 2; ++i) + if (!overloaded_p || instance.shape->explicit_type_suffix_p (i)) + append_name (instance.type_suffix (i).string); + append_name (pred_suffixes[instance.pred]); + return finish_name (); +} + +/* Add attribute NAME to ATTRS. */ +static tree +add_attribute (const char *name, tree attrs) +{ + return tree_cons (get_identifier (name), NULL_TREE, attrs); +} + +/* Return the appropriate function attributes for INSTANCE. 
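For example, a load such as svld1 reads memory but does not modify global state, so it would normally receive "pure", "nothrow" (when -fnon-call-exceptions is not in use) and "leaf", while an integer operation such as svadd_x would normally receive "const", "nothrow" and "leaf".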
*/ +tree +function_builder::get_attributes (const function_instance &instance) +{ + tree attrs = NULL_TREE; + + if (!instance.modifies_global_state_p ()) + { + if (instance.reads_global_state_p ()) + attrs = add_attribute ("pure", attrs); + else + attrs = add_attribute ("const", attrs); + } + + if (!flag_non_call_exceptions || !instance.could_trap_p ()) + attrs = add_attribute ("nothrow", attrs); + + return add_attribute ("leaf", attrs); +} + +/* Add a function called NAME with type FNTYPE and attributes ATTRS. + INSTANCE describes what the function does and OVERLOADED_P indicates + whether it is overloaded. REQUIRED_EXTENSIONS are the set of + architecture extensions that the function requires. */ +registered_function & +function_builder::add_function (const function_instance &instance, + const char *name, tree fntype, tree attrs, + uint64_t required_extensions, + bool overloaded_p, + bool placeholder_p) +{ + unsigned int code = vec_safe_length (registered_functions); + code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_SVE; + + /* We need to be able to generate placeholders to ensure that we have a + consistent numbering scheme for function codes between the C and C++ + frontends, so that everything ties up in LTO. + + Currently, tree-streamer-in.c:unpack_ts_function_decl_value_fields + validates that tree nodes returned by TARGET_BUILTIN_DECL are non-NULL and + some node other than error_mark_node. This is a holdover from when builtin + decls were streamed by code rather than by value. + + Ultimately, we should be able to remove this validation of BUILT_IN_MD + nodes and remove the target hook. For now, however, we need to appease the + validation and return a non-NULL, non-error_mark_node node, so we + arbitrarily choose integer_zero_node. */ + tree decl = placeholder_p + ? integer_zero_node + : simulate_builtin_function_decl (input_location, name, fntype, + code, NULL, attrs); + + registered_function &rfn = *ggc_alloc <registered_function> (); + rfn.instance = instance; + rfn.decl = decl; + rfn.required_extensions = required_extensions; + rfn.overloaded_p = overloaded_p; + vec_safe_push (registered_functions, &rfn); + + return rfn; +} + +/* Add a built-in function for INSTANCE, with the argument types given + by ARGUMENT_TYPES and the return type given by RETURN_TYPE. + REQUIRED_EXTENSIONS are the set of architecture extensions that the + function requires. FORCE_DIRECT_OVERLOADS is true if there is a + one-to-one mapping between "short" and "full" names, and if standard + overload resolution therefore isn't necessary. */ +void +function_builder::add_unique_function (const function_instance &instance, + tree return_type, + vec<tree> &argument_types, + uint64_t required_extensions, + bool force_direct_overloads) +{ + /* Add the function under its full (unique) name. */ + char *name = get_name (instance, false); + tree fntype = build_function_type_array (return_type, + argument_types.length (), + argument_types.address ()); + tree attrs = get_attributes (instance); + registered_function &rfn = add_function (instance, name, fntype, attrs, + required_extensions, false, false); + + /* Enter the function into the hash table. */ + hashval_t hash = instance.hash (); + registered_function **rfn_slot + = function_table->find_slot_with_hash (instance, hash, INSERT); + gcc_assert (!*rfn_slot); + *rfn_slot = &rfn; + + /* Also add the function under its overloaded alias, if we want + a separate decl for each instance of an overloaded function. 
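For example, a decl named something like "svabalb_u16" would also be registered here under the shorter alias "svabalb"; when direct overloads are not wanted, the alias decl is only a numbering placeholder.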
*/ + char *overload_name = get_name (instance, true); + if (strcmp (name, overload_name) != 0) + { + /* Attribute lists shouldn't be shared. */ + tree attrs = get_attributes (instance); + bool placeholder_p = !(m_direct_overloads || force_direct_overloads); + add_function (instance, overload_name, fntype, attrs, + required_extensions, false, placeholder_p); + } + + obstack_free (&m_string_obstack, name); +} + +/* Add one function decl for INSTANCE, to be used with manual overload + resolution. REQUIRED_EXTENSIONS are the set of architecture extensions + that the function requires. + + For simplicity, deal with duplicate attempts to add the same function, + including cases in which the new function requires more features than + the original one did. In that case we'll check whether the required + features are available as part of resolving the function to the + relevant unique function. */ +void +function_builder::add_overloaded_function (const function_instance &instance, + uint64_t required_extensions) +{ + char *name = get_name (instance, true); + if (registered_function **map_value = m_overload_names.get (name)) + { + gcc_assert ((*map_value)->instance == instance + && ((*map_value)->required_extensions + & ~required_extensions) == 0); + obstack_free (&m_string_obstack, name); + } + else + { + registered_function &rfn + = add_function (instance, name, m_overload_type, NULL_TREE, + required_extensions, true, m_direct_overloads); + m_overload_names.put (name, &rfn); + } +} + +/* If we are using manual overload resolution, add one function decl + for each overloaded function in GROUP. Take the function base name + from GROUP and the mode from MODE. */ +void +function_builder::add_overloaded_functions (const function_group_info &group, + mode_suffix_index mode) +{ + unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0); + unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1); + for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) + { + if (!explicit_type0 && !explicit_type1) + { + /* Deal with the common case in which there is one overloaded + function for all type combinations. */ + function_instance instance (group.base_name, *group.base, + *group.shape, mode, types_none[0], + group.preds[pi]); + add_overloaded_function (instance, group.required_extensions); + } + else + for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; + ++ti) + { + /* Stub out the types that are determined by overload + resolution. */ + type_suffix_pair types = { + explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES, + explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES + }; + function_instance instance (group.base_name, *group.base, + *group.shape, mode, types, + group.preds[pi]); + add_overloaded_function (instance, group.required_extensions); + } + } +} + +/* Register all the functions in GROUP. */ +void +function_builder::register_function_group (const function_group_info &group) +{ + (*group.shape)->build (*this, group); +} + +function_call_info::function_call_info (location_t location_in, + const function_instance &instance_in, + tree fndecl_in) + : function_instance (instance_in), location (location_in), fndecl (fndecl_in) +{ +} + +function_resolver::function_resolver (location_t location, + const function_instance &instance, + tree fndecl, vec &arglist) + : function_call_info (location, instance, fndecl), m_arglist (arglist) +{ +} + +/* Return the vector type associated with type suffix TYPE. 
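For example, TYPE_SUFFIX_f32 yields svfloat32_t and TYPE_SUFFIX_b yields svbool_t.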
*/ +tree +function_resolver::get_vector_type (type_suffix_index type) +{ + return acle_vector_types[0][type_suffixes[type].vector_type]; +} + +/* Return the name associated with TYPE. Using the + name should be more user-friendly than the underlying canonical type, + since it makes the signedness and bitwidth explicit. */ +const char * +function_resolver::get_scalar_type_name (type_suffix_index type) +{ + return vector_types[type_suffixes[type].vector_type].acle_name + 2; +} + +/* Return the type of argument I, or error_mark_node if it isn't + well-formed. */ +tree +function_resolver::get_argument_type (unsigned int i) +{ + tree arg = m_arglist[i]; + return arg == error_mark_node ? arg : TREE_TYPE (arg); +} + +/* Return true if argument I is some form of scalar value. */ +bool +function_resolver::scalar_argument_p (unsigned int i) +{ + tree type = get_argument_type (i); + return (INTEGRAL_TYPE_P (type) + /* Allow pointer types, leaving the frontend to warn where + necessary. */ + || POINTER_TYPE_P (type) + || SCALAR_FLOAT_TYPE_P (type)); +} + +/* Report that the function has no form that takes type suffix TYPE. + Return error_mark_node. */ +tree +function_resolver::report_no_such_form (type_suffix_index type) +{ + error_at (location, "%qE has no form that takes %qT arguments", + fndecl, get_vector_type (type)); + return error_mark_node; +} + +/* Silently check whether there is an instance of the function with the + mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1. + Return its function decl if so, otherwise return null. */ +tree +function_resolver::lookup_form (mode_suffix_index mode, + type_suffix_index type0, + type_suffix_index type1) +{ + type_suffix_pair types = { type0, type1 }; + function_instance instance (base_name, base, shape, mode, types, pred); + registered_function *rfn + = function_table->find_with_hash (instance, instance.hash ()); + return rfn ? rfn->decl : NULL_TREE; +} + +/* Resolve the function to one with the mode suffix given by MODE and the + type suffixes given by TYPE0 and TYPE1. Return its function decl on + success, otherwise report an error and return error_mark_node. */ +tree +function_resolver::resolve_to (mode_suffix_index mode, + type_suffix_index type0, + type_suffix_index type1) +{ + tree res = lookup_form (mode, type0, type1); + if (!res) + { + if (type1 == NUM_TYPE_SUFFIXES) + return report_no_such_form (type0); + if (type0 == type_suffix_ids[0]) + return report_no_such_form (type1); + /* To be filled in when we have other cases. */ + gcc_unreachable (); + } + return res; +} + +/* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type. + Return the associated type suffix on success, otherwise report an + error and return NUM_TYPE_SUFFIXES. */ +type_suffix_index +function_resolver::infer_integer_scalar_type (unsigned int argno) +{ + tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return NUM_TYPE_SUFFIXES; + + /* Allow enums and booleans to decay to integers, for compatibility + with C++ overloading rules. */ + if (INTEGRAL_TYPE_P (actual)) + { + bool uns_p = TYPE_UNSIGNED (actual); + /* Honor the usual integer promotions, so that resolution works + in the same way as for C++. */ + if (TYPE_PRECISION (actual) < 32) + return TYPE_SUFFIX_s32; + if (TYPE_PRECISION (actual) == 32) + return uns_p ? TYPE_SUFFIX_u32 : TYPE_SUFFIX_s32; + if (TYPE_PRECISION (actual) == 64) + return uns_p ? 
TYPE_SUFFIX_u64 : TYPE_SUFFIX_s64; + } + + error_at (location, "passing %qT to argument %d of %qE, which expects" + " a 32-bit or 64-bit integer type", actual, argno + 1, fndecl); + return NUM_TYPE_SUFFIXES; +} + +/* Require argument ARGNO to be a pointer to a scalar type that has a + corresponding type suffix. Return that type suffix on success, + otherwise report an error and return NUM_TYPE_SUFFIXES. + GATHER_SCATTER_P is true if the function is a gather/scatter + operation, and so requires a pointer to 32-bit or 64-bit data. */ +type_suffix_index +function_resolver::infer_pointer_type (unsigned int argno, + bool gather_scatter_p) +{ + tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return NUM_TYPE_SUFFIXES; + + if (TREE_CODE (actual) != POINTER_TYPE) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a pointer type", actual, argno + 1, fndecl); + if (VECTOR_TYPE_P (actual) && gather_scatter_p) + inform (location, "an explicit type suffix is needed" + " when using a vector of base addresses"); + return NUM_TYPE_SUFFIXES; + } + + tree target = TREE_TYPE (actual); + type_suffix_index type = find_type_suffix_for_scalar_type (target); + if (type == NUM_TYPE_SUFFIXES) + { + error_at (location, "passing %qT to argument %d of %qE, but %qT is not" + " a valid SVE element type", actual, argno + 1, fndecl, + build_qualified_type (target, 0)); + return NUM_TYPE_SUFFIXES; + } + unsigned int bits = type_suffixes[type].element_bits; + if (gather_scatter_p && bits != 32 && bits != 64) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a pointer to 32-bit or 64-bit elements", + actual, argno + 1, fndecl); + return NUM_TYPE_SUFFIXES; + } + + return type; +} + +/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type + suffix on success, using TYPE_SUFFIX_b for predicates. Report an error + and return NUM_TYPE_SUFFIXES on failure. */ +type_suffix_index +function_resolver::infer_vector_or_tuple_type (unsigned int argno, + unsigned int num_vectors) +{ + tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return NUM_TYPE_SUFFIXES; + + /* A linear search should be OK here, since the code isn't hot and + the number of types is only small. 
*/ + for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) + for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) + { + vector_type_index type_i = type_suffixes[suffix_i].vector_type; + tree type = acle_vector_types[size_i][type_i]; + if (type && matches_type_p (type, actual)) + { + if (size_i + 1 == num_vectors) + return type_suffix_index (suffix_i); + + if (num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a single SVE vector rather than a tuple", + actual, argno + 1, fndecl); + else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t) + error_at (location, "passing single vector %qT to argument %d" + " of %qE, which expects a tuple of %d vectors", + actual, argno + 1, fndecl, num_vectors); + else + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a tuple of %d vectors", actual, argno + 1, + fndecl, num_vectors); + return NUM_TYPE_SUFFIXES; + } + } + + if (num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" + " expects an SVE vector type", actual, argno + 1, fndecl); + else + error_at (location, "passing %qT to argument %d of %qE, which" + " expects an SVE tuple type", actual, argno + 1, fndecl); + return NUM_TYPE_SUFFIXES; +} + +/* Require argument ARGNO to have some form of vector type. Return the + associated type suffix on success, using TYPE_SUFFIX_b for predicates. + Report an error and return NUM_TYPE_SUFFIXES on failure. */ +type_suffix_index +function_resolver::infer_vector_type (unsigned int argno) +{ + return infer_vector_or_tuple_type (argno, 1); +} + +/* Like infer_vector_type, but also require the type to be integral. */ +type_suffix_index +function_resolver::infer_integer_vector_type (unsigned int argno) +{ + type_suffix_index type = infer_vector_type (argno); + if (type == NUM_TYPE_SUFFIXES) + return type; + + if (!type_suffixes[type].integer_p) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of integers", get_argument_type (argno), + argno + 1, fndecl); + return NUM_TYPE_SUFFIXES; + } + + return type; +} + +/* Like infer_vector_type, but also require the type to be an unsigned + integer. */ +type_suffix_index +function_resolver::infer_unsigned_vector_type (unsigned int argno) +{ + type_suffix_index type = infer_vector_type (argno); + if (type == NUM_TYPE_SUFFIXES) + return type; + + if (!type_suffixes[type].unsigned_p) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of unsigned integers", + get_argument_type (argno), argno + 1, fndecl); + return NUM_TYPE_SUFFIXES; + } + + return type; +} + +/* Like infer_vector_type, but also require the element size to be + 32 or 64 bits. */ +type_suffix_index +function_resolver::infer_sd_vector_type (unsigned int argno) +{ + type_suffix_index type = infer_vector_type (argno); + if (type == NUM_TYPE_SUFFIXES) + return type; + + unsigned int bits = type_suffixes[type].element_bits; + if (bits != 32 && bits != 64) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of 32-bit or 64-bit elements", + get_argument_type (argno), argno + 1, fndecl); + return NUM_TYPE_SUFFIXES; + } + + return type; +} + +/* If the function operates on tuples of vectors, require argument ARGNO to be + a tuple with the appropriate number of vectors, otherwise require it to be + a single vector. Return the associated type suffix on success, using + TYPE_SUFFIX_b for predicates. 
Report an error and return NUM_TYPE_SUFFIXES + on failure. */ +type_suffix_index +function_resolver::infer_tuple_type (unsigned int argno) +{ + return infer_vector_or_tuple_type (argno, vectors_per_tuple ()); +} + +/* Require argument ARGNO to be a vector or scalar argument. Return true + if it is, otherwise report an appropriate error. */ +bool +function_resolver::require_vector_or_scalar_type (unsigned int argno) +{ + tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return false; + + if (!scalar_argument_p (argno) && !VECTOR_TYPE_P (actual)) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector or scalar type", actual, argno + 1, fndecl); + return false; + } + + return true; +} + +/* Require argument ARGNO to have vector type TYPE, in cases where this + requirement holds for all uses of the function. Return true if the + argument has the right form, otherwise report an appropriate error. */ +bool +function_resolver::require_vector_type (unsigned int argno, + vector_type_index type) +{ + tree expected = acle_vector_types[0][type]; + tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return false; + + if (!matches_type_p (expected, actual)) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects %qT", actual, argno + 1, fndecl, expected); + return false; + } + return true; +} + +/* Like require_vector_type, but TYPE is inferred from previous arguments + rather than being a fixed part of the function signature. This changes + the nature of the error messages. */ +bool +function_resolver::require_matching_vector_type (unsigned int argno, + type_suffix_index type) +{ + type_suffix_index new_type = infer_vector_type (argno); + if (new_type == NUM_TYPE_SUFFIXES) + return false; + + if (type != new_type) + { + error_at (location, "passing %qT to argument %d of %qE, but" + " previous arguments had type %qT", + get_vector_type (new_type), argno + 1, fndecl, + get_vector_type (type)); + return false; + } + return true; +} + +/* Require argument ARGNO to be a vector type with the following properties: + + - the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS + is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself. + + - the element size must be: + + - the same as FIRST_TYPE's if EXPECTED_BITS == SAME_SIZE + - half of FIRST_TYPE's if EXPECTED_BITS == HALF_SIZE + - a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE + - EXPECTED_BITS itself otherwise + + Return true if the argument has the required type, otherwise report + an appropriate error. + + FIRST_ARGNO is the first argument that is known to have type FIRST_TYPE. + Usually it comes before ARGNO, but sometimes it is more natural to resolve + arguments out of order. + + If the required properties depend on FIRST_TYPE then both FIRST_ARGNO and + ARGNO contribute to the resolution process. If the required properties + are fixed, only FIRST_ARGNO contributes to the resolution process. + + This function is a bit of a Swiss army knife. The complication comes + from trying to give good error messages when FIRST_ARGNO and ARGNO are + inconsistent, since either of them might be wrong. */ +bool function_resolver:: +require_derived_vector_type (unsigned int argno, + unsigned int first_argno, + type_suffix_index first_type, + type_class_index expected_tclass, + unsigned int expected_bits) +{ + /* If the type needs to match FIRST_ARGNO exactly, use the preferred + error message for that case. 
The VECTOR_TYPE_P test excludes tuple + types, which we handle below instead. */ + bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno)); + if (both_vectors_p + && expected_tclass == SAME_TYPE_CLASS + && expected_bits == SAME_SIZE) + { + /* There's no need to resolve this case out of order. */ + gcc_assert (argno > first_argno); + return require_matching_vector_type (argno, first_type); + } + + /* Use FIRST_TYPE to get the expected type class and element size. */ + type_class_index orig_expected_tclass = expected_tclass; + if (expected_tclass == NUM_TYPE_CLASSES) + expected_tclass = type_suffixes[first_type].tclass; + + unsigned int orig_expected_bits = expected_bits; + if (expected_bits == SAME_SIZE) + expected_bits = type_suffixes[first_type].element_bits; + else if (expected_bits == HALF_SIZE) + expected_bits = type_suffixes[first_type].element_bits / 2; + else if (expected_bits == QUARTER_SIZE) + expected_bits = type_suffixes[first_type].element_bits / 4; + + /* If the expected type doesn't depend on FIRST_TYPE at all, + just check for the fixed choice of vector type. */ + if (expected_tclass == orig_expected_tclass + && expected_bits == orig_expected_bits) + { + const type_suffix_info &expected_suffix + = type_suffixes[find_type_suffix (expected_tclass, expected_bits)]; + return require_vector_type (argno, expected_suffix.vector_type); + } + + /* Require the argument to be some form of SVE vector type, + without being specific about the type of vector we want. */ + type_suffix_index actual_type = infer_vector_type (argno); + if (actual_type == NUM_TYPE_SUFFIXES) + return false; + + /* Exit now if we got the right type. */ + bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass); + bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits); + if (tclass_ok_p && size_ok_p) + return true; + + /* First look for cases in which the actual type contravenes a fixed + size requirement, without having to refer to FIRST_TYPE. */ + if (!size_ok_p && expected_bits == orig_expected_bits) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of %d-bit elements", + get_vector_type (actual_type), argno + 1, fndecl, + expected_bits); + return false; + } + + /* Likewise for a fixed type class requirement. This is only ever + needed for signed and unsigned types, so don't create unnecessary + translation work for other type classes. */ + if (!tclass_ok_p && orig_expected_tclass == TYPE_signed) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of signed integers", + get_vector_type (actual_type), argno + 1, fndecl); + return false; + } + if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of unsigned integers", + get_vector_type (actual_type), argno + 1, fndecl); + return false; + } + + /* Make sure that FIRST_TYPE itself is sensible before using it + as a basis for an error message. */ + if (resolve_to (mode_suffix_id, first_type) == error_mark_node) + return false; + + /* If the arguments have consistent type classes, but a link between + the sizes has been broken, try to describe the error in those terms. 
*/ + if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE) + { + if (argno < first_argno) + { + std::swap (argno, first_argno); + std::swap (actual_type, first_type); + } + error_at (location, "arguments %d and %d of %qE must have the" + " same element size, but the values passed here have type" + " %qT and %qT respectively", first_argno + 1, argno + 1, + fndecl, get_vector_type (first_type), + get_vector_type (actual_type)); + return false; + } + + /* Likewise in reverse: look for cases in which the sizes are consistent + but a link between the type classes has been broken. */ + if (both_vectors_p + && size_ok_p + && orig_expected_tclass == SAME_TYPE_CLASS + && type_suffixes[first_type].integer_p + && type_suffixes[actual_type].integer_p) + { + if (argno < first_argno) + { + std::swap (argno, first_argno); + std::swap (actual_type, first_type); + } + error_at (location, "arguments %d and %d of %qE must have the" + " same signedness, but the values passed here have type" + " %qT and %qT respectively", first_argno + 1, argno + 1, + fndecl, get_vector_type (first_type), + get_vector_type (actual_type)); + return false; + } + + /* The two arguments are wildly inconsistent. */ + type_suffix_index expected_type + = find_type_suffix (expected_tclass, expected_bits); + error_at (location, "passing %qT instead of the expected %qT to argument" + " %d of %qE, after passing %qT to argument %d", + get_vector_type (actual_type), get_vector_type (expected_type), + argno + 1, fndecl, get_argument_type (first_argno), + first_argno + 1); + return false; +} + +/* Require argument ARGNO to match argument FIRST_ARGNO, which was inferred + to be a pointer to a scalar element of type TYPE. */ +bool +function_resolver::require_matching_pointer_type (unsigned int argno, + unsigned int first_argno, + type_suffix_index type) +{ + type_suffix_index new_type = infer_pointer_type (argno); + if (new_type == NUM_TYPE_SUFFIXES) + return false; + + if (type != new_type) + { + error_at (location, "passing %qT to argument %d of %qE, but" + " argument %d had type %qT", get_argument_type (argno), + argno + 1, fndecl, first_argno + 1, + get_argument_type (first_argno)); + return false; + } + return true; +} + +/* Require argument ARGNO to be a (possibly variable) scalar, using EXPECTED + as the name of its expected type. Return true if the argument has the + right form, otherwise report an appropriate error. */ +bool +function_resolver::require_scalar_type (unsigned int argno, + const char *expected) +{ + if (!scalar_argument_p (argno)) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects %qs", get_argument_type (argno), argno + 1, + fndecl, expected); + return false; + } + return true; +} + +/* Require argument ARGNO to be some form of pointer, without being specific + about its target type. Return true if the argument has the right form, + otherwise report an appropriate error. */ +bool +function_resolver::require_pointer_type (unsigned int argno) +{ + if (!scalar_argument_p (argno)) + { + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a scalar pointer", get_argument_type (argno), + argno + 1, fndecl); + return false; + } + return true; +} + +/* Argument FIRST_ARGNO is a scalar with type EXPECTED_TYPE, and argument + ARGNO should be consistent with it. Return true if it is, otherwise + report an appropriate error. 
*/ +bool function_resolver:: +require_matching_integer_scalar_type (unsigned int argno, + unsigned int first_argno, + type_suffix_index expected_type) +{ + type_suffix_index actual_type = infer_integer_scalar_type (argno); + if (actual_type == NUM_TYPE_SUFFIXES) + return false; + + if (actual_type == expected_type) + return true; + + error_at (location, "call to %qE is ambiguous; argument %d has type" + " %qs but argument %d has type %qs", fndecl, + first_argno + 1, get_scalar_type_name (expected_type), + argno + 1, get_scalar_type_name (actual_type)); + return false; +} + +/* Require argument ARGNO to be a (possibly variable) scalar, expecting it + to have the following properties: + + - the type class must be the same as for type suffix 0 if EXPECTED_TCLASS + is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself. + + - the element size must be the same as for type suffix 0 if EXPECTED_BITS + is SAME_TYPE_SIZE, otherwise it must be EXPECTED_BITS itself. + + Return true if the argument is valid, otherwise report an appropriate error. + + Note that we don't check whether the scalar type actually has the required + properties, since that's subject to implicit promotions and conversions. + Instead we just use the expected properties to tune the error message. */ +bool function_resolver:: +require_derived_scalar_type (unsigned int argno, + type_class_index expected_tclass, + unsigned int expected_bits) +{ + gcc_assert (expected_tclass == SAME_TYPE_CLASS + || expected_tclass == TYPE_signed + || expected_tclass == TYPE_unsigned); + + /* If the expected type doesn't depend on the type suffix at all, + just check for the fixed choice of scalar type. */ + if (expected_tclass != SAME_TYPE_CLASS && expected_bits != SAME_SIZE) + { + type_suffix_index expected_type + = find_type_suffix (expected_tclass, expected_bits); + return require_scalar_type (argno, get_scalar_type_name (expected_type)); + } + + if (scalar_argument_p (argno)) + return true; + + if (expected_tclass == SAME_TYPE_CLASS) + /* It doesn't really matter whether the element is expected to be + the same size as type suffix 0. */ + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a scalar element", get_argument_type (argno), + argno + 1, fndecl); + else + /* It doesn't seem useful to distinguish between signed and unsigned + scalars here. */ + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a scalar integer", get_argument_type (argno), + argno + 1, fndecl); + return false; +} + +/* Require argument ARGNO to be suitable for an integer constant expression. + Return true if it is, otherwise report an appropriate error. + + function_checker checks whether the argument is actually constant and + has a suitable range. The reason for distinguishing immediate arguments + here is because it provides more consistent error messages than + require_scalar_type would. */ +bool +function_resolver::require_integer_immediate (unsigned int argno) +{ + if (!scalar_argument_p (argno)) + { + report_non_ice (location, fndecl, argno); + return false; + } + return true; +} + +/* Require argument ARGNO to be a vector base in a gather-style address. + Return its type on success, otherwise return NUM_VECTOR_TYPES. 
*/ +vector_type_index +function_resolver::infer_vector_base_type (unsigned int argno) +{ + type_suffix_index type = infer_vector_type (argno); + if (type == NUM_TYPE_SUFFIXES) + return NUM_VECTOR_TYPES; + + if (type == TYPE_SUFFIX_u32 || type == TYPE_SUFFIX_u64) + return type_suffixes[type].vector_type; + + error_at (location, "passing %qT to argument %d of %qE, which" + " expects %qs or %qs", get_argument_type (argno), + argno + 1, fndecl, "svuint32_t", "svuint64_t"); + return NUM_VECTOR_TYPES; +} + +/* Require argument ARGNO to be a vector displacement in a gather-style + address. Return its type on success, otherwise return NUM_VECTOR_TYPES. */ +vector_type_index +function_resolver::infer_vector_displacement_type (unsigned int argno) +{ + type_suffix_index type = infer_integer_vector_type (argno); + if (type == NUM_TYPE_SUFFIXES) + return NUM_VECTOR_TYPES; + + if (type_suffixes[type].integer_p + && (type_suffixes[type].element_bits == 32 + || type_suffixes[type].element_bits == 64)) + return type_suffixes[type].vector_type; + + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of 32-bit or 64-bit integers", + get_argument_type (argno), argno + 1, fndecl); + return NUM_VECTOR_TYPES; +} + +/* Require argument ARGNO to be a vector displacement in a gather-style + address. There are three possible uses: + + - for loading into elements of type TYPE (when LOAD_P is true) + - for storing from elements of type TYPE (when LOAD_P is false) + - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES) + + The overloaded function's mode suffix determines the units of the + displacement (bytes for "_offset", elements for "_index"). + + Return the associated mode on success, otherwise report an error + and return MODE_none. */ +mode_suffix_index +function_resolver::resolve_sv_displacement (unsigned int argno, + type_suffix_index type, + bool load_p) +{ + if (type == NUM_TYPE_SUFFIXES) + { + /* For prefetches, the base is a void pointer and the displacement + can be any valid offset or index type. */ + vector_type_index displacement_vector_type + = infer_vector_displacement_type (argno); + if (displacement_vector_type == NUM_VECTOR_TYPES) + return MODE_none; + + mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES, + displacement_vector_type, + displacement_units ()); + gcc_assert (mode != MODE_none); + return mode; + } + + unsigned int required_bits = type_suffixes[type].element_bits; + if (required_bits == 32 + && displacement_units () == UNITS_elements + && !lookup_form (MODE_s32index, type) + && !lookup_form (MODE_u32index, type)) + { + if (lookup_form (MODE_u32base_index, type)) + { + if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) + { + gcc_assert (!load_p); + error_at (location, "when storing %qT, %qE requires a vector" + " base and a scalar index", get_vector_type (type), + fndecl); + } + else + error_at (location, "%qE requires a vector base and a scalar" + " index", fndecl); + } + else + error_at (location, "%qE does not support 32-bit vector type %qT", + fndecl, get_vector_type (type)); + return MODE_none; + } + + /* Check for some form of vector type, without naming any in particular + as being expected. */ + type_suffix_index displacement_type = infer_vector_type (argno); + if (displacement_type == NUM_TYPE_SUFFIXES) + return MODE_none; + + /* If the displacement type is consistent with the data vector type, + try to find the associated mode suffix. This will fall through + for non-integral displacement types. 
*/ + if (type_suffixes[displacement_type].element_bits == required_bits) + { + vector_type_index displacement_vector_type + = type_suffixes[displacement_type].vector_type; + mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES, + displacement_vector_type, + displacement_units ()); + if (mode != MODE_none) + { + if (mode == MODE_s32offset + && !lookup_form (mode, type) + && lookup_form (MODE_u32offset, type)) + { + if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) + error_at (location, "%qE does not support 32-bit sign-extended" + " offsets", fndecl); + else + error_at (location, "%qE does not support sign-extended" + " offsets", fndecl); + return MODE_none; + } + return mode; + } + } + + if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) + { + /* TYPE has been inferred rather than specified by the user, + so mention it in the error messages. */ + if (load_p) + error_at (location, "passing %qT to argument %d of %qE, which when" + " loading %qT expects a vector of %d-bit integers", + get_argument_type (argno), argno + 1, fndecl, + get_vector_type (type), required_bits); + else + error_at (location, "passing %qT to argument %d of %qE, which when" + " storing %qT expects a vector of %d-bit integers", + get_argument_type (argno), argno + 1, fndecl, + get_vector_type (type), required_bits); + } + else + /* TYPE is part of the function name. */ + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of %d-bit integers", + get_argument_type (argno), argno + 1, fndecl, required_bits); + return MODE_none; +} + +/* Require the arguments starting at ARGNO to form a gather-style address. + There are three possible uses: + + - for loading into elements of type TYPE (when LOAD_P is true) + - for storing from elements of type TYPE (when LOAD_P is false) + - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES) + + The three possible addresses are: + + - a vector base with no displacement + - a vector base and a scalar displacement + - a scalar (pointer) base and a vector displacement + + The overloaded function's mode suffix determines whether there is + a displacement, and if so, what units it uses: + + - MODE_none: no displacement + - MODE_offset: the displacement is measured in bytes + - MODE_index: the displacement is measured in elements + + Return the mode of the non-overloaded function on success, otherwise + report an error and return MODE_none. */ +mode_suffix_index +function_resolver::resolve_gather_address (unsigned int argno, + type_suffix_index type, + bool load_p) +{ + tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return MODE_none; + + if (displacement_units () != UNITS_none) + { + /* Some form of displacement is needed. First handle a scalar + pointer base and a vector displacement. */ + if (scalar_argument_p (argno)) + /* Don't check the pointer type here, since there's only one valid + choice. Leave that to the frontend. */ + return resolve_sv_displacement (argno + 1, type, load_p); + + if (!VECTOR_TYPE_P (actual)) + { + error_at (location, "passing %qT to argument %d of %qE," + " which expects a vector or pointer base address", + actual, argno + 1, fndecl); + return MODE_none; + } + } + + /* Check for the correct choice of vector base type. */ + vector_type_index base_vector_type; + if (type == NUM_TYPE_SUFFIXES) + { + /* Since prefetches have no type suffix, there is a free choice + between 32-bit and 64-bit base addresses. 
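+
+         For example, assuming the standard arm_sve.h spellings, both
+
+           svprfb_gather (pg, bases32, SV_PLDL1KEEP);   // svuint32_t bases
+           svprfb_gather (pg, bases64, SV_PLDL1KEEP);   // svuint64_t bases
+
+         are accepted here.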
*/ + base_vector_type = infer_vector_base_type (argno); + if (base_vector_type == NUM_VECTOR_TYPES) + return MODE_none; + } + else + { + /* Check for some form of vector type, without saying which type + we expect. */ + type_suffix_index base_type = infer_vector_type (argno); + if (base_type == NUM_TYPE_SUFFIXES) + return MODE_none; + + /* Check whether the type is the right one. */ + unsigned int required_bits = type_suffixes[type].element_bits; + gcc_assert (required_bits == 32 || required_bits == 64); + type_suffix_index required_type = (required_bits == 32 + ? TYPE_SUFFIX_u32 + : TYPE_SUFFIX_u64); + if (required_type != base_type) + { + error_at (location, "passing %qT to argument %d of %qE," + " which expects %qT", actual, argno + 1, fndecl, + get_vector_type (required_type)); + return MODE_none; + } + base_vector_type = type_suffixes[base_type].vector_type; + } + + /* Check the scalar displacement, if any. */ + if (displacement_units () != UNITS_none + && !require_scalar_type (argno + 1, "int64_t")) + return MODE_none; + + /* Find the appropriate mode suffix. The checks above should have + weeded out all erroneous cases. */ + for (unsigned int mode_i = 0; mode_i < ARRAY_SIZE (mode_suffixes); ++mode_i) + { + const mode_suffix_info &mode = mode_suffixes[mode_i]; + if (mode.base_vector_type == base_vector_type + && mode.displacement_vector_type == NUM_VECTOR_TYPES + && mode.displacement_units == displacement_units ()) + return mode_suffix_index (mode_i); + } + + gcc_unreachable (); +} + +/* Require arguments ARGNO and ARGNO + 1 to form an ADR-style address, + i.e. one with a vector of base addresses and a vector of displacements. + The overloaded function's mode suffix determines the units of the + displacement (bytes for "_offset", elements for "_index"). + + Return the associated mode suffix on success, otherwise report + an error and return MODE_none. */ +mode_suffix_index +function_resolver::resolve_adr_address (unsigned int argno) +{ + vector_type_index base_type = infer_vector_base_type (argno); + if (base_type == NUM_VECTOR_TYPES) + return MODE_none; + + vector_type_index displacement_type + = infer_vector_displacement_type (argno + 1); + if (displacement_type == NUM_VECTOR_TYPES) + return MODE_none; + + mode_suffix_index mode = find_mode_suffix (base_type, displacement_type, + displacement_units ()); + if (mode == MODE_none) + { + if (mode_suffix_id == MODE_offset) + error_at (location, "cannot combine a base of type %qT with" + " an offset of type %qT", + get_argument_type (argno), get_argument_type (argno + 1)); + else + error_at (location, "cannot combine a base of type %qT with" + " an index of type %qT", + get_argument_type (argno), get_argument_type (argno + 1)); + } + return mode; +} + +/* Require the function to have exactly EXPECTED arguments. Return true + if it does, otherwise report an appropriate error. */ +bool +function_resolver::check_num_arguments (unsigned int expected) +{ + if (m_arglist.length () < expected) + error_at (location, "too few arguments to function %qE", fndecl); + else if (m_arglist.length () > expected) + error_at (location, "too many arguments to function %qE", fndecl); + return m_arglist.length () == expected; +} + +/* If the function is predicated, check that the first argument is a + suitable governing predicate. Also check that there are NOPS further + arguments after any governing predicate, but don't check what they are. + + Return true on success, otherwise report a suitable error. 
+ When returning true: + + - set I to the number of the first unchecked argument. + - set NARGS to the total number of arguments. */ +bool +function_resolver::check_gp_argument (unsigned int nops, + unsigned int &i, unsigned int &nargs) +{ + i = 0; + if (pred != PRED_none) + { + /* Unary merge operations should use resolve_unary instead. */ + gcc_assert (nops != 1 || pred != PRED_m); + nargs = nops + 1; + if (!check_num_arguments (nargs) + || !require_vector_type (i, VECTOR_TYPE_svbool_t)) + return false; + i += 1; + } + else + { + nargs = nops; + if (!check_num_arguments (nargs)) + return false; + } + + return true; +} + +/* Finish resolving a function whose final argument can be a vector + or a scalar, with the function having an implicit "_n" suffix + in the latter case. This "_n" form might only exist for certain + type suffixes. + + ARGNO is the index of the final argument. The inferred type suffix + was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE. + EXPECTED_TCLASS and EXPECTED_BITS describe the expected properties + of the final vector or scalar argument, in the same way as for + require_derived_vector_type. INFERRED_TYPE is the inferred type + suffix itself, or NUM_TYPE_SUFFIXES if it's the same as FIRST_TYPE. + + Return the function decl of the resolved function on success, + otherwise report a suitable error and return error_mark_node. */ +tree function_resolver:: +finish_opt_n_resolution (unsigned int argno, unsigned int first_argno, + type_suffix_index first_type, + type_class_index expected_tclass, + unsigned int expected_bits, + type_suffix_index inferred_type) +{ + if (inferred_type == NUM_TYPE_SUFFIXES) + inferred_type = first_type; + tree scalar_form = lookup_form (MODE_n, inferred_type); + + /* Allow the final argument to be scalar, if an _n form exists. */ + if (scalar_argument_p (argno)) + { + if (scalar_form) + return scalar_form; + + /* Check the vector form normally. If that succeeds, raise an + error about having no corresponding _n form. */ + tree res = resolve_to (mode_suffix_id, inferred_type); + if (res != error_mark_node) + error_at (location, "passing %qT to argument %d of %qE, but its" + " %qT form does not accept scalars", + get_argument_type (argno), argno + 1, fndecl, + get_vector_type (first_type)); + return error_mark_node; + } + + /* If an _n form does exist, provide a more accurate message than + require_derived_vector_type would for arguments that are neither + vectors nor scalars. */ + if (scalar_form && !require_vector_or_scalar_type (argno)) + return error_mark_node; + + /* Check for the correct vector type. */ + if (!require_derived_vector_type (argno, first_argno, first_type, + expected_tclass, expected_bits)) + return error_mark_node; + + return resolve_to (mode_suffix_id, inferred_type); +} + +/* Resolve a (possibly predicated) unary function. If the function uses + merge predication or if TREAT_AS_MERGE_P is true, there is an extra + vector argument before the governing predicate that specifies the + values of inactive elements. This argument has the following + properties: + + - the type class must be the same as for active elements if MERGE_TCLASS + is SAME_TYPE_CLASS, otherwise it must be MERGE_TCLASS itself. + + - the element size must be the same as for active elements if MERGE_BITS + is SAME_TYPE_SIZE, otherwise it must be MERGE_BITS itself. + + Return the function decl of the resolved function on success, + otherwise report a suitable error and return error_mark_node. 
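+
+   For example, assuming the standard arm_sve.h spellings:
+
+     svabs_m (inactive, pg, x);   // inactive lanes are taken from INACTIVE
+     svabs_x (pg, x);             // inactive lanes have undefined values
+
+   both resolve to the non-overloaded function for the type of X.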
*/ +tree +function_resolver::resolve_unary (type_class_index merge_tclass, + unsigned int merge_bits, + bool treat_as_merge_p) +{ + type_suffix_index type; + if (pred == PRED_m || treat_as_merge_p) + { + if (!check_num_arguments (3)) + return error_mark_node; + if (merge_tclass == SAME_TYPE_CLASS && merge_bits == SAME_SIZE) + { + /* The inactive elements are the same as the active elements, + so we can use normal left-to-right resolution. */ + if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES + || !require_vector_type (1, VECTOR_TYPE_svbool_t) + || !require_matching_vector_type (2, type)) + return error_mark_node; + } + else + { + /* The inactive element type is a function of the active one, + so resolve the active one first. */ + if (!require_vector_type (1, VECTOR_TYPE_svbool_t) + || (type = infer_vector_type (2)) == NUM_TYPE_SUFFIXES + || !require_derived_vector_type (0, 2, type, merge_tclass, + merge_bits)) + return error_mark_node; + } + } + else + { + /* We just need to check the predicate (if any) and the single + vector argument. */ + unsigned int i, nargs; + if (!check_gp_argument (1, i, nargs) + || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + } + + /* Handle convert-like functions in which the first type suffix is + explicit. */ + if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES) + return resolve_to (mode_suffix_id, type_suffix_ids[0], type); + + return resolve_to (mode_suffix_id, type); +} + +/* Resolve a (possibly predicated) function that takes NOPS like-typed + vector arguments followed by NIMM integer immediates. Return the + function decl of the resolved function on success, otherwise report + a suitable error and return error_mark_node. */ +tree +function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm) +{ + unsigned int i, nargs; + type_suffix_index type; + if (!check_gp_argument (nops + nimm, i, nargs) + || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + i += 1; + for (; i < nargs - nimm; ++i) + if (!require_matching_vector_type (i, type)) + return error_mark_node; + + for (; i < nargs; ++i) + if (!require_integer_immediate (i)) + return error_mark_node; + + return resolve_to (mode_suffix_id, type); +} + +/* Resolve a (possibly predicated) function that offers a choice between + taking: + + - NOPS like-typed vector arguments or + - NOPS - 1 like-typed vector arguments followed by a scalar argument + + Return the function decl of the resolved function on success, + otherwise report a suitable error and return error_mark_node. */ +tree +function_resolver::resolve_uniform_opt_n (unsigned int nops) +{ + unsigned int i, nargs; + type_suffix_index type; + if (!check_gp_argument (nops, i, nargs) + || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + unsigned int first_arg = i++; + for (; i < nargs - 1; ++i) + if (!require_matching_vector_type (i, type)) + return error_mark_node; + + return finish_opt_n_resolution (i, first_arg, type); +} + +/* If the call is erroneous, report an appropriate error and return + error_mark_node. Otherwise, if the function is overloaded, return + the decl of the non-overloaded function. Return NULL_TREE otherwise, + indicating that the call should be processed in the normal way. 
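+
+   For example, assuming the standard arm_sve.h overloads, with svint32_t
+   arguments A and B:
+
+     svadd_x (pg, a, b);   // resolves to svadd_s32_x
+     svadd_x (pg, a, 1);   // scalar final argument: resolves to svadd_n_s32_x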
*/ +tree +function_resolver::resolve () +{ + return shape->resolve (*this); +} + +function_checker::function_checker (location_t location, + const function_instance &instance, + tree fndecl, tree fntype, + unsigned int nargs, tree *args) + : function_call_info (location, instance, fndecl), + m_fntype (fntype), m_nargs (nargs), m_args (args), + /* We don't have to worry about unary _m operations here, since they + never have arguments that need checking. */ + m_base_arg (pred != PRED_none ? 1 : 0) +{ +} + +/* Return true if argument ARGNO exists. which it might not for + erroneous calls. It is safe to wave through checks if this + function returns false. */ +bool +function_checker::argument_exists_p (unsigned int argno) +{ + gcc_assert (argno < (unsigned int) type_num_arguments (m_fntype)); + return argno < m_nargs; +} + +/* Check that argument ARGNO is an integer constant expression and + store its value in VALUE_OUT if so. The caller should first + check that argument ARGNO exists. */ +bool +function_checker::require_immediate (unsigned int argno, + HOST_WIDE_INT &value_out) +{ + gcc_assert (argno < m_nargs); + tree arg = m_args[argno]; + + /* The type and range are unsigned, so read the argument as an + unsigned rather than signed HWI. */ + if (!tree_fits_uhwi_p (arg)) + { + report_non_ice (location, fndecl, argno); + return false; + } + + /* ...but treat VALUE_OUT as signed for error reporting, since printing + -1 is more user-friendly than the maximum uint64_t value. */ + value_out = tree_to_uhwi (arg); + return true; +} + +/* Check that argument REL_ARGNO is an integer constant expression that + has the value VALUE0 or VALUE1. REL_ARGNO counts from the end of the + predication arguments. */ +bool +function_checker::require_immediate_either_or (unsigned int rel_argno, + HOST_WIDE_INT value0, + HOST_WIDE_INT value1) +{ + unsigned int argno = m_base_arg + rel_argno; + if (!argument_exists_p (argno)) + return true; + + HOST_WIDE_INT actual; + if (!require_immediate (argno, actual)) + return false; + + if (actual != value0 && actual != value1) + { + report_neither_nor (location, fndecl, argno, actual, 90, 270); + return false; + } + + return true; +} + +/* Check that argument REL_ARGNO is an integer constant expression that has + a valid value for enumeration type TYPE. REL_ARGNO counts from the end + of the predication arguments. */ +bool +function_checker::require_immediate_enum (unsigned int rel_argno, tree type) +{ + unsigned int argno = m_base_arg + rel_argno; + if (!argument_exists_p (argno)) + return true; + + HOST_WIDE_INT actual; + if (!require_immediate (argno, actual)) + return false; + + for (tree entry = TYPE_VALUES (type); entry; entry = TREE_CHAIN (entry)) + { + /* The value is an INTEGER_CST for C and a CONST_DECL wrapper + around an INTEGER_CST for C++. */ + tree value = TREE_VALUE (entry); + if (TREE_CODE (value) == CONST_DECL) + value = DECL_INITIAL (value); + if (wi::to_widest (value) == actual) + return true; + } + + report_not_enum (location, fndecl, argno, actual, type); + return false; +} + +/* Check that argument REL_ARGNO is suitable for indexing argument + REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts + from the end of the predication arguments. */ +bool +function_checker::require_immediate_lane_index (unsigned int rel_argno, + unsigned int group_size) +{ + unsigned int argno = m_base_arg + rel_argno; + if (!argument_exists_p (argno)) + return true; + + /* Get the type of the previous argument. 
tree_argument_type wants a + 1-based number, whereas ARGNO is 0-based. */ + machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno)); + gcc_assert (VECTOR_MODE_P (mode)); + unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode)); + return require_immediate_range (rel_argno, 0, nlanes - 1); +} + +/* Check that argument REL_ARGNO is an integer constant expression that + has one of the given values. */ +bool +function_checker::require_immediate_one_of (unsigned int rel_argno, + HOST_WIDE_INT value0, + HOST_WIDE_INT value1, + HOST_WIDE_INT value2, + HOST_WIDE_INT value3) +{ + unsigned int argno = m_base_arg + rel_argno; + if (!argument_exists_p (argno)) + return true; + + HOST_WIDE_INT actual; + if (!require_immediate (argno, actual)) + return false; + + if (actual != value0 + && actual != value1 + && actual != value2 + && actual != value3) + { + report_not_one_of (location, fndecl, argno, actual, + value0, value1, value2, value3); + return false; + } + + return true; +} + +/* Check that argument REL_ARGNO is an integer constant expression in the + range [MIN, MAX]. REL_ARGNO counts from the end of the predication + arguments. */ +bool +function_checker::require_immediate_range (unsigned int rel_argno, + HOST_WIDE_INT min, + HOST_WIDE_INT max) +{ + unsigned int argno = m_base_arg + rel_argno; + if (!argument_exists_p (argno)) + return true; + + /* Required because of the tree_to_uhwi -> HOST_WIDE_INT conversion + in require_immediate. */ + gcc_assert (min >= 0 && min <= max); + HOST_WIDE_INT actual; + if (!require_immediate (argno, actual)) + return false; + + if (!IN_RANGE (actual, min, max)) + { + report_out_of_range (location, fndecl, argno, actual, min, max); + return false; + } + + return true; +} + +/* Perform semantic checks on the call. Return true if the call is valid, + otherwise report a suitable error. */ +bool +function_checker::check () +{ + function_args_iterator iter; + tree type; + unsigned int i = 0; + FOREACH_FUNCTION_ARGS (m_fntype, type, iter) + { + if (type == void_type_node || i >= m_nargs) + break; + + if (i >= m_base_arg + && TREE_CODE (type) == ENUMERAL_TYPE + && !require_immediate_enum (i - m_base_arg, type)) + return false; + + i += 1; + } + + return shape->check (*this); +} + +gimple_folder::gimple_folder (const function_instance &instance, tree fndecl, + gimple_stmt_iterator *gsi_in, gcall *call_in) + : function_call_info (gimple_location (call_in), instance, fndecl), + gsi (gsi_in), call (call_in), lhs (gimple_call_lhs (call_in)) +{ +} + +/* VALUE might be a vector of type VECTYPE or a single scalar element. + Duplicate it into a vector of type VECTYPE in the latter case, adding any + new statements to STMTS. */ +tree +gimple_folder::force_vector (gimple_seq &stmts, tree vectype, tree value) +{ + if (!VECTOR_TYPE_P (TREE_TYPE (value))) + value = gimple_build_vector_from_val (&stmts, vectype, value); + return value; +} + +/* Convert predicate argument ARGNO so that it has the type appropriate for + an operation on VECTYPE. Add any new statements to STMTS. */ +tree +gimple_folder::convert_pred (gimple_seq &stmts, tree vectype, + unsigned int argno) +{ + tree pred = gimple_call_arg (call, argno); + if (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (pred)), + TYPE_VECTOR_SUBPARTS (vectype))) + return pred; + + return gimple_build (&stmts, VIEW_CONVERT_EXPR, + truth_type_for (vectype), pred); +} + +/* Return a pointer to the address in a contiguous load or store, + given that each memory vector has type VECTYPE. 
Add any new + statements to STMTS. */ +tree +gimple_folder::fold_contiguous_base (gimple_seq &stmts, tree vectype) +{ + tree base = gimple_call_arg (call, 1); + if (mode_suffix_id == MODE_vnum) + { + tree offset = gimple_call_arg (call, 2); + offset = gimple_convert (&stmts, sizetype, offset); + offset = gimple_build (&stmts, MULT_EXPR, sizetype, offset, + TYPE_SIZE_UNIT (vectype)); + base = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (base), + base, offset); + } + return base; +} + +/* Return the alignment and TBAA argument to an internal load or store + function like IFN_MASK_LOAD or IFN_MASK_STORE, given that it accesses + memory elements of type TYPE. */ +tree +gimple_folder::load_store_cookie (tree type) +{ + return build_int_cst (build_pointer_type (type), TYPE_ALIGN (type)); +} + +/* Fold the call to a call to INSTANCE, with the same arguments. */ +gimple * +gimple_folder::redirect_call (const function_instance &instance) +{ + registered_function *rfn + = function_table->find_with_hash (instance, instance.hash ()); + if (!rfn) + return NULL; + + gimple_call_set_fndecl (call, rfn->decl); + return call; +} + +/* Fold the call to a PTRUE, taking the element size from type suffix 0. */ +gimple * +gimple_folder::fold_to_ptrue () +{ + tree svbool_type = TREE_TYPE (lhs); + tree bool_type = TREE_TYPE (svbool_type); + unsigned int element_bytes = type_suffix (0).element_bytes; + + /* The return type is svbool_t for all type suffixes, thus for b8 we + want { 1, 1, 1, 1, ... }, for b16 we want { 1, 0, 1, 0, ... }, etc. */ + tree_vector_builder builder (svbool_type, element_bytes, 1); + builder.quick_push (build_all_ones_cst (bool_type)); + for (unsigned int i = 1; i < element_bytes; ++i) + builder.quick_push (build_zero_cst (bool_type)); + return gimple_build_assign (lhs, builder.build ()); +} + +/* Fold the call to a PFALSE. */ +gimple * +gimple_folder::fold_to_pfalse () +{ + return gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); +} + +/* Fold an operation to a constant predicate in which the first VL + elements are set and the rest are clear. Take the element size + from type suffix 0. */ +gimple * +gimple_folder::fold_to_vl_pred (unsigned int vl) +{ + tree vectype = TREE_TYPE (lhs); + tree element_type = TREE_TYPE (vectype); + tree minus_one = build_all_ones_cst (element_type); + tree zero = build_zero_cst (element_type); + unsigned int element_bytes = type_suffix (0).element_bytes; + + /* Construct COUNT elements that contain the ptrue followed by + a repeating sequence of COUNT elements. */ + unsigned int count = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vectype)); + gcc_assert (vl * element_bytes <= count); + tree_vector_builder builder (vectype, count, 2); + for (unsigned int i = 0; i < count * 2; ++i) + { + bool bit = (i & (element_bytes - 1)) == 0 && i < vl * element_bytes; + builder.quick_push (bit ? minus_one : zero); + } + return gimple_build_assign (lhs, builder.build ()); +} + +/* Try to fold the call. Return the new statement on success and null + on failure. */ +gimple * +gimple_folder::fold () +{ + /* Don't fold anything when SVE is disabled; emit an error during + expansion instead. */ + if (!TARGET_SVE) + return NULL; + + /* Punt if the function has a return type and no result location is + provided. The attributes should allow target-independent code to + remove the calls if appropriate. 
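+
+     For example, assuming the standard arm_sve.h spelling, a statement
+     such as
+
+       svadd_x (pg, a, b);
+
+     whose result is unused has no lhs here; the attributes mentioned
+     above let later passes delete the call instead of folding it.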
*/ + if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node) + return NULL; + + return base->fold (*this); +} + +function_expander::function_expander (const function_instance &instance, + tree fndecl, tree call_expr_in, + rtx possible_target_in) + : function_call_info (EXPR_LOCATION (call_expr_in), instance, fndecl), + call_expr (call_expr_in), possible_target (possible_target_in) +{ +} + +/* Return the handler of direct optab OP for type suffix SUFFIX_I. */ +insn_code +function_expander::direct_optab_handler (optab op, unsigned int suffix_i) +{ + return ::direct_optab_handler (op, vector_mode (suffix_i)); +} + +/* Choose between signed and unsigned direct optabs SIGNED_OP and + UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then + pick the appropriate optab handler for the mode. Use MODE as the + mode if given, otherwise use the mode of type suffix SUFFIX_I. */ +insn_code +function_expander::direct_optab_handler_for_sign (optab signed_op, + optab unsigned_op, + unsigned int suffix_i, + machine_mode mode) +{ + if (mode == VOIDmode) + mode = vector_mode (suffix_i); + optab op = type_suffix (suffix_i).unsigned_p ? unsigned_op : signed_op; + return ::direct_optab_handler (op, mode); +} + +/* Return true if X overlaps any input. */ +bool +function_expander::overlaps_input_p (rtx x) +{ + for (unsigned int i = 0; i < args.length (); ++i) + if (reg_overlap_mentioned_p (x, args[i])) + return true; + return false; +} + +/* Convert ptr_mode value X to Pmode. */ +rtx +function_expander::convert_to_pmode (rtx x) +{ + if (ptr_mode == SImode) + x = simplify_gen_unary (ZERO_EXTEND, DImode, x, SImode); + return x; +} + +/* Return the base address for a contiguous load or store function. + MEM_MODE is the mode of the addressed memory. */ +rtx +function_expander::get_contiguous_base (machine_mode mem_mode) +{ + rtx base = convert_to_pmode (args[1]); + if (mode_suffix_id == MODE_vnum) + { + /* Use the size of the memory mode for extending loads and truncating + stores. Use the size of a full vector for non-extending loads + and non-truncating stores (including svld[234] and svst[234]). */ + poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), + BYTES_PER_SVE_VECTOR); + rtx offset = gen_int_mode (size, Pmode); + offset = simplify_gen_binary (MULT, Pmode, args[2], offset); + base = simplify_gen_binary (PLUS, Pmode, base, offset); + } + return base; +} + +/* For a function that does the equivalent of: + + OUTPUT = COND ? FN (INPUTS) : FALLBACK; + + return the value of FALLBACK. + + MODE is the mode of OUTPUT. NOPS is the number of operands in INPUTS. + MERGE_ARGNO is the argument that provides FALLBACK for _m functions, + or DEFAULT_MERGE_ARGNO if we should apply the usual rules. + + ARGNO is the caller's index into args. If the returned value is + argument 0 (as for unary _m operations), increment ARGNO past the + returned argument. */ +rtx +function_expander::get_fallback_value (machine_mode mode, unsigned int nops, + unsigned int merge_argno, + unsigned int &argno) +{ + if (pred == PRED_z) + return CONST0_RTX (mode); + + gcc_assert (pred == PRED_m || pred == PRED_x); + if (merge_argno == DEFAULT_MERGE_ARGNO) + merge_argno = nops == 1 && pred == PRED_m ? 0 : 1; + + if (merge_argno == 0) + return args[argno++]; + + return args[merge_argno]; +} + +/* Return a REG rtx that can be used for the result of the function, + using the preferred target if suitable. 
*/ +rtx +function_expander::get_reg_target () +{ + machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); + if (!possible_target || GET_MODE (possible_target) != target_mode) + possible_target = gen_reg_rtx (target_mode); + return possible_target; +} + +/* As for get_reg_target, but make sure that the returned REG does not + overlap any inputs. */ +rtx +function_expander::get_nonoverlapping_reg_target () +{ + if (possible_target && overlaps_input_p (possible_target)) + possible_target = NULL_RTX; + return get_reg_target (); +} + +/* Add an output operand to the instruction we're building, which has + code ICODE. Bind the output to the preferred target rtx if possible. */ +void +function_expander::add_output_operand (insn_code icode) +{ + unsigned int opno = m_ops.length (); + machine_mode mode = insn_data[icode].operand[opno].mode; + m_ops.safe_grow (opno + 1, true); + create_output_operand (&m_ops.last (), possible_target, mode); +} + +/* Add an input operand to the instruction we're building, which has + code ICODE. Calculate the value of the operand as follows: + + - If the operand is a vector and X is not, broadcast X to fill a + vector of the appropriate mode. + + - Otherwise, if the operand is a predicate, coerce X to have the + mode that the instruction expects. In this case X is known to be + VNx16BImode (the mode of svbool_t). + + - Otherwise use X directly. The expand machinery checks that X has + the right mode for the instruction. */ +void +function_expander::add_input_operand (insn_code icode, rtx x) +{ + unsigned int opno = m_ops.length (); + const insn_operand_data &operand = insn_data[icode].operand[opno]; + machine_mode mode = operand.mode; + if (mode == VOIDmode) + { + /* The only allowable use of VOIDmode is the wildcard + aarch64_any_register_operand, which is used to avoid + combinatorial explosion in the reinterpret patterns. */ + gcc_assert (operand.predicate == aarch64_any_register_operand); + mode = GET_MODE (x); + } + else if (!VECTOR_MODE_P (GET_MODE (x)) && VECTOR_MODE_P (mode)) + x = expand_vector_broadcast (mode, x); + else if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + gcc_assert (GET_MODE (x) == VNx16BImode); + x = gen_lowpart (mode, x); + } + m_ops.safe_grow (m_ops.length () + 1, true); + create_input_operand (&m_ops.last (), x, mode); +} + +/* Add an integer operand with value X to the instruction. */ +void +function_expander::add_integer_operand (HOST_WIDE_INT x) +{ + m_ops.safe_grow (m_ops.length () + 1, true); + create_integer_operand (&m_ops.last (), x); +} + +/* Add a memory operand with mode MODE and address ADDR. */ +void +function_expander::add_mem_operand (machine_mode mode, rtx addr) +{ + /* Exception for OImode for the ld1ro intrinsics. + They act on 256 bit octaword data, and it's just easier to use a scalar + mode to represent that than add a new vector mode solely for the purpose + of this intrinsic. */ + gcc_assert (VECTOR_MODE_P (mode) || mode == OImode); + rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr)); + /* The memory is only guaranteed to be element-aligned. */ + set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode))); + add_fixed_operand (mem); +} + +/* Add an address operand with value X. The static operand data says + what mode and form the address must have. */ +void +function_expander::add_address_operand (rtx x) +{ + m_ops.safe_grow (m_ops.length () + 1, true); + create_address_operand (&m_ops.last (), x); +} + +/* Add an operand that must be X. 
The only way of legitimizing an + invalid X is to reload the address of a MEM. */ +void +function_expander::add_fixed_operand (rtx x) +{ + m_ops.safe_grow (m_ops.length () + 1, true); + create_fixed_operand (&m_ops.last (), x); +} + +/* Generate instruction ICODE, given that its operands have already + been added to M_OPS. Return the value of the first operand. */ +rtx +function_expander::generate_insn (insn_code icode) +{ + expand_insn (icode, m_ops.length (), m_ops.address ()); + return function_returns_void_p () ? const0_rtx : m_ops[0].value; +} + +/* Convert the arguments to a gather/scatter function into the + associated md operands. Argument ARGNO is the scalar or vector base and + argument ARGNO + 1 is the scalar or vector displacement (if applicable). + The md pattern expects: + + - a scalar base + - a vector displacement + + If SCALED_P is true, it also expects: + + - a const_int that is 1 if the displacement is zero-extended from 32 bits + - a scaling multiplier (1 for bytes, 2 for .h indices, etc.). + + If SCALED_P is false, the displacement is implicitly zero-extended + and the scaling multiplier is implicitly 1. */ +void +function_expander::prepare_gather_address_operands (unsigned int argno, + bool scaled_p) +{ + machine_mode mem_mode = memory_vector_mode (); + tree vector_type = base_vector_type (); + units_index units = displacement_units (); + int shift_idx = -1; + if (units == UNITS_none) + { + /* Vector base, no displacement. Convert to an integer zero base + and a vector byte offset. */ + args.quick_insert (argno, const0_rtx); + units = UNITS_bytes; + } + else if (vector_type) + { + /* Vector base, scalar displacement. Convert to a scalar base and + a vector byte offset. */ + std::swap (args[argno], args[argno + 1]); + if (units == UNITS_elements) + shift_idx = argno; + } + else + { + /* Scalar base, vector displacement. This is the order that the md + pattern wants. */ + args[argno] = convert_to_pmode (args[argno]); + vector_type = displacement_vector_type (); + if (units == UNITS_elements && !scaled_p) + shift_idx = argno + 1; + } + tree scalar_displacement_type = TREE_TYPE (vector_type); + + if (shift_idx >= 0) + { + machine_mode arg_mode = GET_MODE (args[shift_idx]); + if (arg_mode == VOIDmode) + arg_mode = DImode; + unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mem_mode); + rtx shift = gen_int_mode (exact_log2 (elt_bytes), DImode); + args[shift_idx] = simplify_gen_binary (ASHIFT, arg_mode, + args[shift_idx], shift); + units = UNITS_bytes; + } + + bool uxtw_p = (TYPE_PRECISION (scalar_displacement_type) == 64 + || TYPE_UNSIGNED (scalar_displacement_type)); + unsigned int scale = (units == UNITS_bytes + ? 1 : GET_MODE_UNIT_SIZE (mem_mode)); + + if (scaled_p) + { + args.quick_insert (argno + 2, GEN_INT (uxtw_p)); + args.quick_insert (argno + 3, GEN_INT (scale)); + } + else + gcc_assert (uxtw_p && scale == 1); +} + +/* The final argument is an immediate svprfop value. Add two fake arguments + to represent the rw and locality operands of a PREFETCH rtx. */ +void +function_expander::prepare_prefetch_operands () +{ + unsigned int prfop = INTVAL (args.last ()); + /* Bit 3 of the prfop selects stores over loads. */ + args.quick_push (GEN_INT ((prfop & 8) != 0)); + /* Bits 1 and 2 specify the locality; 0-based for svprfop but + 1-based for PREFETCH. */ + args.quick_push (GEN_INT (((prfop >> 1) & 3) + 1)); +} + +/* Add a dummy argument to indicate whether predicate argument ARGNO + is all-true when interpreted in mode PRED_MODE. The hint goes + immediately after ARGNO. 
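+
+   For example, for an operation on 32-bit elements (PRED_MODE is
+   VNx4BImode), a governing predicate that comes from svptrue_b32 ()
+   (standard arm_sve.h spelling assumed) expands to a constant all-ones
+   predicate and gives SVE_KNOWN_PTRUE, while a predicate only known at
+   run time gives SVE_MAYBE_NOT_PTRUE.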
*/ +void +function_expander::add_ptrue_hint (unsigned int argno, machine_mode pred_mode) +{ + rtx pred = gen_lowpart (pred_mode, args[argno]); + int hint = (pred == CONSTM1_RTX (pred_mode) + ? SVE_KNOWN_PTRUE : SVE_MAYBE_NOT_PTRUE); + args.quick_insert (argno + 1, gen_int_mode (hint, SImode)); +} + +/* Rotate inputs args[START:END] one position to the left, so that + args[START] becomes args[END - 1]. */ +void +function_expander::rotate_inputs_left (unsigned int start, unsigned int end) +{ + rtx new_last = args[start]; + for (unsigned int i = start; i < end - 1; ++i) + args[i] = args[i + 1]; + args[end - 1] = new_last; +} + +/* Return true if the negation of argument ARGNO can be folded away, + replacing it with the negated value if so. MODE is the associated + vector mode, but the argument could be a single element. The main + case this handles is constant arguments. */ +bool +function_expander::try_negating_argument (unsigned int argno, + machine_mode mode) +{ + rtx x = args[argno]; + if (!VECTOR_MODE_P (GET_MODE (x))) + mode = GET_MODE_INNER (mode); + + x = simplify_unary_operation (NEG, mode, x, mode); + if (!x) + return false; + + args[argno] = x; + return true; +} + +/* Implement the call using instruction ICODE, with a 1:1 mapping between + arguments and input operands. */ +rtx +function_expander::use_exact_insn (insn_code icode) +{ + unsigned int nops = insn_data[icode].n_operands; + if (!function_returns_void_p ()) + { + add_output_operand (icode); + nops -= 1; + } + for (unsigned int i = 0; i < nops; ++i) + add_input_operand (icode, args[i]); + return generate_insn (icode); +} + +/* Implement the call using instruction ICODE, which does not use a + governing predicate. We must therefore drop the GP from an _x call. */ +rtx +function_expander::use_unpred_insn (insn_code icode) +{ + /* We can't drop the predicate for _z and _m. */ + gcc_assert (pred == PRED_x || pred == PRED_none); + /* Discount the output operand. */ + unsigned int nops = insn_data[icode].n_operands - 1; + /* Drop the predicate argument in the case of _x predication. */ + unsigned int bias = (pred == PRED_x ? 1 : 0); + unsigned int i = 0; + + add_output_operand (icode); + for (; i < nops; ++i) + add_input_operand (icode, args[i + bias]); + + return generate_insn (icode); +} + +/* Implement the call using instruction ICODE, which is a predicated + operation that returns arbitrary values for inactive lanes. */ +rtx +function_expander::use_pred_x_insn (insn_code icode) +{ + /* At present we never need to handle PRED_none, which would involve + creating a new predicate rather than using one supplied by the user. */ + gcc_assert (pred == PRED_x); + /* Discount the output operand. */ + unsigned int nops = args.length () - 1; + + bool has_float_operand_p = FLOAT_MODE_P (insn_data[icode].operand[0].mode); + + /* Add the normal operands. */ + add_output_operand (icode); + add_input_operand (icode, args[0]); + for (unsigned int i = 0; i < nops; ++i) + { + add_input_operand (icode, args[i + 1]); + if (FLOAT_MODE_P (GET_MODE (args[i + 1]))) + has_float_operand_p = true; + } + + if (has_float_operand_p) + { + /* Add a flag that indicates whether unpredicated instructions + are allowed. */ + rtx pred = m_ops[1].value; + if (flag_trapping_math && pred != CONST1_RTX (GET_MODE (pred))) + add_integer_operand (SVE_STRICT_GP); + else + add_integer_operand (SVE_RELAXED_GP); + } + + return generate_insn (icode); +} + +/* Implement the call using instruction ICODE, which does the equivalent of: + + OUTPUT = COND ? 
FN (INPUTS) : FALLBACK; + + The instruction operands are in the order above: OUTPUT, COND, INPUTS + and FALLBACK. MERGE_ARGNO is the argument that provides FALLBACK for _m + functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ +rtx +function_expander::use_cond_insn (insn_code icode, unsigned int merge_argno) +{ + /* At present we never need to handle PRED_none, which would involve + creating a new predicate rather than using one supplied by the user. */ + gcc_assert (pred != PRED_none); + /* Discount the output, predicate and fallback value. */ + unsigned int nops = insn_data[icode].n_operands - 3; + machine_mode mode = insn_data[icode].operand[0].mode; + + unsigned int opno = 0; + rtx fallback_arg = get_fallback_value (mode, nops, merge_argno, opno); + rtx pred = args[opno++]; + + add_output_operand (icode); + add_input_operand (icode, pred); + for (unsigned int i = 0; i < nops; ++i) + add_input_operand (icode, args[opno + i]); + add_input_operand (icode, fallback_arg); + return generate_insn (icode); +} + +/* Implement the call using instruction ICODE, which is a select-like + operation with the following operands: + + 0: output + 1: true value + 2: false value + 3: predicate + + MERGE_ARGNO is the argument that provides the "false" value for _m + functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ +rtx +function_expander::use_vcond_mask_insn (insn_code icode, + unsigned int merge_argno) +{ + machine_mode mode = vector_mode (0); + + unsigned int opno = 0; + rtx false_arg = get_fallback_value (mode, 1, merge_argno, opno); + rtx pred_arg = args[opno++]; + rtx true_arg = args[opno++]; + + add_output_operand (icode); + add_input_operand (icode, true_arg); + add_input_operand (icode, false_arg); + add_input_operand (icode, pred_arg); + return generate_insn (icode); +} + +/* Implement the call using instruction ICODE, which loads memory operand 1 + into register operand 0 under the control of predicate operand 2. + Extending loads have a further predicate (operand 3) that nominally + controls the extension. */ +rtx +function_expander::use_contiguous_load_insn (insn_code icode) +{ + machine_mode mem_mode = memory_vector_mode (); + + add_output_operand (icode); + add_mem_operand (mem_mode, get_contiguous_base (mem_mode)); + add_input_operand (icode, args[0]); + if (GET_MODE_UNIT_BITSIZE (mem_mode) < type_suffix (0).element_bits) + add_input_operand (icode, CONSTM1_RTX (VNx16BImode)); + return generate_insn (icode); +} + +/* Implement the call using instruction ICODE, which prefetches from + address operand 1 under the control of predicate operand 0. + Operands 2, 3 and 4 respectively specify the svprfop value, + the PREFETCH rw flag and the PREFETCH locality. */ +rtx +function_expander::use_contiguous_prefetch_insn (insn_code icode) +{ + add_input_operand (icode, args[0]); + add_address_operand (get_contiguous_base (VNx16QImode)); + for (unsigned int i = args.length () - 3; i < args.length (); ++i) + add_input_operand (icode, args[i]); + return generate_insn (icode); +} + +/* Implement the call using instruction ICODE, which stores register operand 1 + into memory operand 0 under the control of predicate operand 2. 
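+
+   For example, assuming the standard arm_sve.h spelling:
+
+     svst1 (pg, base, data);
+
+   expands so that DATA (args.last ()) becomes operand 1, PG (args[0])
+   becomes operand 2, and the memory operand is built from BASE.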
*/
+rtx
+function_expander::use_contiguous_store_insn (insn_code icode)
+{
+  machine_mode mem_mode = memory_vector_mode ();
+
+  add_mem_operand (mem_mode, get_contiguous_base (mem_mode));
+  add_input_operand (icode, args.last ());
+  add_input_operand (icode, args[0]);
+  return generate_insn (icode);
+}
+
+/* Implement the call using one of the following strategies, chosen in order:
+
+   (1) "aarch64_pred_<code><mode>_z" for PRED_z predicate functions
+
+   (2) "aarch64_pred_<code><mode>" for PRED_x functions
+
+   (3) a normal unpredicated optab for PRED_none and PRED_x functions,
+       dropping the predicate in the latter case
+
+   (4) an unpredicated "aarch64_sve_<code><mode>" for PRED_none and
+       PRED_x functions, again dropping the predicate for PRED_x
+
+   (5) "cond_<code><mode>" otherwise
+
+   where <code> corresponds to:
+
+   - CODE_FOR_SINT for signed integers
+   - CODE_FOR_UINT for unsigned integers
+   - UNSPEC_FOR_FP for floating-point values
+
+   and where <code_optab> is like <code>, but uses CODE_FOR_SINT instead
+   of UNSPEC_FOR_FP for floating-point values.
+
+   MERGE_ARGNO is the argument that provides the values of inactive lanes for
+   _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules.  */
+rtx
+function_expander::map_to_rtx_codes (rtx_code code_for_sint,
+                                     rtx_code code_for_uint,
+                                     int unspec_for_fp,
+                                     unsigned int merge_argno)
+{
+  machine_mode mode = vector_mode (0);
+  rtx_code code = (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint);
+  insn_code icode;
+
+  /* Handle predicate logic operations, which always use _z predication.  */
+  if (type_suffix (0).tclass == TYPE_bool)
+    {
+      gcc_assert (pred == PRED_z && code_for_uint == code_for_sint);
+      return use_exact_insn (code_for_aarch64_pred_z (code, mode));
+    }
+
+  /* First try using UNSPEC_PRED_X patterns for _x predication,
+     if available.  */
+  if (pred == PRED_x)
+    {
+      if (type_suffix (0).integer_p)
+        icode = maybe_code_for_aarch64_pred (code, mode);
+      else
+        icode = maybe_code_for_aarch64_pred (unspec_for_fp, mode);
+      if (icode != CODE_FOR_nothing)
+        return use_pred_x_insn (icode);
+    }
+
+  /* Otherwise expand PRED_none and PRED_x operations without a predicate.
+     Floating-point operations conventionally use the signed rtx code.  */
+  if (pred == PRED_none || pred == PRED_x)
+    {
+      icode = direct_optab_handler (code_to_optab (code), 0);
+      if (icode == CODE_FOR_nothing)
+        icode = code_for_aarch64_sve (code, mode);
+      return use_unpred_insn (icode);
+    }
+
+  /* Don't use cond_*_optabs here, since not all codes have one yet.  */
+  if (type_suffix (0).integer_p)
+    icode = code_for_cond (code, mode);
+  else
+    icode = code_for_cond (unspec_for_fp, mode);
+  return use_cond_insn (icode, merge_argno);
+}
+
+/* Implement the call using one of the following strategies, chosen in order:
+
+   (1) "aarch64_pred_<optab><mode>" for PRED_x functions; this is a
+       predicated pattern
+
+   (2) "aarch64_sve_<optab><mode>" for PRED_none and PRED_x functions;
+       this is an unpredicated pattern
+
+   (3) "cond_<optab><mode>" otherwise
+
+   where <optab> corresponds to:
+
+   - UNSPEC_FOR_SINT for signed integers
+   - UNSPEC_FOR_UINT for unsigned integers
+   - UNSPEC_FOR_FP for floating-point values
+
+   MERGE_ARGNO is the argument that provides the values of inactive lanes for
+   _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules.  */
+rtx
+function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint,
+                                   int unspec_for_fp, unsigned int merge_argno)
+{
+  machine_mode mode = vector_mode (0);
+  int unspec = (!type_suffix (0).integer_p ? unspec_for_fp
+                : type_suffix (0).unsigned_p ?
unspec_for_uint + : unspec_for_sint); + + if (pred == PRED_x) + { + insn_code icode = maybe_code_for_aarch64_pred (unspec, mode); + if (icode != CODE_FOR_nothing) + return use_pred_x_insn (icode); + } + + if (pred == PRED_none || pred == PRED_x) + { + insn_code icode = maybe_code_for_aarch64_sve (unspec, mode); + if (icode != CODE_FOR_nothing) + return use_unpred_insn (icode); + } + + insn_code icode = code_for_cond (unspec, vector_mode (0)); + return use_cond_insn (icode, merge_argno); +} + +/* Expand the call and return its lhs. */ +rtx +function_expander::expand () +{ + unsigned int nargs = call_expr_nargs (call_expr); + args.reserve (nargs); + for (unsigned int i = 0; i < nargs; ++i) + args.quick_push (expand_normal (CALL_EXPR_ARG (call_expr, i))); + + return base->expand (*this); +} + +/* Register the built-in SVE ABI types, such as __SVBool_t. */ +static void +register_builtin_types () +{ +#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ + scalar_types[VECTOR_TYPE_ ## ACLE_NAME] = SCALAR_TYPE; +#include "aarch64-sve-builtins.def" + + for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) + { + tree eltype = scalar_types[i]; + tree vectype; + unsigned int num_zr = 0, num_pr = 0; + if (eltype == boolean_type_node) + { + vectype = build_truth_vector_type_for_mode (BYTES_PER_SVE_VECTOR, + VNx16BImode); + gcc_assert (TYPE_MODE (vectype) == VNx16BImode + && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype) + && TYPE_ALIGN (vectype) == 16 + && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)), + BYTES_PER_SVE_VECTOR)); + num_pr = 1; + } + else + { + scalar_mode elmode = SCALAR_TYPE_MODE (eltype); + unsigned int elbytes = GET_MODE_SIZE (elmode); + poly_uint64 nunits = exact_div (BYTES_PER_SVE_VECTOR, elbytes); + machine_mode mode + = aarch64_sve_data_mode (elmode, nunits).require (); + vectype = build_vector_type_for_mode (eltype, mode); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) + && TYPE_MODE (vectype) == mode + && TYPE_MODE_RAW (vectype) == mode + && TYPE_ALIGN (vectype) == 128 + && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)), + BITS_PER_SVE_VECTOR)); + num_zr = 1; + } + vectype = build_distinct_type_copy (vectype); + gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); + SET_TYPE_STRUCTURAL_EQUALITY (vectype); + TYPE_ARTIFICIAL (vectype) = 1; + TYPE_INDIVISIBLE_P (vectype) = 1; + add_sve_type_attribute (vectype, num_zr, num_pr, + vector_types[i].mangled_name, + vector_types[i].acle_name); + make_type_sizeless (vectype); + abi_vector_types[i] = vectype; + lang_hooks.types.register_builtin_type (vectype, + vector_types[i].abi_name); + } +} + +/* Initialize all compiler built-ins related to SVE that should be + defined at start-up. */ +void +init_builtins () +{ + sve_switcher sve; + register_builtin_types (); + if (in_lto_p) + handle_arm_sve_h (); +} + +/* Register vector type TYPE under its arm_sve.h name. */ +static void +register_vector_type (vector_type_index type) +{ + tree vectype = abi_vector_types[type]; + tree id = get_identifier (vector_types[type].acle_name); + tree decl = build_decl (input_location, TYPE_DECL, id, vectype); + decl = lang_hooks.decls.pushdecl (decl); + + /* Record the new ACLE type if pushdecl succeeded without error. Use + the ABI type otherwise, so that the type we record at least has the + right form, even if it doesn't have the right name. This should give + better error recovery behavior than installing error_mark_node or + installing an incorrect type. 
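+
+     For example, for VECTOR_TYPE_svint32_t the effect is roughly that of:
+
+       typedef __SVInt32_t svint32_t;
+
+     where __SVInt32_t is the ABI type registered by register_builtin_types.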
*/
+  if (decl
+      && TREE_CODE (decl) == TYPE_DECL
+      && TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype)
+    vectype = TREE_TYPE (decl);
+  acle_vector_types[0][type] = vectype;
+}
+
+/* Register the tuple type that contains NUM_VECTORS vectors of type TYPE.  */
+static void
+register_tuple_type (unsigned int num_vectors, vector_type_index type)
+{
+  tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
+
+  /* Work out the structure name.  */
+  char buffer[sizeof ("svbfloat16x4_t")];
+  const char *vector_type_name = vector_types[type].acle_name;
+  snprintf (buffer, sizeof (buffer), "%.*sx%d_t",
+            (int) strlen (vector_type_name) - 2, vector_type_name,
+            num_vectors);
+
+  /* The contents of the type are opaque, so we can define them in any
+     way that maps to the correct ABI type.
+
+     Here we choose to use the same layout as for arm_neon.h, but with
+     "__val" instead of "val":
+
+       struct svfooxN_t { svfoo_t __val[N]; };
+
+     (It wouldn't be possible to write that directly in C or C++ for
+     sizeless types, but that's not a problem for this function.)
+
+     Using arrays simplifies the handling of svget and svset for variable
+     arguments.  */
+  tree vector_type = acle_vector_types[0][type];
+  tree array_type = build_array_type_nelts (vector_type, num_vectors);
+  gcc_assert (VECTOR_MODE_P (TYPE_MODE (array_type))
+              && TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
+              && TYPE_ALIGN (array_type) == 128);
+
+  tree field = build_decl (input_location, FIELD_DECL,
+                           get_identifier ("__val"), array_type);
+  DECL_FIELD_CONTEXT (field) = tuple_type;
+  TYPE_FIELDS (tuple_type) = field;
+  add_sve_type_attribute (tuple_type, num_vectors, 0, NULL, buffer);
+  make_type_sizeless (tuple_type);
+  layout_type (tuple_type);
+  gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type))
+              && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type)
+              && TYPE_ALIGN (tuple_type) == 128);
+
+  tree decl = build_decl (input_location, TYPE_DECL,
+                          get_identifier (buffer), tuple_type);
+  TYPE_NAME (tuple_type) = decl;
+  TYPE_STUB_DECL (tuple_type) = decl;
+  lang_hooks.decls.pushdecl (decl);
+  /* ??? Undo the effect of set_underlying_type for C.  The C frontend
+     doesn't recognize DECL as a built-in because (as intended) the decl has
+     a real location instead of BUILTINS_LOCATION.  The frontend therefore
+     treats the decl like a normal C "typedef struct foo foo;", expecting
+     the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead
+     of the named one we attached above.  It then sets DECL_ORIGINAL_TYPE
+     on the supposedly unnamed decl, creating a circularity that upsets
+     dwarf2out.
+
+     We don't want to follow the normal C model and create "struct foo"
+     tags for tuple types since (a) the types are supposed to be opaque
+     and (b) they couldn't be defined as a real struct anyway.  Treating
+     the TYPE_DECLs as "typedef struct foo foo;" without creating
+     "struct foo" would lead to confusing error messages.  */
+  DECL_ORIGINAL_TYPE (decl) = NULL_TREE;
+
+  acle_vector_types[num_vectors - 1][type] = tuple_type;
+}
+
+/* Register the svpattern enum.  */
+static void
+register_svpattern ()
+{
+  auto_vec<string_int_pair, 32> values;
+#define PUSH(UPPER, LOWER, VALUE) \
+  values.quick_push (string_int_pair ("SV_" #UPPER, VALUE));
+  AARCH64_FOR_SVPATTERN (PUSH)
+#undef PUSH
+
+  acle_svpattern = lang_hooks.types.simulate_enum_decl (input_location,
+                                                        "svpattern", values);
+}
+
+/* Register the svprfop enum.
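+   Its values (SV_PLDL1KEEP, SV_PSTL1KEEP, etc.) appear in calls such as
+   svprfb (pg, ptr, SV_PLDL1KEEP), assuming the standard arm_sve.h
+   spellings.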
*/ +static void +register_svprfop () +{ + auto_vec values; +#define PUSH(UPPER, LOWER, VALUE) \ + values.quick_push (string_int_pair ("SV_" #UPPER, VALUE)); + AARCH64_FOR_SVPRFOP (PUSH) +#undef PUSH + + acle_svprfop = lang_hooks.types.simulate_enum_decl (input_location, + "svprfop", values); +} + +/* Implement #pragma GCC aarch64 "arm_sve.h". */ +void +handle_arm_sve_h () +{ + if (function_table) + { + error ("duplicate definition of %qs", "arm_sve.h"); + return; + } + + sve_switcher sve; + + /* Define the vector and tuple types. */ + for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i) + { + vector_type_index type = vector_type_index (type_i); + register_vector_type (type); + if (type != VECTOR_TYPE_svbool_t) + for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count) + register_tuple_type (count, type); + } + + /* Define the enums. */ + register_svpattern (); + register_svprfop (); + + /* Define the functions. */ + function_table = new hash_table (1023); + function_builder builder; + for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) + builder.register_function_group (function_groups[i]); +} + +/* Return the function decl with SVE function subcode CODE, or error_mark_node + if no such function exists. */ +tree +builtin_decl (unsigned int code, bool) +{ + if (code >= vec_safe_length (registered_functions)) + return error_mark_node; + return (*registered_functions)[code]->decl; +} + +/* If we're implementing manual overloading, check whether the SVE + function with subcode CODE is overloaded, and if so attempt to + determine the corresponding non-overloaded function. The call + occurs at location LOCATION and has the arguments given by ARGLIST. + + If the call is erroneous, report an appropriate error and return + error_mark_node. Otherwise, if the function is overloaded, return + the decl of the non-overloaded function. Return NULL_TREE otherwise, + indicating that the call should be processed in the normal way. */ +tree +resolve_overloaded_builtin (location_t location, unsigned int code, + vec *arglist) +{ + if (code >= vec_safe_length (registered_functions)) + return NULL_TREE; + + registered_function &rfn = *(*registered_functions)[code]; + if (rfn.overloaded_p) + return function_resolver (location, rfn.instance, rfn.decl, + *arglist).resolve (); + return NULL_TREE; +} + +/* Perform any semantic checks needed for a call to the SVE function + with subcode CODE, such as testing for integer constant expressions. + The call occurs at location LOCATION and has NARGS arguments, + given by ARGS. FNDECL is the original function decl, before + overload resolution. + + Return true if the call is valid, otherwise report a suitable error. */ +bool +check_builtin_call (location_t location, vec, unsigned int code, + tree fndecl, unsigned int nargs, tree *args) +{ + const registered_function &rfn = *(*registered_functions)[code]; + if (!check_required_extensions (location, rfn.decl, rfn.required_extensions)) + return false; + return function_checker (location, rfn.instance, fndecl, + TREE_TYPE (rfn.decl), nargs, args).check (); +} + +/* Attempt to fold STMT, given that it's a call to the SVE function + with subcode CODE. Return the new statement on success and null + on failure. Insert any other new statements at GSI. 
*/ +gimple * +gimple_fold_builtin (unsigned int code, gimple_stmt_iterator *gsi, gcall *stmt) +{ + registered_function &rfn = *(*registered_functions)[code]; + return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold (); +} + +/* Expand a call to the SVE function with subcode CODE. EXP is the call + expression and TARGET is the preferred location for the result. + Return the value of the lhs. */ +rtx +expand_builtin (unsigned int code, tree exp, rtx target) +{ + registered_function &rfn = *(*registered_functions)[code]; + if (!check_required_extensions (EXPR_LOCATION (exp), rfn.decl, + rfn.required_extensions)) + return target; + return function_expander (rfn.instance, rfn.decl, exp, target).expand (); +} + +/* If TYPE is a built-in type defined by the SVE ABI, return the mangled name, + otherwise return NULL. */ +const char * +mangle_builtin_type (const_tree type) +{ + /* ??? The C++ frontend normally strips qualifiers and attributes before + calling this hook, adding separate mangling for attributes that affect + type identity. Fortunately the type copy will have the same TYPE_NAME + as the original, so we can get the attributes from there. */ + if (TYPE_NAME (type) && TREE_CODE (TYPE_NAME (type)) == TYPE_DECL) + type = TREE_TYPE (TYPE_NAME (type)); + if (tree attr = lookup_sve_type_attribute (type)) + if (tree id = TREE_VALUE (chain_index (2, TREE_VALUE (attr)))) + return IDENTIFIER_POINTER (id); + return NULL; +} + +/* Return true if TYPE is a built-in SVE type defined by the ABI or ACLE. */ +bool +builtin_type_p (const_tree type) +{ + return lookup_sve_type_attribute (type); +} + +/* Return true if TYPE is a built-in SVE type defined by the ABI or ACLE. + If so, store the number of constituent SVE vectors in *NUM_ZR and the + number of constituent SVE predicates in *NUM_PR. */ +bool +builtin_type_p (const_tree type, unsigned int *num_zr, unsigned int *num_pr) +{ + if (tree attr = lookup_sve_type_attribute (type)) + { + tree num_zr_node = TREE_VALUE (attr); + tree num_pr_node = TREE_CHAIN (num_zr_node); + *num_zr = tree_to_uhwi (TREE_VALUE (num_zr_node)); + *num_pr = tree_to_uhwi (TREE_VALUE (num_pr_node)); + return true; + } + return false; +} + +/* ATTRS is the attribute list for a sizeless SVE type. Return the + attributes of the associated fixed-length SVE type, taking the + "SVE type" attributes from NEW_SVE_TYPE_ARGS. */ +static tree +get_arm_sve_vector_bits_attributes (tree old_attrs, tree new_sve_type_args) +{ + tree new_attrs = NULL_TREE; + tree *ptr = &new_attrs; + for (tree attr = old_attrs; attr; attr = TREE_CHAIN (attr)) + { + tree name = get_attribute_name (attr); + if (is_attribute_p ("SVE sizeless type", name)) + continue; + + tree args = TREE_VALUE (attr); + if (is_attribute_p ("SVE type", name)) + args = new_sve_type_args; + *ptr = tree_cons (TREE_PURPOSE (attr), args, NULL_TREE); + ptr = &TREE_CHAIN (*ptr); + } + return new_attrs; +} + +/* An attribute callback for the "arm_sve_vector_bits" attribute. 
*/ +tree +handle_arm_sve_vector_bits_attribute (tree *node, tree, tree args, int, + bool *no_add_attrs) +{ + *no_add_attrs = true; + + tree type = *node; + tree attr = lookup_sve_type_attribute (type); + if (!attr) + { + error ("%qs applied to non-SVE type %qT", "arm_sve_vector_bits", type); + return NULL_TREE; + } + + if (!VECTOR_TYPE_P (type)) + { + error ("%qs applied to non-vector type %qT", + "arm_sve_vector_bits", type); + return NULL_TREE; + } + + if (!sizeless_type_p (type)) + { + error ("%qs applied to type %qT, which already has a size", + "arm_sve_vector_bits", type); + return NULL_TREE; + } + + tree size = TREE_VALUE (args); + if (TREE_CODE (size) != INTEGER_CST) + { + error ("%qs requires an integer constant expression", + "arm_sve_vector_bits"); + return NULL_TREE; + } + + unsigned HOST_WIDE_INT value = tree_to_uhwi (size); + if (maybe_ne (value, BITS_PER_SVE_VECTOR)) + { + warning (OPT_Wattributes, "unsupported SVE vector size"); + return NULL_TREE; + } + + /* Construct a new list of "SVE type" attribute arguments. */ + tree new_sve_type_args = copy_list (TREE_VALUE (attr)); + + /* Mangle the type as an instance of the imaginary template: + + __SVE_VLS + + where the first parameter is the SVE type and where the second + parameter is the SVE vector length in bits. */ + tree mangled_name_node = chain_index (2, new_sve_type_args); + const char *old_mangled_name + = IDENTIFIER_POINTER (TREE_VALUE (mangled_name_node)); + char *new_mangled_name + = xasprintf ("9__SVE_VLSI%sLj%dEE", old_mangled_name, (int) value); + TREE_VALUE (mangled_name_node) = get_identifier (new_mangled_name); + free (new_mangled_name); + + /* FIXME: The type ought to be a distinct copy in all cases, but + currently that makes the C frontend reject conversions between + svbool_t and its fixed-length variants. Using a type variant + avoids that but means that we treat some ambiguous combinations + as valid. */ + tree new_type; + tree base_type = TYPE_MAIN_VARIANT (type); + if (lang_GNU_C () && VECTOR_BOOLEAN_TYPE_P (type)) + new_type = build_variant_type_copy (base_type); + else + new_type = build_distinct_type_copy (base_type); + + /* Construct a TYPE_DECL for the new type. This serves two purposes: + + - It ensures we don't print the original TYPE_DECL in error messages. + Printing the original name would be confusing because there are + situations in which the distinction between the original type and + the new type matters. For example: + + __SVInt8_t __attribute__((arm_sve_vector_bits(512))) *a; + __SVInt8_t *b; + + a = b; + + is invalid in C++, but without this, we'd print both types in + the same way. + + - Having a separate TYPE_DECL is necessary to ensure that C++ + mangling works correctly. See mangle_builtin_type for details. + + The name of the decl is something like: + + svint8_t __attribute__((arm_sve_vector_bits(512))) + + This is a compromise. It would be more accurate to use something like: + + __SVInt8_t __attribute__((arm_sve_vector_bits(512))) + + but the name is likely to be more meaningful. */ + tree acle_name_node = TREE_CHAIN (mangled_name_node); + const char *old_type_name = IDENTIFIER_POINTER (TREE_VALUE (acle_name_node)); + char *new_type_name + = xasprintf ("%s __attribute__((arm_sve_vector_bits(%d)))", + old_type_name, (int) value); + tree decl = build_decl (BUILTINS_LOCATION, TYPE_DECL, + get_identifier (new_type_name), new_type); + DECL_ARTIFICIAL (decl) = 1; + TYPE_NAME (new_type) = decl; + free (new_type_name); + + /* Allow the GNU vector extensions to be applied to vectors. 
+ The extensions aren't yet defined for packed predicates, + so continue to treat them as abstract entities for now. */ + if (!VECTOR_BOOLEAN_TYPE_P (new_type)) + TYPE_INDIVISIBLE_P (new_type) = 0; + + /* The new type is a normal sized type; it doesn't have the same + restrictions as sizeless types. */ + TYPE_ATTRIBUTES (new_type) + = get_arm_sve_vector_bits_attributes (TYPE_ATTRIBUTES (new_type), + new_sve_type_args); + + /* Apply the relevant attributes, qualifiers and alignment of TYPE, + if they differ from the original (sizeless) BASE_TYPE. */ + if (TYPE_ATTRIBUTES (base_type) != TYPE_ATTRIBUTES (type) + || TYPE_QUALS (base_type) != TYPE_QUALS (type)) + { + tree attrs + = get_arm_sve_vector_bits_attributes (TYPE_ATTRIBUTES (type), + new_sve_type_args); + new_type = build_type_attribute_qual_variant (new_type, attrs, + TYPE_QUALS (type)); + } + if (TYPE_ALIGN (base_type) != TYPE_ALIGN (type)) + new_type = build_aligned_type (new_type, TYPE_ALIGN (type)); + + *node = new_type; + return NULL_TREE; +} + +/* Implement TARGET_VERIFY_TYPE_CONTEXT for SVE types. */ +bool +verify_type_context (location_t loc, type_context_kind context, + const_tree type, bool silent_p) +{ + if (!sizeless_type_p (type)) + return true; + + switch (context) + { + case TCTX_SIZEOF: + case TCTX_STATIC_STORAGE: + if (!silent_p) + error_at (loc, "SVE type %qT does not have a fixed size", type); + return false; + + case TCTX_ALIGNOF: + if (!silent_p) + error_at (loc, "SVE type %qT does not have a defined alignment", type); + return false; + + case TCTX_THREAD_STORAGE: + if (!silent_p) + error_at (loc, "variables of type %qT cannot have thread-local" + " storage duration", type); + return false; + + case TCTX_POINTER_ARITH: + if (!silent_p) + error_at (loc, "arithmetic on pointer to SVE type %qT", type); + return false; + + case TCTX_FIELD: + if (silent_p) + ; + else if (lang_GNU_CXX ()) + error_at (loc, "member variables cannot have SVE type %qT", type); + else + error_at (loc, "fields cannot have SVE type %qT", type); + return false; + + case TCTX_ARRAY_ELEMENT: + if (!silent_p) + error_at (loc, "array elements cannot have SVE type %qT", type); + return false; + + case TCTX_ALLOCATION: + if (!silent_p) + error_at (loc, "cannot allocate objects with SVE type %qT", type); + return false; + + case TCTX_DEALLOCATION: + if (!silent_p) + error_at (loc, "cannot delete objects with SVE type %qT", type); + return false; + + case TCTX_EXCEPTIONS: + if (!silent_p) + error_at (loc, "cannot throw or catch SVE type %qT", type); + return false; + + case TCTX_CAPTURE_BY_COPY: + if (!silent_p) + error_at (loc, "capture by copy of SVE type %qT", type); + return false; + } + gcc_unreachable (); +} + +} + +using namespace aarch64_sve; + +inline void +gt_ggc_mx (function_instance *) +{ +} + +inline void +gt_pch_nx (function_instance *) +{ +} + +inline void +gt_pch_nx (function_instance *, void (*) (void *, void *), void *) +{ +} + +#include "gt-aarch64-sve-builtins.h" diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def new file mode 100644 index 0000000000000..6505163999219 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins.def @@ -0,0 +1,104 @@ +/* Builtin lists for AArch64 SVE + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef DEF_SVE_MODE +#define DEF_SVE_MODE(A, B, C, D) +#endif + +#ifndef DEF_SVE_TYPE +#define DEF_SVE_TYPE(A, B, C, D) +#endif + +#ifndef DEF_SVE_TYPE_SUFFIX +#define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) +#endif + +#ifndef DEF_SVE_FUNCTION +#define DEF_SVE_FUNCTION(A, B, C, D) +#endif + +DEF_SVE_MODE (n, none, none, none) +DEF_SVE_MODE (index, none, none, elements) +DEF_SVE_MODE (offset, none, none, bytes) +DEF_SVE_MODE (s32index, none, svint32_t, elements) +DEF_SVE_MODE (s32offset, none, svint32_t, bytes) +DEF_SVE_MODE (s64index, none, svint64_t, elements) +DEF_SVE_MODE (s64offset, none, svint64_t, bytes) +DEF_SVE_MODE (u32base, svuint32_t, none, none) +DEF_SVE_MODE (u32base_index, svuint32_t, none, elements) +DEF_SVE_MODE (u32base_offset, svuint32_t, none, bytes) +DEF_SVE_MODE (u32base_s32index, svuint32_t, svint32_t, elements) +DEF_SVE_MODE (u32base_s32offset, svuint32_t, svint32_t, bytes) +DEF_SVE_MODE (u32base_u32index, svuint32_t, svuint32_t, elements) +DEF_SVE_MODE (u32base_u32offset, svuint32_t, svuint32_t, bytes) +DEF_SVE_MODE (u32index, none, svuint32_t, elements) +DEF_SVE_MODE (u32offset, none, svuint32_t, bytes) +DEF_SVE_MODE (u64base, svuint64_t, none, none) +DEF_SVE_MODE (u64base_index, svuint64_t, none, elements) +DEF_SVE_MODE (u64base_offset, svuint64_t, none, bytes) +DEF_SVE_MODE (u64base_s64index, svuint64_t, svint64_t, elements) +DEF_SVE_MODE (u64base_s64offset, svuint64_t, svint64_t, bytes) +DEF_SVE_MODE (u64base_u64index, svuint64_t, svuint64_t, elements) +DEF_SVE_MODE (u64base_u64offset, svuint64_t, svuint64_t, bytes) +DEF_SVE_MODE (u64index, none, svuint64_t, elements) +DEF_SVE_MODE (u64offset, none, svuint64_t, bytes) +DEF_SVE_MODE (vnum, none, none, vectors) + +DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node) +DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, aarch64_bf16_type_node) +DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node) +DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node) +DEF_SVE_TYPE (svfloat64_t, 13, __SVFloat64_t, double_type_node) +DEF_SVE_TYPE (svint8_t, 10, __SVInt8_t, get_typenode_from_name (INT8_TYPE)) +DEF_SVE_TYPE (svint16_t, 11, __SVInt16_t, get_typenode_from_name (INT16_TYPE)) +DEF_SVE_TYPE (svint32_t, 11, __SVInt32_t, get_typenode_from_name (INT32_TYPE)) +DEF_SVE_TYPE (svint64_t, 11, __SVInt64_t, get_typenode_from_name (INT64_TYPE)) +DEF_SVE_TYPE (svuint8_t, 11, __SVUint8_t, get_typenode_from_name (UINT8_TYPE)) +DEF_SVE_TYPE (svuint16_t, 12, __SVUint16_t, + get_typenode_from_name (UINT16_TYPE)) +DEF_SVE_TYPE (svuint32_t, 12, __SVUint32_t, + get_typenode_from_name (UINT32_TYPE)) +DEF_SVE_TYPE (svuint64_t, 12, __SVUint64_t, + get_typenode_from_name (UINT64_TYPE)) + +DEF_SVE_TYPE_SUFFIX (b, svbool_t, bool, 8, VNx16BImode) +DEF_SVE_TYPE_SUFFIX (b8, svbool_t, bool, 8, VNx16BImode) +DEF_SVE_TYPE_SUFFIX (b16, svbool_t, bool, 16, VNx8BImode) +DEF_SVE_TYPE_SUFFIX (b32, svbool_t, bool, 32, VNx4BImode) +DEF_SVE_TYPE_SUFFIX (b64, 
svbool_t, bool, 64, VNx2BImode) +DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode) +DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode) +DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode) +DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode) +DEF_SVE_TYPE_SUFFIX (s8, svint8_t, signed, 8, VNx16QImode) +DEF_SVE_TYPE_SUFFIX (s16, svint16_t, signed, 16, VNx8HImode) +DEF_SVE_TYPE_SUFFIX (s32, svint32_t, signed, 32, VNx4SImode) +DEF_SVE_TYPE_SUFFIX (s64, svint64_t, signed, 64, VNx2DImode) +DEF_SVE_TYPE_SUFFIX (u8, svuint8_t, unsigned, 8, VNx16QImode) +DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode) +DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) +DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode) + +#include "aarch64-sve-builtins-base.def" +#include "aarch64-sve-builtins-sve2.def" + +#undef DEF_SVE_FUNCTION +#undef DEF_SVE_TYPE_SUFFIX +#undef DEF_SVE_TYPE +#undef DEF_SVE_MODE diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h new file mode 100644 index 0000000000000..b701f90ac1aa3 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -0,0 +1,882 @@ +/* ACLE support for AArch64 SVE + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_SVE_BUILTINS_H +#define GCC_AARCH64_SVE_BUILTINS_H + +/* The full name of an SVE ACLE function is the concatenation of: + + - the base name ("svadd", etc.) + - the "mode" suffix ("_n", "_index", etc.) + - the type suffixes ("_s32", "_b8", etc.) + - the predication suffix ("_x", "_z", etc.) + + Each piece of information is individually useful, so we retain this + classification throughout: + + - function_base represents the base name + + - mode_suffix_index represents the mode suffix + + - type_suffix_index represents individual type suffixes, while + type_suffix_pair represents a pair of them + + - prediction_index extends the predication suffix with an additional + alternative: PRED_implicit for implicitly-predicated operations + + In addition to its unique full name, a function may have a shorter + overloaded alias. This alias removes pieces of the suffixes that + can be inferred from the arguments, such as by shortening the mode + suffix or dropping some of the type suffixes. The base name and the + predication suffix stay the same. + + The function_shape class describes what arguments a given function + takes and what its overloaded alias is called. In broad terms, + function_base describes how the underlying instruction behaves while + function_shape describes how that instruction has been presented at + the language level. + + The static list of functions uses function_group to describe a group + of related functions. 
The function_builder class is responsible for + expanding this static description into a list of individual functions + and registering the associated built-in functions. function_instance + describes one of these individual functions in terms of the properties + described above. + + The classes involved in compiling a function call are: + + - function_resolver, which resolves an overloaded function call to a + specific function_instance and its associated function decl + + - function_checker, which checks whether the values of the arguments + conform to the ACLE specification + + - gimple_folder, which tries to fold a function call at the gimple level + + - function_expander, which expands a function call into rtl instructions + + function_resolver and function_checker operate at the language level + and so are associated with the function_shape. gimple_folder and + function_expander are concerned with the behavior of the function + and so are associated with the function_base. + + Note that we've specifically chosen not to fold calls in the frontend, + since SVE intrinsics will hardly ever fold a useful language-level + constant. */ +namespace aarch64_sve +{ +/* The maximum number of vectors in an ACLE tuple type. */ +const unsigned int MAX_TUPLE_SIZE = 4; + +/* Used to represent the default merge argument index for _m functions. + The actual index depends on how many arguments the function takes. */ +const unsigned int DEFAULT_MERGE_ARGNO = ~0U; + +/* Flags that describe what a function might do, in addition to reading + its arguments and returning a result. */ +const unsigned int CP_READ_FPCR = 1U << 0; +const unsigned int CP_RAISE_FP_EXCEPTIONS = 1U << 1; +const unsigned int CP_READ_MEMORY = 1U << 2; +const unsigned int CP_PREFETCH_MEMORY = 1U << 3; +const unsigned int CP_WRITE_MEMORY = 1U << 4; +const unsigned int CP_READ_FFR = 1U << 5; +const unsigned int CP_WRITE_FFR = 1U << 6; + +/* Enumerates the SVE predicate and (data) vector types, together called + "vector types" for brevity. */ +enum vector_type_index +{ +#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ + VECTOR_TYPE_ ## ACLE_NAME, +#include "aarch64-sve-builtins.def" + NUM_VECTOR_TYPES +}; + +/* Classifies the available measurement units for an address displacement. */ +enum units_index +{ + UNITS_none, + UNITS_bytes, + UNITS_elements, + UNITS_vectors +}; + +/* Describes the various uses of a governing predicate. */ +enum predication_index +{ + /* No governing predicate is present. */ + PRED_none, + + /* A governing predicate is present but there is no predication suffix + associated with it. This is used when the result is neither a vector + nor a predicate, since the distinction between "zeroing" and "merging" + doesn't apply in that case. It is also used when a suffix would be + redundant (such as for loads and comparisons, which are inherently + zeroing operations). */ + PRED_implicit, + + /* Merging predication: copy inactive lanes from the first data argument + to the vector result. */ + PRED_m, + + /* "Don't care" predication: set inactive lanes of the vector result + to arbitrary values. */ + PRED_x, + + /* Zero predication: set inactive lanes of the vector result to zero. */ + PRED_z, + + NUM_PREDS +}; + +/* Classifies element types, based on type suffixes with the bit count + removed. 
*/ +enum type_class_index +{ + TYPE_bool, + TYPE_bfloat, + TYPE_float, + TYPE_signed, + TYPE_unsigned, + NUM_TYPE_CLASSES +}; + +/* Classifies an operation into "modes"; for example, to distinguish + vector-scalar operations from vector-vector operations, or to + distinguish between different addressing modes. This classification + accounts for the function suffixes that occur between the base name + and the first type suffix. */ +enum mode_suffix_index +{ +#define DEF_SVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) MODE_##NAME, +#include "aarch64-sve-builtins.def" + MODE_none +}; + +/* Enumerates the possible type suffixes. Each suffix is associated with + a vector type, but for predicates provides extra information about the + element size. */ +enum type_suffix_index +{ +#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ + TYPE_SUFFIX_ ## NAME, +#include "aarch64-sve-builtins.def" + NUM_TYPE_SUFFIXES +}; + +/* Combines two type suffixes. */ +typedef enum type_suffix_index type_suffix_pair[2]; + +class function_base; +class function_shape; + +/* Static information about a mode suffix. */ +struct mode_suffix_info +{ + /* The suffix string itself. */ + const char *string; + + /* The type of the vector base address, or NUM_VECTOR_TYPES if the + mode does not include a vector base address. */ + vector_type_index base_vector_type; + + /* The type of the vector displacement, or NUM_VECTOR_TYPES if the + mode does not include a vector displacement. (Note that scalar + displacements are always int64_t.) */ + vector_type_index displacement_vector_type; + + /* The units in which the vector or scalar displacement is measured, + or UNITS_none if the mode doesn't take a displacement. */ + units_index displacement_units; +}; + +/* Static information about a type suffix. */ +struct type_suffix_info +{ + /* The suffix string itself. */ + const char *string; + + /* The associated ACLE vector or predicate type. */ + vector_type_index vector_type : 8; + + /* What kind of type the suffix represents. */ + type_class_index tclass : 8; + + /* The number of bits and bytes in an element. For predicates this + measures the associated data elements. */ + unsigned int element_bits : 8; + unsigned int element_bytes : 8; + + /* True if the suffix is for an integer type. */ + unsigned int integer_p : 1; + /* True if the suffix is for an unsigned type. */ + unsigned int unsigned_p : 1; + /* True if the suffix is for a floating-point type. */ + unsigned int float_p : 1; + /* True if the suffix is for a boolean type. */ + unsigned int bool_p : 1; + unsigned int spare : 12; + + /* The associated vector or predicate mode. */ + machine_mode vector_mode : 16; +}; + +/* Static information about a set of functions. */ +struct function_group_info +{ + /* The base name, as a string. */ + const char *base_name; + + /* Describes the behavior associated with the function base name. */ + const function_base *const *base; + + /* The shape of the functions, as described above the class definition. + It's possible to have entries with the same base name but different + shapes. */ + const function_shape *const *shape; + + /* A list of the available type suffixes, and of the available predication + types. The function supports every combination of the two. + + The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES + while the list of predication types is terminated by NUM_PREDS. + The list of type suffixes is lexicographically ordered based + on the index value. 
*/ + const type_suffix_pair *types; + const predication_index *preds; + + /* The architecture extensions that the functions require, as a set of + AARCH64_FL_* flags. */ + uint64_t required_extensions; +}; + +/* Describes a single fully-resolved function (i.e. one that has a + unique full name). */ +class GTY((user)) function_instance +{ +public: + function_instance (const char *, const function_base *, + const function_shape *, mode_suffix_index, + const type_suffix_pair &, predication_index); + + bool operator== (const function_instance &) const; + bool operator!= (const function_instance &) const; + hashval_t hash () const; + + unsigned int call_properties () const; + bool reads_global_state_p () const; + bool modifies_global_state_p () const; + bool could_trap_p () const; + + unsigned int vectors_per_tuple () const; + tree memory_scalar_type () const; + machine_mode memory_vector_mode () const; + + const mode_suffix_info &mode_suffix () const; + tree base_vector_type () const; + tree displacement_vector_type () const; + units_index displacement_units () const; + + const type_suffix_info &type_suffix (unsigned int) const; + tree scalar_type (unsigned int) const; + tree vector_type (unsigned int) const; + tree tuple_type (unsigned int) const; + unsigned int elements_per_vq (unsigned int i) const; + machine_mode vector_mode (unsigned int) const; + machine_mode gp_mode (unsigned int) const; + + /* The properties of the function. (The explicit "enum"s are required + for gengtype.) */ + const char *base_name; + const function_base *base; + const function_shape *shape; + enum mode_suffix_index mode_suffix_id; + type_suffix_pair type_suffix_ids; + enum predication_index pred; +}; + +class registered_function; + +/* A class for building and registering function decls. */ +class function_builder +{ +public: + function_builder (); + ~function_builder (); + + void add_unique_function (const function_instance &, tree, + vec &, uint64_t, bool); + void add_overloaded_function (const function_instance &, uint64_t); + void add_overloaded_functions (const function_group_info &, + mode_suffix_index); + + void register_function_group (const function_group_info &); + +private: + void append_name (const char *); + char *finish_name (); + + char *get_name (const function_instance &, bool); + + tree get_attributes (const function_instance &); + + registered_function &add_function (const function_instance &, + const char *, tree, tree, + uint64_t, bool, bool); + + /* The function type to use for functions that are resolved by + function_resolver. */ + tree m_overload_type; + + /* True if we should create a separate decl for each instance of an + overloaded function, instead of using function_resolver. */ + bool m_direct_overloads; + + /* Used for building up function names. */ + obstack m_string_obstack; + + /* Maps all overloaded function names that we've registered so far + to their associated function_instances. */ + hash_map m_overload_names; +}; + +/* A base class for handling calls to built-in functions. */ +class function_call_info : public function_instance +{ +public: + function_call_info (location_t, const function_instance &, tree); + + bool function_returns_void_p (); + + /* The location of the call. */ + location_t location; + + /* The FUNCTION_DECL that is being called. */ + tree fndecl; +}; + +/* A class for resolving an overloaded function call. 
*/ +class function_resolver : public function_call_info +{ +public: + enum { SAME_SIZE = 256, HALF_SIZE, QUARTER_SIZE }; + static const type_class_index SAME_TYPE_CLASS = NUM_TYPE_CLASSES; + + function_resolver (location_t, const function_instance &, tree, + vec &); + + tree get_vector_type (type_suffix_index); + const char *get_scalar_type_name (type_suffix_index); + tree get_argument_type (unsigned int); + bool scalar_argument_p (unsigned int); + + tree report_no_such_form (type_suffix_index); + tree lookup_form (mode_suffix_index, + type_suffix_index = NUM_TYPE_SUFFIXES, + type_suffix_index = NUM_TYPE_SUFFIXES); + tree resolve_to (mode_suffix_index, + type_suffix_index = NUM_TYPE_SUFFIXES, + type_suffix_index = NUM_TYPE_SUFFIXES); + + type_suffix_index infer_integer_scalar_type (unsigned int); + type_suffix_index infer_pointer_type (unsigned int, bool = false); + type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); + type_suffix_index infer_vector_type (unsigned int); + type_suffix_index infer_integer_vector_type (unsigned int); + type_suffix_index infer_unsigned_vector_type (unsigned int); + type_suffix_index infer_sd_vector_type (unsigned int); + type_suffix_index infer_tuple_type (unsigned int); + + bool require_vector_or_scalar_type (unsigned int); + + bool require_vector_type (unsigned int, vector_type_index); + bool require_matching_vector_type (unsigned int, type_suffix_index); + bool require_derived_vector_type (unsigned int, unsigned int, + type_suffix_index, + type_class_index = SAME_TYPE_CLASS, + unsigned int = SAME_SIZE); + + bool require_scalar_type (unsigned int, const char *); + bool require_pointer_type (unsigned int); + bool require_matching_integer_scalar_type (unsigned int, unsigned int, + type_suffix_index); + bool require_derived_scalar_type (unsigned int, type_class_index, + unsigned int = SAME_SIZE); + bool require_matching_pointer_type (unsigned int, unsigned int, + type_suffix_index); + bool require_integer_immediate (unsigned int); + + vector_type_index infer_vector_base_type (unsigned int); + vector_type_index infer_vector_displacement_type (unsigned int); + + mode_suffix_index resolve_sv_displacement (unsigned int, + type_suffix_index, bool); + mode_suffix_index resolve_gather_address (unsigned int, + type_suffix_index, bool); + mode_suffix_index resolve_adr_address (unsigned int); + + bool check_num_arguments (unsigned int); + bool check_gp_argument (unsigned int, unsigned int &, unsigned int &); + tree resolve_unary (type_class_index = SAME_TYPE_CLASS, + unsigned int = SAME_SIZE, bool = false); + tree resolve_uniform (unsigned int, unsigned int = 0); + tree resolve_uniform_opt_n (unsigned int); + tree finish_opt_n_resolution (unsigned int, unsigned int, type_suffix_index, + type_class_index = SAME_TYPE_CLASS, + unsigned int = SAME_SIZE, + type_suffix_index = NUM_TYPE_SUFFIXES); + + tree resolve (); + +private: + /* The arguments to the overloaded function. */ + vec &m_arglist; +}; + +/* A class for checking that the semantic constraints on a function call are + satisfied, such as arguments being integer constant expressions with + a particular range. The parent class's FNDECL is the decl that was + called in the original source, before overload resolution. 
*/ +class function_checker : public function_call_info +{ +public: + function_checker (location_t, const function_instance &, tree, + tree, unsigned int, tree *); + + bool require_immediate_either_or (unsigned int, HOST_WIDE_INT, + HOST_WIDE_INT); + bool require_immediate_enum (unsigned int, tree); + bool require_immediate_lane_index (unsigned int, unsigned int = 1); + bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT, + HOST_WIDE_INT, HOST_WIDE_INT); + bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT); + + bool check (); + +private: + bool argument_exists_p (unsigned int); + + bool require_immediate (unsigned int, HOST_WIDE_INT &); + + /* The type of the resolved function. */ + tree m_fntype; + + /* The arguments to the function. */ + unsigned int m_nargs; + tree *m_args; + + /* The first argument not associated with the function's predication + type. */ + unsigned int m_base_arg; +}; + +/* A class for folding a gimple function call. */ +class gimple_folder : public function_call_info +{ +public: + gimple_folder (const function_instance &, tree, + gimple_stmt_iterator *, gcall *); + + tree force_vector (gimple_seq &, tree, tree); + tree convert_pred (gimple_seq &, tree, unsigned int); + tree fold_contiguous_base (gimple_seq &, tree); + tree load_store_cookie (tree); + + gimple *redirect_call (const function_instance &); + gimple *fold_to_pfalse (); + gimple *fold_to_ptrue (); + gimple *fold_to_vl_pred (unsigned int); + + gimple *fold (); + + /* Where to insert extra statements that feed the final replacement. */ + gimple_stmt_iterator *gsi; + + /* The call we're folding. */ + gcall *call; + + /* The result of the call, or null if none. */ + tree lhs; +}; + +/* A class for expanding a function call into RTL. */ +class function_expander : public function_call_info +{ +public: + function_expander (const function_instance &, tree, tree, rtx); + rtx expand (); + + insn_code direct_optab_handler (optab, unsigned int = 0); + insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0, + machine_mode = E_VOIDmode); + + bool overlaps_input_p (rtx); + + rtx convert_to_pmode (rtx); + rtx get_contiguous_base (machine_mode); + rtx get_fallback_value (machine_mode, unsigned int, + unsigned int, unsigned int &); + rtx get_reg_target (); + rtx get_nonoverlapping_reg_target (); + + void add_output_operand (insn_code); + void add_input_operand (insn_code, rtx); + void add_integer_operand (HOST_WIDE_INT); + void add_mem_operand (machine_mode, rtx); + void add_address_operand (rtx); + void add_fixed_operand (rtx); + rtx generate_insn (insn_code); + + void prepare_gather_address_operands (unsigned int, bool = true); + void prepare_prefetch_operands (); + void add_ptrue_hint (unsigned int, machine_mode); + void rotate_inputs_left (unsigned int, unsigned int); + bool try_negating_argument (unsigned int, machine_mode); + + rtx use_exact_insn (insn_code); + rtx use_unpred_insn (insn_code); + rtx use_pred_x_insn (insn_code); + rtx use_cond_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO); + rtx use_vcond_mask_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO); + rtx use_contiguous_load_insn (insn_code); + rtx use_contiguous_prefetch_insn (insn_code); + rtx use_contiguous_store_insn (insn_code); + + rtx map_to_rtx_codes (rtx_code, rtx_code, int, + unsigned int = DEFAULT_MERGE_ARGNO); + rtx map_to_unspecs (int, int, int, unsigned int = DEFAULT_MERGE_ARGNO); + + /* The function call expression. 
*/ + tree call_expr; + + /* For functions that return a value, this is the preferred location + of that value. It could be null or could have a different mode + from the function return type. */ + rtx possible_target; + + /* The expanded arguments. */ + auto_vec args; + +private: + /* Used to build up the operands to an instruction. */ + auto_vec m_ops; +}; + +/* Provides information about a particular function base name, and handles + tasks related to the base name. */ +class function_base +{ +public: + /* Return a set of CP_* flags that describe what the function might do, + in addition to reading its arguments and returning a result. */ + virtual unsigned int call_properties (const function_instance &) const; + + /* If the function operates on tuples of vectors, return the number + of vectors in the tuples, otherwise return 1. */ + virtual unsigned int vectors_per_tuple () const { return 1; } + + /* If the function addresses memory, return the type of a single + scalar memory element. */ + virtual tree + memory_scalar_type (const function_instance &) const + { + gcc_unreachable (); + } + + /* If the function addresses memory, return a vector mode whose + GET_MODE_NUNITS is the number of elements addressed and whose + GET_MODE_INNER is the mode of a single scalar memory element. */ + virtual machine_mode + memory_vector_mode (const function_instance &) const + { + gcc_unreachable (); + } + + /* Try to fold the given gimple call. Return the new gimple statement + on success, otherwise return null. */ + virtual gimple *fold (gimple_folder &) const { return NULL; } + + /* Expand the given call into rtl. Return the result of the function, + or an arbitrary value if the function doesn't return a result. */ + virtual rtx expand (function_expander &) const = 0; +}; + +/* Classifies functions into "shapes". The idea is to take all the + type signatures for a set of functions, remove the governing predicate + (if any), and classify what's left based on: + + - the number of arguments + + - the process of determining the types in the signature from the mode + and type suffixes in the function name (including types that are not + affected by the suffixes) + + - which arguments must be integer constant expressions, and what range + those arguments have + + - the process for mapping overloaded names to "full" names. */ +class function_shape +{ +public: + virtual bool explicit_type_suffix_p (unsigned int) const = 0; + + /* Define all functions associated with the given group. */ + virtual void build (function_builder &, + const function_group_info &) const = 0; + + /* Try to resolve the overloaded call. Return the non-overloaded + function decl on success and error_mark_node on failure. */ + virtual tree resolve (function_resolver &) const = 0; + + /* Check whether the given call is semantically valid. Return true + if it is, otherwise report an error and return false. */ + virtual bool check (function_checker &) const { return true; } +}; + +/* RAII class for enabling enough SVE features to define the built-in + types and implement the arm_sve.h pragma. 
*/ +class sve_switcher +{ +public: + sve_switcher (); + ~sve_switcher (); + +private: + unsigned long m_old_isa_flags; + unsigned int m_old_maximum_field_alignment; + bool m_old_general_regs_only; + bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; +}; + +extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1]; +extern const mode_suffix_info mode_suffixes[MODE_none + 1]; + +extern tree scalar_types[NUM_VECTOR_TYPES]; +extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; +extern tree acle_svpattern; +extern tree acle_svprfop; + +/* Return the ACLE type svbool_t. */ +inline tree +get_svbool_t (void) +{ + return acle_vector_types[0][VECTOR_TYPE_svbool_t]; +} + +/* Try to find a mode with the given mode_suffix_info fields. Return the + mode on success or MODE_none on failure. */ +inline mode_suffix_index +find_mode_suffix (vector_type_index base_vector_type, + vector_type_index displacement_vector_type, + units_index displacement_units) +{ + for (unsigned int mode_i = 0; mode_i < ARRAY_SIZE (mode_suffixes); ++mode_i) + { + const mode_suffix_info &mode = mode_suffixes[mode_i]; + if (mode.base_vector_type == base_vector_type + && mode.displacement_vector_type == displacement_vector_type + && mode.displacement_units == displacement_units) + return mode_suffix_index (mode_i); + } + return MODE_none; +} + +/* Return the type suffix associated with ELEMENT_BITS-bit elements of type + class TCLASS. */ +inline type_suffix_index +find_type_suffix (type_class_index tclass, unsigned int element_bits) +{ + for (unsigned int i = 0; i < NUM_TYPE_SUFFIXES; ++i) + if (type_suffixes[i].tclass == tclass + && type_suffixes[i].element_bits == element_bits) + return type_suffix_index (i); + gcc_unreachable (); +} + +/* Return the single field in tuple type TYPE. */ +inline tree +tuple_type_field (tree type) +{ + for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + if (TREE_CODE (field) == FIELD_DECL) + return field; + gcc_unreachable (); +} + +inline function_instance:: +function_instance (const char *base_name_in, + const function_base *base_in, + const function_shape *shape_in, + mode_suffix_index mode_suffix_id_in, + const type_suffix_pair &type_suffix_ids_in, + predication_index pred_in) + : base_name (base_name_in), base (base_in), shape (shape_in), + mode_suffix_id (mode_suffix_id_in), pred (pred_in) +{ + memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids)); +} + +inline bool +function_instance::operator== (const function_instance &other) const +{ + return (base == other.base + && shape == other.shape + && mode_suffix_id == other.mode_suffix_id + && pred == other.pred + && type_suffix_ids[0] == other.type_suffix_ids[0] + && type_suffix_ids[1] == other.type_suffix_ids[1]); +} + +inline bool +function_instance::operator!= (const function_instance &other) const +{ + return !operator== (other); +} + +/* If the function operates on tuples of vectors, return the number + of vectors in the tuples, otherwise return 1. */ +inline unsigned int +function_instance::vectors_per_tuple () const +{ + return base->vectors_per_tuple (); +} + +/* If the function addresses memory, return the type of a single + scalar memory element. */ +inline tree +function_instance::memory_scalar_type () const +{ + return base->memory_scalar_type (*this); +} + +/* If the function addresses memory, return a vector mode whose + GET_MODE_NUNITS is the number of elements addressed and whose + GET_MODE_INNER is the mode of a single scalar memory element. 
*/ +inline machine_mode +function_instance::memory_vector_mode () const +{ + return base->memory_vector_mode (*this); +} + +/* Return information about the function's mode suffix. */ +inline const mode_suffix_info & +function_instance::mode_suffix () const +{ + return mode_suffixes[mode_suffix_id]; +} + +/* Return the type of the function's vector base address argument, + or null it doesn't have a vector base address. */ +inline tree +function_instance::base_vector_type () const +{ + return acle_vector_types[0][mode_suffix ().base_vector_type]; +} + +/* Return the type of the function's vector index or offset argument, + or null if doesn't have a vector index or offset argument. */ +inline tree +function_instance::displacement_vector_type () const +{ + return acle_vector_types[0][mode_suffix ().displacement_vector_type]; +} + +/* If the function takes a vector or scalar displacement, return the units + in which the displacement is measured, otherwise return UNITS_none. */ +inline units_index +function_instance::displacement_units () const +{ + return mode_suffix ().displacement_units; +} + +/* Return information about type suffix I. */ +inline const type_suffix_info & +function_instance::type_suffix (unsigned int i) const +{ + return type_suffixes[type_suffix_ids[i]]; +} + +/* Return the scalar type associated with type suffix I. */ +inline tree +function_instance::scalar_type (unsigned int i) const +{ + return scalar_types[type_suffix (i).vector_type]; +} + +/* Return the vector type associated with type suffix I. */ +inline tree +function_instance::vector_type (unsigned int i) const +{ + return acle_vector_types[0][type_suffix (i).vector_type]; +} + +/* If the function operates on tuples of vectors, return the tuple type + associated with type suffix I, otherwise return the vector type associated + with type suffix I. */ +inline tree +function_instance::tuple_type (unsigned int i) const +{ + unsigned int num_vectors = vectors_per_tuple (); + return acle_vector_types[num_vectors - 1][type_suffix (i).vector_type]; +} + +/* Return the number of elements of type suffix I that fit within a + 128-bit block. */ +inline unsigned int +function_instance::elements_per_vq (unsigned int i) const +{ + return 128 / type_suffix (i).element_bits; +} + +/* Return the vector or predicate mode associated with type suffix I. */ +inline machine_mode +function_instance::vector_mode (unsigned int i) const +{ + return type_suffix (i).vector_mode; +} + +/* Return the mode of the governing predicate to use when operating on + type suffix I. */ +inline machine_mode +function_instance::gp_mode (unsigned int i) const +{ + return aarch64_sve_pred_mode (type_suffix (i).element_bytes).require (); +} + +/* Return true if the function has no return value. */ +inline bool +function_call_info::function_returns_void_p () +{ + return TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node; +} + +/* Default implementation of function::call_properties, with conservatively + correct behavior for floating-point instructions. 
*/ +inline unsigned int +function_base::call_properties (const function_instance &instance) const +{ + unsigned int flags = 0; + if (instance.type_suffix (0).float_p || instance.type_suffix (1).float_p) + flags |= CP_READ_FPCR | CP_RAISE_FP_EXCEPTIONS; + return flags; +} + +} + +#endif diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index d885534050948..b8b6f55e1607e 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1,5 +1,5 @@ ;; Machine description for AArch64 SVE. -;; Copyright (C) 2009-2016 Free Software Foundation, Inc. +;; Copyright (C) 2009-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -18,8 +18,171 @@ ;; along with GCC; see the file COPYING3. If not see ;; . -;; Note on the handling of big-endian SVE -;; -------------------------------------- +;; The file is organised into the following sections (search for the full +;; line): +;; +;; == General notes +;; ---- Note on the handling of big-endian SVE +;; ---- Description of UNSPEC_PTEST +;; ---- Description of UNSPEC_PRED_Z +;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X +;; ---- Note on predicated FP arithmetic patterns and GP "strictness" +;; ---- Note on FFR handling +;; +;; == Moves +;; ---- Moves of single vectors +;; ---- Moves of multiple vectors +;; ---- Moves of predicates +;; ---- Moves relating to the FFR +;; +;; == Loads +;; ---- Normal contiguous loads +;; ---- Extending contiguous loads +;; ---- First-faulting contiguous loads +;; ---- First-faulting extending contiguous loads +;; ---- Non-temporal contiguous loads +;; ---- Normal gather loads +;; ---- Extending gather loads +;; ---- First-faulting gather loads +;; ---- First-faulting extending gather loads +;; +;; == Prefetches +;; ---- Contiguous prefetches +;; ---- Gather prefetches +;; +;; == Stores +;; ---- Normal contiguous stores +;; ---- Truncating contiguous stores +;; ---- Non-temporal contiguous stores +;; ---- Normal scatter stores +;; ---- Truncating scatter stores +;; +;; == Vector creation +;; ---- [INT,FP] Duplicate element +;; ---- [INT,FP] Initialize from individual elements +;; ---- [INT] Linear series +;; ---- [PRED] Duplicate element +;; +;; == Vector decomposition +;; ---- [INT,FP] Extract index +;; ---- [INT,FP] Extract active element +;; ---- [PRED] Extract index +;; +;; == Unary arithmetic +;; ---- [INT] General unary arithmetic corresponding to rtx codes +;; ---- [INT] General unary arithmetic corresponding to unspecs +;; ---- [INT] Sign and zero extension +;; ---- [INT] Truncation +;; ---- [INT] Logical inverse +;; ---- [FP<-INT] General unary arithmetic that maps to unspecs +;; ---- [FP] General unary arithmetic corresponding to unspecs +;; ---- [FP] Square root +;; ---- [FP] Reciprocal square root +;; ---- [PRED] Inverse + +;; == Binary arithmetic +;; ---- [INT] General binary arithmetic corresponding to rtx codes +;; ---- [INT] Addition +;; ---- [INT] Subtraction +;; ---- [INT] Take address +;; ---- [INT] Absolute difference +;; ---- [INT] Saturating addition and subtraction +;; ---- [INT] Highpart multiplication +;; ---- [INT] Division +;; ---- [INT] Binary logical operations +;; ---- [INT] Binary logical operations (inverted second input) +;; ---- [INT] Shifts (rounding towards -Inf) +;; ---- [INT] Shifts (rounding towards 0) +;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs +;; ---- [FP] General binary arithmetic corresponding to rtx codes +;; ---- [FP] General binary
arithmetic corresponding to unspecs +;; ---- [FP] Addition +;; ---- [FP] Complex addition +;; ---- [FP] Subtraction +;; ---- [FP] Absolute difference +;; ---- [FP] Multiplication +;; ---- [FP] Division +;; ---- [FP] Binary logical operations +;; ---- [FP] Sign copying +;; ---- [FP] Maximum and minimum +;; ---- [PRED] Binary logical operations +;; ---- [PRED] Binary logical operations (inverted second input) +;; ---- [PRED] Binary logical operations (inverted result) +;; +;; == Ternary arithmetic +;; ---- [INT] MLA and MAD +;; ---- [INT] MLS and MSB +;; ---- [INT] Dot product +;; ---- [INT] Sum of absolute differences +;; ---- [INT] Matrix multiply-accumulate +;; ---- [FP] General ternary arithmetic corresponding to unspecs +;; ---- [FP] Complex multiply-add +;; ---- [FP] Trigonometric multiply-add +;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) +;; ---- [FP] Matrix multiply-accumulate +;; +;; == Comparisons and selects +;; ---- [INT,FP] Select based on predicates +;; ---- [INT,FP] Compare and select +;; ---- [INT] Comparisons +;; ---- [INT] While tests +;; ---- [FP] Direct comparisons +;; ---- [FP] Absolute comparisons +;; ---- [PRED] Select +;; ---- [PRED] Test bits +;; +;; == Reductions +;; ---- [INT,FP] Conditional reductions +;; ---- [INT] Tree reductions +;; ---- [FP] Tree reductions +;; ---- [FP] Left-to-right reductions +;; +;; == Permutes +;; ---- [INT,FP] General permutes +;; ---- [INT,FP] Special-purpose unary permutes +;; ---- [INT,FP] Special-purpose binary permutes +;; ---- [PRED] Special-purpose unary permutes +;; ---- [PRED] Special-purpose binary permutes +;; +;; == Conversions +;; ---- [INT<-INT] Packs +;; ---- [INT<-INT] Unpacks +;; ---- [INT<-FP] Conversions +;; ---- [INT<-FP] Packs +;; ---- [INT<-FP] Unpacks +;; ---- [FP<-INT] Conversions +;; ---- [FP<-INT] Packs +;; ---- [FP<-INT] Unpacks +;; ---- [FP<-FP] Packs +;; ---- [FP<-FP] Packs (bfloat16) +;; ---- [FP<-FP] Unpacks +;; ---- [PRED<-PRED] Packs +;; ---- [PRED<-PRED] Unpacks +;; +;; == Vector partitioning +;; ---- [PRED] Unary partitioning +;; ---- [PRED] Binary partitioning +;; ---- [PRED] Scalarization +;; +;; == Counting elements +;; ---- [INT] Count elements in a pattern (scalar) +;; ---- [INT] Increment by the number of elements in a pattern (scalar) +;; ---- [INT] Increment by the number of elements in a pattern (vector) +;; ---- [INT] Decrement by the number of elements in a pattern (scalar) +;; ---- [INT] Decrement by the number of elements in a pattern (vector) +;; ---- [INT] Count elements in a predicate (scalar) +;; ---- [INT] Increment by the number of elements in a predicate (scalar) +;; ---- [INT] Increment by the number of elements in a predicate (vector) +;; ---- [INT] Decrement by the number of elements in a predicate (scalar) +;; ---- [INT] Decrement by the number of elements in a predicate (vector) + +;; ========================================================================= +;; == General notes +;; ========================================================================= +;; +;; ------------------------------------------------------------------------- +;; ---- Note on the handling of big-endian SVE +;; ------------------------------------------------------------------------- ;; ;; On big-endian systems, Advanced SIMD mov patterns act in the ;; same way as movdi or movti would: the first byte of memory goes @@ -59,9 +222,425 @@ ;; the order of the bytes within the elements is different. 
We instead ;; access spill slots via LD1 and ST1, using secondary reloads to ;; reserve a predicate register. +;; +;; ------------------------------------------------------------------------- +;; ---- Description of UNSPEC_PTEST +;; ------------------------------------------------------------------------- +;; +;; SVE provides a PTEST instruction for testing the active lanes of a +;; predicate and setting the flags based on the result. The associated +;; condition code tests are: +;; +;; - any (= ne): at least one active bit is set +;; - none (= eq): all active bits are clear (*) +;; - first (= mi): the first active bit is set +;; - nfrst (= pl): the first active bit is clear (*) +;; - last (= cc): the last active bit is set +;; - nlast (= cs): the last active bit is clear (*) +;; +;; where the conditions marked (*) are also true when there are no active +;; lanes (i.e. when the governing predicate is a PFALSE). The flags results +;; of a PTEST use the condition code mode CC_NZC. +;; +;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). +;; This means that for other predicate modes, we need a governing predicate +;; in which all bits are defined. +;; +;; For example, most predicated .H operations ignore the odd bits of the +;; governing predicate, so that an active lane is represented by the +;; bits "1x" and an inactive lane by the bits "0x", where "x" can be +;; any value. To test a .H predicate, we instead need "10" and "00" +;; respectively, so that the condition only tests the even bits of the +;; predicate. +;; +;; Several instructions set the flags as a side-effect, in the same way +;; that a separate PTEST would. It's important for code quality that we +;; use these flags results as often as possible, particularly in the case +;; of WHILE* and RDFFR. +;; +;; Also, some of the instructions that set the flags are unpredicated +;; and instead implicitly test all .B, .H, .S or .D elements, as though +;; they were predicated on a PTRUE of that size. For example, a .S +;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE +;; would. +;; +;; We therefore need to represent PTEST operations in a way that +;; makes it easy to combine them with both predicated and unpredicated +;; operations, while using a VNx16BI governing predicate for all +;; predicate modes. We do this using: +;; +;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) +;; +;; where: +;; +;; - GP is the real VNx16BI governing predicate +;; +;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting +;; GP to CAST_GP are guaranteed to be clear in GP. +;; +;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value +;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and +;; SVE_MAYBE_NOT_PTRUE otherwise. +;; +;; - OP is the predicate we want to test, of the same mode as CAST_GP. +;; +;; ------------------------------------------------------------------------- +;; ---- Description of UNSPEC_PRED_Z +;; ------------------------------------------------------------------------- +;; +;; SVE integer comparisons are predicated and return zero for inactive +;; lanes. Sometimes we use them with predicates that are all-true and +;; sometimes we use them with general predicates. +;; +;; The integer comparisons also set the flags and so build-in the effect +;; of a PTEST. We therefore want to be able to combine integer comparison +;; patterns with PTESTs of the result. 
One difficulty with doing this is +;; that (as noted above) the PTEST is always a .B operation and so can place +;; stronger requirements on the governing predicate than the comparison does. +;; +;; For example, when applying a separate PTEST to the result of a full-vector +;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a +;; .B PTRUE. In constrast, the comparison might be predicated on either +;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate +;; bits don't matter for .H operations. +;; +;; We therefore can't rely on a full-vector comparison using the same +;; predicate register as a following PTEST. We instead need to remember +;; whether a comparison is known to be a full-vector comparison and use +;; this information in addition to a check for equal predicate registers. +;; At the same time, it's useful to have a common representation for all +;; integer comparisons, so that they can be handled by a single set of +;; patterns. +;; +;; We therefore take a similar approach to UNSPEC_PTEST above and use: +;; +;; (unspec: [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z) +;; +;; where: +;; +;; - GP is the governing predicate, of mode +;; +;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value +;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE +;; otherwise +;; +;; - CODE is the comparison code +;; +;; - OP0 and OP1 are the values being compared, of mode M +;; +;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero. +;; +;; ------------------------------------------------------------------------- +;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X +;; ------------------------------------------------------------------------- +;; +;; Many SVE integer operations are predicated. We can generate them +;; from four sources: +;; +;; (1) Using normal unpredicated optabs. In this case we need to create +;; an all-true predicate register to act as the governing predicate +;; for the SVE instruction. There are no inactive lanes, and thus +;; the values of inactive lanes don't matter. +;; +;; (2) Using _x ACLE functions. In this case the function provides a +;; specific predicate and some lanes might be inactive. However, +;; as for (1), the values of the inactive lanes don't matter. +;; We can make extra lanes active without changing the behavior +;; (although for code-quality reasons we should avoid doing so +;; needlessly). +;; +;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. +;; These optabs have a predicate operand that specifies which lanes are +;; active and another operand that provides the values of inactive lanes. +;; +;; (4) Using _m and _z ACLE functions. These functions map to the same +;; patterns as (3), with the _z functions setting inactive lanes to zero +;; and the _m functions setting the inactive lanes to one of the function +;; arguments. +;; +;; For (1) and (2) we need a way of attaching the predicate to a normal +;; unpredicated integer operation. We do this using: +;; +;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X) +;; +;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED +;; is a predicate of mode . PRED might or might not be a PTRUE; +;; it always is for (1), but might not be for (2). +;; +;; The unspec as a whole has the same value as (code:M ...) when PRED is +;; all-true. 
It is always semantically valid to replace PRED with a PTRUE, +;; but as noted above, we should only do so if there's a specific benefit. +;; +;; (The "_X" in the unspec is named after the ACLE functions in (2).) +;; +;; For (3) and (4) we can simply use the SVE port's normal representation +;; of a predicate-based select: +;; +;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL) +;; +;; where INACTIVE specifies the values of inactive lanes. +;; +;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather +;; than inserting the integer operation directly. This is mostly useful +;; if we want the combine pass to merge an integer operation with an explicit +;; vcond_mask (in other words, with a following SEL instruction). However, +;; it's generally better to merge such operations at the gimple level +;; using (3). +;; +;; ------------------------------------------------------------------------- +;; ---- Note on predicated FP arithmetic patterns and GP "strictness" +;; ------------------------------------------------------------------------- +;; +;; Most SVE floating-point operations are predicated. We can generate +;; them from four sources: +;; +;; (1) Using normal unpredicated optabs. In this case we need to create +;; an all-true predicate register to act as the governing predicate +;; for the SVE instruction. There are no inactive lanes, and thus +;; the values of inactive lanes don't matter. +;; +;; (2) Using _x ACLE functions. In this case the function provides a +;; specific predicate and some lanes might be inactive. However, +;; as for (1), the values of the inactive lanes don't matter. +;; +;; The instruction must have the same exception behavior as the +;; function call unless things like command-line flags specifically +;; allow otherwise. For example, with -ffast-math, it is OK to +;; raise exceptions for inactive lanes, but normally it isn't. +;; +;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. +;; These optabs have a predicate operand that specifies which lanes are +;; active and another operand that provides the values of inactive lanes. +;; +;; (4) Using _m and _z ACLE functions. These functions map to the same +;; patterns as (3), with the _z functions setting inactive lanes to zero +;; and the _m functions setting the inactive lanes to one of the function +;; arguments. +;; +;; So: +;; +;; - In (1), the predicate is known to be all true and the pattern can use +;; unpredicated operations where available. +;; +;; - In (2), the predicate might or might not be all true. The pattern can +;; use unpredicated instructions if the predicate is all-true or if things +;; like command-line flags allow exceptions for inactive lanes. +;; +;; - (3) and (4) represent a native SVE predicated operation. Some lanes +;; might be inactive and inactive lanes of the result must have specific +;; values. There is no scope for using unpredicated instructions (and no +;; reason to want to), so the question about command-line flags doesn't +;; arise. +;; +;; It would be inaccurate to model (2) as an rtx code like (sqrt ...) +;; in combination with a separate predicate operand, e.g. +;; +;; (unspec [(match_operand: 1 "register_operand" "Upl") +;; (sqrt:SVE_FULL_F 2 "register_operand" "w")] +;; ....) +;; +;; because (sqrt ...) can raise an exception for any lane, including +;; inactive ones. We therefore need to use an unspec instead. 
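To make the UNSPEC_PRED_X convention described above concrete before the floating-point discussion continues, here is a minimal sketch of how a predicated integer multiplication generated from the unpredicated mul optab (source (1)) might be wrapped; the angle-bracketed register names are hypothetical placeholders, not operands taken from the patch.

;; Hedged sketch (not part of the patch): UNSPEC_PRED_X wrapping of a
;; normal integer operation, with an all-true governing predicate.
;;   (set (reg:VNx4SI <zdest>)
;;        (unspec:VNx4SI
;;          [(reg:VNx4BI <ptrue>)              ; PRED: all-true here, as for (1)
;;           (mult:VNx4SI (reg:VNx4SI <za>)
;;                        (reg:VNx4SI <zb>))]  ; the normal rtx operation
;;          UNSPEC_PRED_X))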
+;; +;; Also, (2) requires some way of distinguishing the case in which the +;; predicate might have inactive lanes and cannot be changed from the +;; case in which the predicate has no inactive lanes or can be changed. +;; This information is also useful when matching combined FP patterns +;; in which the predicates might not be equal. +;; +;; We therefore model FP operations as an unspec of the form: +;; +;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_) +;; +;; where: +;; +;; - PRED is the governing predicate. +;; +;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the +;; value SVE_STRICT_GP if PRED might have inactive lanes and if those +;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise. +;; +;; - OP0 OP1 ... are the normal input operands to the operation. +;; +;; - MNEMONIC is the mnemonic of the associated SVE instruction. +;; +;; For (3) and (4), we combine these operations with an UNSPEC_SEL +;; that selects between the result of the FP operation and the "else" +;; value. (This else value is a merge input for _m ACLE functions +;; and zero for _z ACLE functions.) The outer pattern then has the form: +;; +;; (unspec [pred fp_operation else_value] UNSPEC_SEL) +;; +;; This means that the patterns for (3) and (4) have two predicates: +;; one for the FP operation itself and one for the UNSPEC_SEL. +;; This pattern is equivalent to the result of combining an instance +;; of (1) or (2) with a separate vcond instruction, so these patterns +;; are useful as combine targets too. +;; +;; However, in the combine case, the instructions that we want to +;; combine might use different predicates. Then: +;; +;; - Some of the active lanes of the FP operation might be discarded +;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes, +;; even for SVE_STRICT_GP, since the operations on those lanes are +;; effectively dead code. +;; +;; - Some of the inactive lanes of the FP operation might be selected +;; by the UNSPEC_SEL, giving unspecified values for those lanes. +;; SVE_RELAXED_GP lets us extend the FP operation to cover these +;; extra lanes, but SVE_STRICT_GP does not. +;; +;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation +;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate. +;; This typically leads to patterns like: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_operand N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) +;; ...]) +;; +;; where operand N is allowed to be anything. These instructions then +;; have rewrite rules to replace operand N with operand 1, which gives the +;; instructions a canonical form and means that the original operand N is +;; not kept live unnecessarily. +;; +;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be +;; a subset of the FP operation predicate. This case isn't interesting +;; for FP operations that have an all-true predicate, since such operations +;; use SVE_RELAXED_GP instead. And it is not possible for instruction +;; conditions to track the subset relationship for arbitrary registers. +;; So in practice, the only useful case for SVE_STRICT_GP is the one +;; in which the predicates match: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_dup 1) +;; (const_int SVE_STRICT_GP) +;; ...] +;; UNSPEC_COND_) +;; ...]) +;; +;; This pattern would also be correct for SVE_RELAXED_GP, but it would +;; be redundant with the one above. 
However, if the combine pattern +;; has multiple FP operations, using a match_operand allows combinations +;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided +;; that the predicates are the same: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_dup 1) +;; (match_operand:SI N "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_) +;; (unspec [(match_dup 1) +;; (match_operand:SI M "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_) ...) +;; ...]) +;; +;; The fully-relaxed version of this pattern is: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_operand:SI N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) +;; (unspec [(match_operand:SI M) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) ...) +;; ...]) +;; +;; ------------------------------------------------------------------------- +;; ---- Note on FFR handling +;; ------------------------------------------------------------------------- +;; +;; Logically we want to divide FFR-related instructions into regions +;; that contain exactly one of: +;; +;; - a single write to the FFR +;; - any number of reads from the FFR (but only one read is likely) +;; - any number of LDFF1 and LDNF1 instructions +;; +;; However, LDFF1 and LDNF1 instructions should otherwise behave like +;; normal loads as far as possible. This means that they should be +;; schedulable within a region in the same way that LD1 would be, +;; and they should be deleted as dead if the result is unused. The loads +;; should therefore not write to the FFR, since that would both serialize +;; the loads with respect to each other and keep the loads live for any +;; later RDFFR. +;; +;; We get around this by using a fake "FFR token" (FFRT) to help describe +;; the dependencies. Writing to the FFRT starts a new "FFRT region", +;; while using the FFRT keeps the instruction within its region. +;; Specifically: +;; +;; - Writes start a new FFRT region as well as setting the FFR: +;; +;; W1: parallel (FFRT = , FFR = ) +;; +;; - Loads use an LD1-like instruction that also uses the FFRT, so that the +;; loads stay within the same FFRT region: +;; +;; L1: load data while using the FFRT +;; +;; In addition, any FFRT region that includes a load also has at least one +;; instance of: +;; +;; L2: FFR = update(FFR, FFRT) [type == no_insn] +;; +;; to make it clear that the region both reads from and writes to the FFR. +;; +;; - Reads do the following: +;; +;; R1: FFRT = FFR [type == no_insn] +;; R2: read from the FFRT +;; R3: FFRT = update(FFRT) [type == no_insn] +;; +;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and +;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s +;; cannot move backwards across R3. +;; +;; This way, writes are only kept alive by later loads or reads, +;; and write/read pairs fold normally. For two consecutive reads, +;; the first R3 is made dead by the second R1, which in turn becomes +;; redundant with the first R1. We then have: +;; +;; first R1: FFRT = FFR +;; first read from the FFRT +;; second read from the FFRT +;; second R3: FFRT = update(FFRT) +;; +;; i.e. the two FFRT regions collapse into a single one with two +;; independent reads. +;; +;; The model still prevents some valid optimizations though. For example, +;; if all loads in an FFRT region are deleted as dead, nothing would remove +;; the L2 instructions. 
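To connect the W1/L1/L2/R1/R2/R3 labels above with the patterns added later in this file, a typical sequence that sets up the FFR, performs a first-faulting load and then reads the FFR back would expand to insns along the following lines. This is a hedged, schematic ordering rather than an exact RTL dump; the bracketed load stands for whichever of the SVE_LDFF1_LDNF1 patterns below is used.

;; Hedged sketch (not part of the patch): one FFRT region for a
;; first-faulting load followed by a read of the FFR.
;;   aarch64_wrffr                  ; W1: write the FFR, start an FFRT region
;;   [LDFF1/LDNF1 pattern]          ; L1: the load, which uses the FFRT
;;   aarch64_update_ffr_for_load    ; L2: FFR = update (FFR, FFRT)
;;   aarch64_copy_ffr_to_ffrt       ; R1: FFRT = FFR
;;   aarch64_rdffr                  ; R2: read the FFR via the FFRT
;;   aarch64_update_ffrt            ; R3: FFRT = update (FFRT)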
+ +;; ========================================================================= +;; == Moves +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Moves of single vectors +;; ------------------------------------------------------------------------- +;; Includes: +;; - MOV (including aliases) +;; - LD1B (contiguous form) +;; - LD1D ( " " ) +;; - LD1H ( " " ) +;; - LD1W ( " " ) +;; - LDR +;; - ST1B (contiguous form) +;; - ST1D ( " " ) +;; - ST1H ( " " ) +;; - ST1W ( " " ) +;; - STR +;; ------------------------------------------------------------------------- - -;; SVE data moves. (define_expand "mov" [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") (match_operand:SVE_ALL 1 "general_operand"))] @@ -72,7 +651,7 @@ head of the file) and increases the addressing choices for little-endian. */ if ((MEM_P (operands[0]) || MEM_P (operands[1])) - && can_create_pseudo_p ()) + && can_create_pseudo_p ()) { aarch64_expand_sve_mem_move (operands[0], operands[1], mode); DONE; @@ -80,47 +659,38 @@ if (CONSTANT_P (operands[1])) { - aarch64_expand_mov_immediate (operands[0], operands[1], - gen_vec_duplicate); + aarch64_expand_mov_immediate (operands[0], operands[1]); DONE; } /* Optimize subregs on big-endian targets: we can use REV[BHW] instead of going through memory. */ if (BYTES_BIG_ENDIAN - && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) + && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) DONE; } ) -;; A pattern for optimizing SUBREGs that have a reinterpreting effect -;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move -;; for details. We use a special predicate for operand 2 to reduce -;; the number of patterns. -(define_insn_and_split "*aarch64_sve_mov_subreg_be" - [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") - (unspec:SVE_ALL - [(match_operand:VNx16BI 1 "register_operand" "Upl") - (match_operand 2 "aarch64_any_register_operand" "w")] - UNSPEC_REV_SUBREG))] - "TARGET_SVE && BYTES_BIG_ENDIAN" - "#" - "&& reload_completed" - [(const_int 0)] +(define_expand "movmisalign" + [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") + (match_operand:SVE_ALL 1 "general_operand"))] + "TARGET_SVE" { - aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); + /* Equivalent to a normal move for our purpooses. */ + emit_move_insn (operands[0], operands[1]); DONE; } ) -;; Unpredicated moves (little-endian). Only allow memory operations -;; during and after RA; before RA we want the predicated load and -;; store patterns to be used instead. -(define_insn "*aarch64_sve_mov_le" - [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") - (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] +;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which +;; little-endian ordering is acceptable. Only allow memory operations during +;; and after RA; before RA we want the predicated load and store patterns to +;; be used instead. 
+(define_insn "*aarch64_sve_mov_ldr_str" + [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") + (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] "TARGET_SVE - && !BYTES_BIG_ENDIAN + && (mode == VNx16QImode || !BYTES_BIG_ENDIAN) && ((lra_in_progress || reload_completed) || (register_operand (operands[0], mode) && nonmemory_operand (operands[1], mode)))" @@ -131,34 +701,37 @@ * return aarch64_output_sve_mov_immediate (operands[1]);" ) -;; Unpredicated moves (big-endian). Memory accesses require secondary -;; reloads. -(define_insn "*aarch64_sve_mov_be" +;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors +;; or vectors for which little-endian ordering isn't acceptable. Memory +;; accesses require secondary reloads. +(define_insn "*aarch64_sve_mov_no_ldr_str" [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] - "TARGET_SVE && BYTES_BIG_ENDIAN" + "TARGET_SVE + && mode != VNx16QImode + && (BYTES_BIG_ENDIAN + || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (mode)))" "@ mov\t%0.d, %1.d * return aarch64_output_sve_mov_immediate (operands[1]);" ) -;; Handle big-endian memory reloads. We use byte PTRUE for all modes -;; to try to encourage reuse. -(define_expand "aarch64_sve_reload_be" +;; Handle memory reloads for modes that can't use LDR and STR. We use +;; byte PTRUE for all modes to try to encourage reuse. This pattern +;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD. +(define_expand "aarch64_sve_reload_mem" [(parallel [(set (match_operand 0) - (match_operand 1)) + (match_operand 1)) (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] - "TARGET_SVE && BYTES_BIG_ENDIAN" + "TARGET_SVE" { /* Create a PTRUE. */ emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); /* Refer to the PTRUE in the appropriate mode for this move. */ machine_mode mode = GET_MODE (operands[0]); - machine_mode pred_mode - = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require (); - rtx pred = gen_lowpart (pred_mode, operands[2]); + rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]); /* Emit a predicated load or store. */ aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); @@ -166,170 +739,89 @@ } ) -;; A predicated load or store for which the predicate is known to be -;; all-true. Note that this pattern is generated directly by -;; aarch64_emit_sve_pred_move, so changes to this pattern will -;; need changes there as well. -(define_insn "*pred_mov" - [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m") +;; A predicated move in which the predicate is known to be all-true. +;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move, +;; so changes to this pattern will need changes there as well. 
+(define_insn_and_split "@aarch64_pred_mov" + [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m") (unspec:SVE_ALL - [(match_operand: 1 "register_operand" "Upl, Upl") - (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")] - UNSPEC_MERGE_PTRUE))] + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")] + UNSPEC_PRED_X))] "TARGET_SVE && (register_operand (operands[0], mode) || register_operand (operands[2], mode))" "@ - ld1\t%0., %1/z, %2 - st1\t%2., %1, %0" + # + ld1\t%0., %1/z, %2 + st1\t%2., %1, %0" + "&& register_operand (operands[0], mode) + && register_operand (operands[2], mode)" + [(set (match_dup 0) (match_dup 2))] ) -(define_expand "movmisalign" - [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") - (match_operand:SVE_ALL 1 "general_operand"))] - "TARGET_SVE" +;; A pattern for optimizing SUBREGs that have a reinterpreting effect +;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move +;; for details. We use a special predicate for operand 2 to reduce +;; the number of patterns. +(define_insn_and_split "*aarch64_sve_mov_subreg_be" + [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") + (unspec:SVE_ALL + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand 2 "aarch64_any_register_operand" "w")] + UNSPEC_REV_SUBREG))] + "TARGET_SVE && BYTES_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] { - /* Equivalent to a normal move for our purpooses. */ - emit_move_insn (operands[0], operands[1]); + aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); DONE; } ) -(define_insn "maskload" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +;; Reinterpret operand 1 in operand 0's mode, without changing its contents. +;; This is equivalent to a subreg on little-endian targets but not for +;; big-endian; see the comment at the head of the file for details. +(define_expand "@aarch64_sve_reinterpret" + [(set (match_operand:SVE_ALL 0 "register_operand") (unspec:SVE_ALL - [(match_operand: 2 "register_operand" "Upl") - (match_operand:SVE_ALL 1 "memory_operand" "m")] - UNSPEC_LD1_SVE))] - "TARGET_SVE" - "ld1\t%0., %2/z, %1" -) - -(define_insn "maskstore" - [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") - (unspec:SVE_ALL [(match_operand: 2 "register_operand" "Upl") - (match_operand:SVE_ALL 1 "register_operand" "w") - (match_dup 0)] - UNSPEC_ST1_SVE))] - "TARGET_SVE" - "st1\t%1., %2, %0" -) - -;; Unpredicated gather loads. -(define_expand "gather_load" - [(set (match_operand:SVE_SD 0 "register_operand") - (unspec:SVE_SD - [(match_dup 5) - (match_operand:DI 1 "aarch64_reg_or_zero") - (match_operand: 2 "register_operand") - (match_operand:DI 3 "const_int_operand") - (match_operand:DI 4 "aarch64_gather_scale_operand_") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] + [(match_operand 1 "aarch64_any_register_operand")] + UNSPEC_REINTERPRET))] "TARGET_SVE" { - operands[5] = force_reg (mode, CONSTM1_RTX (mode)); + machine_mode src_mode = GET_MODE (operands[1]); + if (targetm.can_change_mode_class (mode, src_mode, FP_REGS)) + { + emit_move_insn (operands[0], gen_lowpart (mode, operands[1])); + DONE; + } } ) -;; Predicated gather loads for 32-bit elements. Operand 3 is true for -;; unsigned extension and false for signed extension. 
-(define_insn "mask_gather_load" - [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") - (unspec:SVE_S - [(match_operand: 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") - (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") - (match_operand: 2 "register_operand" "w, w, w, w, w") - (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") - (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] - "TARGET_SVE" - "@ - ld1w\t%0.s, %5/z, [%2.s] - ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] - ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] - ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] - ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" -) - -;; Predicated gather loads for 64-bit elements. The value of operand 3 -;; doesn't matter in this case. -(define_insn "mask_gather_load" - [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") - (unspec:SVE_D - [(match_operand: 5 "register_operand" "Upl, Upl, Upl") - (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") - (match_operand: 2 "register_operand" "w, w, w") - (match_operand:DI 3 "const_int_operand") - (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] - "TARGET_SVE" - "@ - ld1d\t%0.d, %5/z, [%2.d] - ld1d\t%0.d, %5/z, [%1, %2.d] - ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" -) - -;; Unpredicated scatter store. -(define_expand "scatter_store" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_dup 5) - (match_operand:DI 0 "aarch64_reg_or_zero") - (match_operand: 1 "register_operand") - (match_operand:DI 2 "const_int_operand") - (match_operand:DI 3 "aarch64_gather_scale_operand_") - (match_operand:SVE_SD 4 "register_operand")] - UNSPEC_ST1_SCATTER))] +;; A pattern for handling type punning on big-endian targets. We use a +;; special predicate for operand 1 to reduce the number of patterns. +(define_insn_and_split "*aarch64_sve_reinterpret" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w") + (unspec:SVE_ALL + [(match_operand 1 "aarch64_any_register_operand" "w")] + UNSPEC_REINTERPRET))] "TARGET_SVE" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] { - operands[5] = force_reg (mode, CONSTM1_RTX (mode)); + operands[1] = aarch64_replace_reg_mode (operands[1], mode); } ) -;; Predicated scatter stores for 32-bit elements. Operand 2 is true for -;; unsigned extension and false for signed extension. -(define_insn "mask_scatter_store" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand: 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") - (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") - (match_operand: 1 "register_operand" "w, w, w, w, w") - (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") - (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") - (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] - UNSPEC_ST1_SCATTER))] - "TARGET_SVE" - "@ - st1w\t%4.s, %5, [%1.s] - st1w\t%4.s, %5, [%0, %1.s, sxtw] - st1w\t%4.s, %5, [%0, %1.s, uxtw] - st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] - st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" -) - -;; Predicated scatter stores for 64-bit elements. The value of operand 2 -;; doesn't matter in this case. 
-(define_insn "mask_scatter_store" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand: 5 "register_operand" "Upl, Upl, Upl") - (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") - (match_operand: 1 "register_operand" "w, w, w") - (match_operand:DI 2 "const_int_operand") - (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") - (match_operand:SVE_D 4 "register_operand" "w, w, w")] - UNSPEC_ST1_SCATTER))] - "TARGET_SVE" - "@ - st1d\t%4.d, %5, [%1.d] - st1d\t%4.d, %5, [%0, %1.d] - st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" -) +;; ------------------------------------------------------------------------- +;; ---- Moves of multiple vectors +;; ------------------------------------------------------------------------- +;; All patterns in this section are synthetic and split to real +;; instructions after reload. +;; ------------------------------------------------------------------------- -;; SVE structure moves. (define_expand "mov" [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") (match_operand:SVE_STRUCT 1 "general_operand"))] @@ -364,7 +856,7 @@ ;; Unpredicated structure moves (big-endian). Memory accesses require ;; secondary reloads. -(define_insn "*aarch64_sve_mov_le" +(define_insn "*aarch64_sve_mov_be" [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] "TARGET_SVE && BYTES_BIG_ENDIAN" @@ -400,12 +892,12 @@ ;; Predicated structure moves. This works for both endiannesses but in ;; practice is only useful for big-endian. -(define_insn_and_split "pred_mov" - [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx") +(define_insn_and_split "@aarch64_pred_mov" + [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx") (unspec:SVE_STRUCT - [(match_operand: 1 "register_operand" "Upl, Upl") - (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")] - UNSPEC_MERGE_PTRUE))] + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")] + UNSPEC_PRED_X))] "TARGET_SVE && (register_operand (operands[0], mode) || register_operand (operands[2], mode))" @@ -428,6 +920,18 @@ [(set_attr "length" "")] ) +;; ------------------------------------------------------------------------- +;; ---- Moves of predicates +;; ------------------------------------------------------------------------- +;; Includes: +;; - MOV +;; - LDR +;; - PFALSE +;; - PTRUE +;; - PTRUES +;; - STR +;; ------------------------------------------------------------------------- + (define_expand "mov" [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") (match_operand:PRED_ALL 1 "general_operand"))] @@ -435,12 +939,18 @@ { if (GET_CODE (operands[0]) == MEM) operands[1] = force_reg (mode, operands[1]); + + if (CONSTANT_P (operands[1])) + { + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } } ) (define_insn "*aarch64_sve_mov" - [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa") - (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))] + [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa") + (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))] "TARGET_SVE && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" @@ -448,287 +958,296 @@ mov\t%0.b, %1.b str\t%1, %0 ldr\t%0, %1 - pfalse\t%0.b - * return aarch64_output_ptrue (mode, '');" + * return 
aarch64_output_sve_mov_immediate (operands[1]);" ) -;; Handle extractions from a predicate by converting to an integer vector -;; and extracting from there. -(define_expand "vec_extract" - [(match_operand: 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:SI 2 "nonmemory_operand") - ;; Dummy operand to which we can attach the iterator. - (reg:SVE_I V0_REGNUM)] +;; Match PTRUES Pn.B when both the predicate and flags are useful. +(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 2) + (match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" + [(unspec:VNx16BI + [(match_operand:SI 4 "const_int_operand") + (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] + UNSPEC_PTRUE)])] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (match_dup 1))] "TARGET_SVE" { - rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_aarch64_sve_dup_const (tmp, operands[1], - CONST1_RTX (mode), - CONST0_RTX (mode))); - emit_insn (gen_vec_extract (operands[0], tmp, operands[2])); - DONE; + return aarch64_output_sve_ptrues (operands[1]); } -) - -(define_expand "vec_extract" - [(set (match_operand: 0 "register_operand") - (vec_select: - (match_operand:SVE_ALL 1 "register_operand") - (parallel [(match_operand:SI 2 "nonmemory_operand")])))] - "TARGET_SVE" + "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" { - poly_int64 val; - if (poly_int_rtx_p (operands[2], &val) - && known_eq (val, GET_MODE_NUNITS (mode) - 1)) - { - /* The last element can be extracted with a LASTB and a false - predicate. */ - rtx sel = force_reg (mode, CONST0_RTX (mode)); - emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); - DONE; - } - if (!CONST_INT_P (operands[2])) - { - /* Create an index with operand[2] as the base and -1 as the step. - It will then be zero for the element we care about. */ - rtx index = gen_lowpart (mode, operands[2]); - index = force_reg (mode, index); - rtx series = gen_reg_rtx (mode); - emit_insn (gen_vec_series (series, index, constm1_rtx)); - - /* Get a predicate that is true for only that element. */ - rtx zero = CONST0_RTX (mode); - rtx cmp = gen_rtx_EQ (mode, series, zero); - rtx sel = gen_reg_rtx (mode); - emit_insn (gen_vec_cmp (sel, cmp, series, zero)); - - /* Select the element using LASTB. */ - emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); - DONE; - } + operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); } ) -;; Extract element zero. This is a special case because we want to force -;; the registers to be the same for the second alternative, and then -;; split the instruction into nothing after RA. -(define_insn_and_split "*vec_extract_0" - [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") - (vec_select: - (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") - (parallel [(const_int 0)])))] +;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful. 
+(define_insn_and_rewrite "*aarch64_sve_ptrue_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 2) + (match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (subreg:PRED_HSD + (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" + [(unspec:VNx16BI + [(match_operand:SI 4 "const_int_operand") + (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] + UNSPEC_PTRUE)]) 0)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (match_dup 1))] "TARGET_SVE" { - operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); - switch (which_alternative) - { - case 0: - return "umov\\t%0, %1.[0]"; - case 1: - return "#"; - case 2: - return "st1\\t{%1.}[0], %0"; - default: - gcc_unreachable (); - } + return aarch64_output_sve_ptrues (operands[1]); } - "&& reload_completed - && REG_P (operands[0]) - && REGNO (operands[0]) == REGNO (operands[1])" - [(const_int 0)] + "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" { - emit_note (NOTE_INSN_DELETED); - DONE; + operands[2] = CONSTM1_RTX (VNx16BImode); + operands[3] = CONSTM1_RTX (mode); } - [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] ) -;; Extract an element from the Advanced SIMD portion of the register. -;; We don't just reuse the aarch64-simd.md pattern because we don't -;; want any change in lane number on big-endian targets. -(define_insn "*vec_extract_v128" - [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") - (vec_select: - (match_operand:SVE_ALL 1 "register_operand" "w, w, w") - (parallel [(match_operand:SI 2 "const_int_operand")])))] - "TARGET_SVE - && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 1, 15)" +;; Match PTRUES Pn.B when only the flags result is useful (which is +;; a way of testing VL). +(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 2) + (match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" + [(unspec:VNx16BI + [(match_operand:SI 4 "const_int_operand") + (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] + UNSPEC_PTRUE)])] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" { - operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); - switch (which_alternative) - { - case 0: - return "umov\\t%0, %1.[%2]"; - case 1: - return "dup\\t%0, %1.[%2]"; - case 2: - return "st1\\t{%1.}[%2], %0"; - default: - gcc_unreachable (); - } + return aarch64_output_sve_ptrues (operands[1]); } - [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] -) - -;; Extract an element in the range of DUP. This pattern allows the -;; source and destination to be different. -(define_insn "*vec_extract_dup" - [(set (match_operand: 0 "register_operand" "=w") - (vec_select: - (match_operand:SVE_ALL 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "const_int_operand")])))] - "TARGET_SVE - && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 16, 63)" + "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" { - operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); - return "dup\t%0., %1.[%2]"; + operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); } ) -;; Extract an element outside the range of DUP. This pattern requires the -;; source and destination to be the same. 
-(define_insn "*vec_extract_ext" - [(set (match_operand: 0 "register_operand" "=w") - (vec_select: - (match_operand:SVE_ALL 1 "register_operand" "0") - (parallel [(match_operand:SI 2 "const_int_operand")])))] - "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (mode) >= 64" +;; Match PTRUES Pn.[HWD] when only the flags result is useful (which is +;; a way of testing VL). +(define_insn_and_rewrite "*aarch64_sve_ptrue_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 2) + (match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (subreg:PRED_HSD + (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" + [(unspec:VNx16BI + [(match_operand:SI 4 "const_int_operand") + (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] + UNSPEC_PTRUE)]) 0)] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" { - operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); - operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (mode)); - return "ext\t%0.b, %0.b, %0.b, #%2"; + return aarch64_output_sve_ptrues (operands[1]); } -) - -;; Extract the last active element of operand 1 into operand 0. -;; If no elements are active, extract the last inactive element instead. -(define_insn "extract_last_" - [(set (match_operand: 0 "register_operand" "=r, w") - (unspec: - [(match_operand: 1 "register_operand" "Upl, Upl") - (match_operand:SVE_ALL 2 "register_operand" "w, w")] - UNSPEC_LASTB))] - "TARGET_SVE" - "@ - lastb\t%0, %1, %2. - lastb\t%0, %1, %2." -) - -(define_expand "vec_duplicate" - [(parallel - [(set (match_operand:SVE_ALL 0 "register_operand") - (vec_duplicate:SVE_ALL - (match_operand: 1 "aarch64_sve_dup_operand"))) - (clobber (scratch:))])] - "TARGET_SVE" + "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" { - if (MEM_P (operands[1])) - { - rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); - emit_insn (gen_sve_ld1r (operands[0], ptrue, operands[1], - CONST0_RTX (mode))); - DONE; - } + operands[2] = CONSTM1_RTX (VNx16BImode); + operands[3] = CONSTM1_RTX (mode); } ) -;; Accept memory operands for the benefit of combine, and also in case -;; the scalar input gets spilled to memory during RA. We want to split -;; the load at the first opportunity in order to allow the PTRUE to be -;; optimized with surrounding code. -(define_insn_and_split "*vec_duplicate_reg" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") - (vec_duplicate:SVE_ALL - (match_operand: 1 "aarch64_sve_dup_operand" "r, w, Uty"))) - (clobber (match_scratch: 2 "=X, X, Upl"))] +;; ------------------------------------------------------------------------- +;; ---- Moves relating to the FFR +;; ------------------------------------------------------------------------- +;; RDFFR +;; RDFFRS +;; SETFFR +;; WRFFR +;; ------------------------------------------------------------------------- + +;; [W1 in the block comment above about FFR handling] +;; +;; Write to the FFR and start a new FFRT scheduling region. 
+(define_insn "aarch64_wrffr" + [(set (reg:VNx16BI FFR_REGNUM) + (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa")) + (set (reg:VNx16BI FFRT_REGNUM) + (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))] "TARGET_SVE" "@ - mov\t%0., %1 - mov\t%0., %1 - #" - "&& MEM_P (operands[1])" - [(const_int 0)] - { - if (GET_CODE (operands[2]) == SCRATCH) - operands[2] = gen_reg_rtx (mode); - emit_move_insn (operands[2], CONSTM1_RTX (mode)); - emit_insn (gen_sve_ld1r (operands[0], operands[2], operands[1], - CONST0_RTX (mode))); - DONE; - } - [(set_attr "length" "4,4,8")] + setffr + wrffr\t%0.b" ) -;; This is used for vec_duplicates from memory, but can also -;; be used by combine to optimize selects of a a vec_duplicate -;; with zero. -(define_insn "sve_ld1r" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (unspec:SVE_ALL - [(match_operand: 1 "register_operand" "Upl") - (vec_duplicate:SVE_ALL - (match_operand: 2 "aarch64_sve_ld1r_operand" "Uty")) - (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] - UNSPEC_SEL))] +;; [L2 in the block comment above about FFR handling] +;; +;; Introduce a read from and write to the FFR in the current FFRT region, +;; so that the FFR value is live on entry to the region and so that the FFR +;; value visibly changes within the region. This is used (possibly multiple +;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions. +(define_insn "aarch64_update_ffr_for_load" + [(set (reg:VNx16BI FFR_REGNUM) + (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM) + (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))] "TARGET_SVE" - "ld1r\t%0., %1/z, %2" + "" + [(set_attr "type" "no_insn")] ) -;; Load 128 bits from memory and duplicate to fill a vector. Since there -;; are so few operations on 128-bit "elements", we don't define a VNx1TI -;; and simply use vectors of bytes instead. -(define_insn "*sve_ld1rq" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (unspec:SVE_ALL - [(match_operand: 1 "register_operand" "Upl") - (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")] - UNSPEC_LD1RQ))] +;; [R1 in the block comment above about FFR handling] +;; +;; Notionally copy the FFR to the FFRT, so that the current FFR value +;; can be read from there by the RDFFR instructions below. This acts +;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and +;; creates a natural dependency with earlier writes. +(define_insn "aarch64_copy_ffr_to_ffrt" + [(set (reg:VNx16BI FFRT_REGNUM) + (reg:VNx16BI FFR_REGNUM))] "TARGET_SVE" - "ld1rq\t%0., %1/z, %2" + "" + [(set_attr "type" "no_insn")] ) -;; Implement a predicate broadcast by shifting the low bit of the scalar -;; input into the top bit and using a WHILELO. An alternative would be to -;; duplicate the input and do a compare with zero. -(define_expand "vec_duplicate" - [(set (match_operand:PRED_ALL 0 "register_operand") - (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] +;; [R2 in the block comment above about FFR handling] +;; +;; Read the FFR via the FFRT. +(define_insn "aarch64_rdffr" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (reg:VNx16BI FFRT_REGNUM))] + "TARGET_SVE" + "rdffr\t%0.b" +) + +;; Likewise with zero predication. +(define_insn "aarch64_rdffr_z" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (reg:VNx16BI FFRT_REGNUM) + (match_operand:VNx16BI 1 "register_operand" "Upa")))] + "TARGET_SVE" + "rdffr\t%0.b, %1/z" +) + +;; Read the FFR to test for a fault, without using the predicate result. 
+(define_insn "*aarch64_rdffr_z_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_dup 1) + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (and:VNx16BI + (reg:VNx16BI FFRT_REGNUM) + (match_dup 1))] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" + "rdffrs\t%0.b, %1/z" +) + +;; Same for unpredicated RDFFR when tested with a known PTRUE. +(define_insn "*aarch64_rdffr_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_dup 1) + (const_int SVE_KNOWN_PTRUE) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" + "rdffrs\t%0.b, %1/z" +) + +;; Read the FFR with zero predication and test the result. +(define_insn "*aarch64_rdffr_z_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_dup 1) + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (and:VNx16BI + (reg:VNx16BI FFRT_REGNUM) + (match_dup 1))] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (reg:VNx16BI FFRT_REGNUM) + (match_dup 1)))] + "TARGET_SVE" + "rdffrs\t%0.b, %1/z" +) + +;; Same for unpredicated RDFFR when tested with a known PTRUE. +(define_insn "*aarch64_rdffr_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_dup 1) + (const_int SVE_KNOWN_PTRUE) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (reg:VNx16BI FFRT_REGNUM))] + "TARGET_SVE" + "rdffrs\t%0.b, %1/z" +) + +;; [R3 in the block comment above about FFR handling] +;; +;; Arbitrarily update the FFRT after a read from the FFR. This acts as +;; a scheduling barrier for later LDFF1 and LDNF1 instructions. +(define_insn "aarch64_update_ffrt" + [(set (reg:VNx16BI FFRT_REGNUM) + (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))] "TARGET_SVE" - { - rtx tmp = gen_reg_rtx (DImode); - rtx op1 = gen_lowpart (DImode, operands[1]); - emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); - emit_insn (gen_while_ultdi (operands[0], const0_rtx, tmp)); - DONE; - } -) - -(define_insn "vec_series" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") - (vec_series:SVE_I - (match_operand: 1 "aarch64_sve_index_operand" "Usi, r, r") - (match_operand: 2 "aarch64_sve_index_operand" "r, Usi, r")))] + "" + [(set_attr "type" "no_insn")] +) + +;; ========================================================================= +;; == Loads +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Normal contiguous loads +;; ------------------------------------------------------------------------- +;; Includes contiguous forms of: +;; - LD1B +;; - LD1D +;; - LD1H +;; - LD1W +;; - LD2B +;; - LD2D +;; - LD2H +;; - LD2W +;; - LD3B +;; - LD3D +;; - LD3H +;; - LD3W +;; - LD4B +;; - LD4D +;; - LD4H +;; - LD4W +;; ------------------------------------------------------------------------- + +;; Predicated LD1. 
+(define_insn "maskload" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w") + (unspec:SVE_ALL + [(match_operand: 2 "register_operand" "Upl") + (match_operand:SVE_ALL 1 "memory_operand" "m")] + UNSPEC_LD1_SVE))] "TARGET_SVE" - "@ - index\t%0., #%1, %2 - index\t%0., %1, #%2 - index\t%0., %1, %2" -) - -;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range -;; of an INDEX instruction. -(define_insn "*vec_series_plus" - [(set (match_operand:SVE_I 0 "register_operand" "=w") - (plus:SVE_I - (vec_duplicate:SVE_I - (match_operand: 1 "register_operand" "r")) - (match_operand:SVE_I 2 "immediate_operand")))] - "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" - { - operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); - return "index\t%0., %1, #%2"; - } + "ld1\t%0., %2/z, %1" ) ;; Unpredicated LD[234]. @@ -740,7 +1259,7 @@ UNSPEC_LDN))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[2] = aarch64_ptrue_reg (mode); } ) @@ -755,1668 +1274,9156 @@ "ld\t%0, %2/z, %1" ) -;; Unpredicated ST[234]. This is always a full update, so the dependence -;; on the old value of the memory location (via (match_dup 0)) is redundant. -;; There doesn't seem to be any obvious benefit to treating the all-true -;; case differently though. In particular, it's very unlikely that we'll -;; only find out during RTL that a store_lanes is dead. -(define_expand "vec_store_lanes" - [(set (match_operand:SVE_STRUCT 0 "memory_operand") - (unspec:SVE_STRUCT - [(match_dup 2) - (match_operand:SVE_STRUCT 1 "register_operand") - (match_dup 0)] - UNSPEC_STN))] - "TARGET_SVE" +;; ------------------------------------------------------------------------- +;; ---- Extending contiguous loads +;; ------------------------------------------------------------------------- +;; Includes contiguous forms of: +;; LD1B +;; LD1H +;; LD1SB +;; LD1SH +;; LD1SW +;; LD1W +;; ------------------------------------------------------------------------- + +;; Predicated load and extend, with 8 elements per 128-bit block. +(define_insn_and_rewrite "@aarch64_load_" + [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") + (unspec:SVE_HSDI + [(match_operand: 3 "general_operand" "UplDnm") + (ANY_EXTEND:SVE_HSDI + (unspec:SVE_PARTIAL_I + [(match_operand: 2 "register_operand" "Upl") + (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")] + UNSPEC_LD1_SVE))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" + "ld1\t%0., %2/z, %1" + "&& !CONSTANT_P (operands[3])" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[3] = CONSTM1_RTX (mode); } ) -;; Predicated ST[234]. -(define_insn "vec_mask_store_lanes" - [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") - (unspec:SVE_STRUCT +;; ------------------------------------------------------------------------- +;; ---- First-faulting contiguous loads +;; ------------------------------------------------------------------------- +;; Includes contiguous forms of: +;; - LDFF1B +;; - LDFF1D +;; - LDFF1H +;; - LDFF1W +;; - LDNF1B +;; - LDNF1D +;; - LDNF1H +;; - LDNF1W +;; ------------------------------------------------------------------------- + +;; Contiguous non-extending first-faulting or non-faulting loads. 
+(define_insn "@aarch64_ldf1" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL [(match_operand: 2 "register_operand" "Upl") - (match_operand:SVE_STRUCT 1 "register_operand" "w") - (match_dup 0)] - UNSPEC_STN))] - "TARGET_SVE" - "st\t%1, %2, %0" -) - -(define_expand "vec_perm" - [(match_operand:SVE_ALL 0 "register_operand") - (match_operand:SVE_ALL 1 "register_operand") - (match_operand:SVE_ALL 2 "register_operand") - (match_operand: 3 "aarch64_sve_vec_perm_operand")] - "TARGET_SVE && GET_MODE_NUNITS (mode).is_constant ()" + (match_operand:SVE_FULL 1 "aarch64_sve_ldf1_operand" "Ut") + (reg:VNx16BI FFRT_REGNUM)] + SVE_LDFF1_LDNF1))] + "TARGET_SVE" + "ldf1\t%0., %2/z, %1" +) + +;; ------------------------------------------------------------------------- +;; ---- First-faulting extending contiguous loads +;; ------------------------------------------------------------------------- +;; Includes contiguous forms of: +;; - LDFF1B +;; - LDFF1H +;; - LDFF1SB +;; - LDFF1SH +;; - LDFF1SW +;; - LDFF1W +;; - LDNF1B +;; - LDNF1H +;; - LDNF1SB +;; - LDNF1SH +;; - LDNF1SW +;; - LDNF1W +;; ------------------------------------------------------------------------- + +;; Predicated first-faulting or non-faulting load and extend. +(define_insn_and_rewrite "@aarch64_ldf1_" + [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") + (unspec:SVE_HSDI + [(match_operand: 3 "general_operand" "UplDnm") + (ANY_EXTEND:SVE_HSDI + (unspec:SVE_PARTIAL_I + [(match_operand: 2 "register_operand" "Upl") + (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ldf1_operand" "Ut") + (reg:VNx16BI FFRT_REGNUM)] + SVE_LDFF1_LDNF1))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" + "ldf1\t%0., %2/z, %1" + "&& !CONSTANT_P (operands[3])" { - aarch64_expand_sve_vec_perm (operands[0], operands[1], - operands[2], operands[3]); - DONE; + operands[3] = CONSTM1_RTX (mode); } ) -(define_insn "*aarch64_sve_tbl" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (unspec:SVE_ALL - [(match_operand:SVE_ALL 1 "register_operand" "w") - (match_operand: 2 "register_operand" "w")] - UNSPEC_TBL))] +;; ------------------------------------------------------------------------- +;; ---- Non-temporal contiguous loads +;; ------------------------------------------------------------------------- +;; Includes: +;; - LDNT1B +;; - LDNT1D +;; - LDNT1H +;; - LDNT1W +;; ------------------------------------------------------------------------- + +;; Predicated contiguous non-temporal load. +(define_insn "@aarch64_ldnt1" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand: 2 "register_operand" "Upl") + (match_operand:SVE_FULL 1 "memory_operand" "m")] + UNSPEC_LDNT1_SVE))] "TARGET_SVE" - "tbl\t%0., %1., %2." + "ldnt1\t%0., %2/z, %1" ) -(define_insn "*aarch64_sve_" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") - (match_operand:PRED_ALL 2 "register_operand" "Upa")] - PERMUTE))] - "TARGET_SVE" - "\t%0., %1., %2." 
-) +;; ------------------------------------------------------------------------- +;; ---- Normal gather loads +;; ------------------------------------------------------------------------- +;; Includes gather forms of: +;; - LD1D +;; - LD1W +;; ------------------------------------------------------------------------- -(define_insn "aarch64_sve_" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") - (match_operand:SVE_ALL 2 "register_operand" "w")] - PERMUTE))] +;; Unpredicated gather loads. +(define_expand "gather_load" + [(set (match_operand:SVE_24 0 "register_operand") + (unspec:SVE_24 + [(match_dup 5) + (match_operand:DI 1 "aarch64_sve_gather_offset_") + (match_operand: 2 "register_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] "TARGET_SVE" - "\t%0., %1., %2." + { + operands[5] = aarch64_ptrue_reg (mode); + } ) -(define_insn "*aarch64_sve_rev64" - [(set (match_operand:SVE_BHS 0 "register_operand" "=w") - (unspec:SVE_BHS - [(match_operand:VNx2BI 1 "register_operand" "Upl") - (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")] - UNSPEC_REV64)] - UNSPEC_MERGE_PTRUE))] +;; Predicated gather loads for 32-bit elements. Operand 3 is true for +;; unsigned extension and false for signed extension. +(define_insn "mask_gather_load" + [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w") + (unspec:SVE_4 + [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vgw, rk, rk, rk, rk") + (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") + (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] "TARGET_SVE" - "rev\t%0.d, %1/m, %2.d" + "@ + ld1\t%0.s, %5/z, [%2.s] + ld1\t%0.s, %5/z, [%2.s, #%1] + ld1\t%0.s, %5/z, [%1, %2.s, sxtw] + ld1\t%0.s, %5/z, [%1, %2.s, uxtw] + ld1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + ld1\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" ) -(define_insn "*aarch64_sve_rev32" - [(set (match_operand:SVE_BH 0 "register_operand" "=w") - (unspec:SVE_BH - [(match_operand:VNx4BI 1 "register_operand" "Upl") - (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")] - UNSPEC_REV32)] - UNSPEC_MERGE_PTRUE))] +;; Predicated gather loads for 64-bit elements. The value of operand 3 +;; doesn't matter in this case. +(define_insn "mask_gather_load" + [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w") + (unspec:SVE_2 + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vgd, rk, rk") + (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] + "TARGET_SVE" + "@ + ld1\t%0.d, %5/z, [%2.d] + ld1\t%0.d, %5/z, [%2.d, #%1] + ld1\t%0.d, %5/z, [%1, %2.d] + ld1\t%0.d, %5/z, [%1, %2.d, lsl %p4]" +) + +;; Likewise, but with the offset being extended from 32 bits. 
+(define_insn_and_rewrite "*mask_gather_load_xtw_unpacked" + [(set (match_operand:SVE_2 0 "register_operand" "=w, w") + (unspec:SVE_2 + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "register_operand" "rk, rk") + (unspec:VNx2DI + [(match_operand 6) + (ANY_EXTEND:VNx2DI + (match_operand:VNx2SI 2 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] "TARGET_SVE" - "rev\t%0.s, %1/m, %2.s" + "@ + ld1\t%0.d, %5/z, [%1, %2.d, xtw] + ld1\t%0.d, %5/z, [%1, %2.d, xtw %p4]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (VNx2BImode); + } ) -(define_insn "*aarch64_sve_rev16vnx16qi" - [(set (match_operand:VNx16QI 0 "register_operand" "=w") - (unspec:VNx16QI - [(match_operand:VNx8BI 1 "register_operand" "Upl") - (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")] - UNSPEC_REV16)] - UNSPEC_MERGE_PTRUE))] +;; Likewise, but with the offset being truncated to 32 bits and then +;; sign-extended. +(define_insn_and_rewrite "*mask_gather_load_sxtw" + [(set (match_operand:SVE_2 0 "register_operand" "=w, w") + (unspec:SVE_2 + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "register_operand" "rk, rk") + (unspec:VNx2DI + [(match_operand 6) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 2 "register_operand" "w, w")))] + UNSPEC_PRED_X) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] "TARGET_SVE" - "revb\t%0.h, %1/m, %2.h" + "@ + ld1\t%0.d, %5/z, [%1, %2.d, sxtw] + ld1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (VNx2BImode); + } ) -(define_insn "*aarch64_sve_rev" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] - UNSPEC_REV))] +;; Likewise, but with the offset being truncated to 32 bits and then +;; zero-extended. +(define_insn "*mask_gather_load_uxtw" + [(set (match_operand:SVE_2 0 "register_operand" "=w, w") + (unspec:SVE_2 + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "register_operand" "rk, rk") + (and:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w, w") + (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] "TARGET_SVE" - "rev\t%0., %1.") - -(define_insn "*aarch64_sve_dup_lane" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (vec_duplicate:SVE_ALL - (vec_select: - (match_operand:SVE_ALL 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "const_int_operand")]))))] - "TARGET_SVE - && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 0, 63)" - "dup\t%0., %1.[%2]" + "@ + ld1\t%0.d, %5/z, [%1, %2.d, uxtw] + ld1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" +) + +;; ------------------------------------------------------------------------- +;; ---- Extending gather loads +;; ------------------------------------------------------------------------- +;; Includes gather forms of: +;; - LD1B +;; - LD1H +;; - LD1SB +;; - LD1SH +;; - LD1SW +;; - LD1W +;; ------------------------------------------------------------------------- + +;; Predicated extending gather loads for 32-bit elements. 
Operand 3 is +;; true for unsigned extension and false for signed extension. +(define_insn_and_rewrite "@aarch64_gather_load_" + [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w") + (unspec:SVE_4HSI + [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm") + (ANY_EXTEND:SVE_4HSI + (unspec:SVE_4BHI + [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") + (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") + (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" + "@ + ld1\t%0.s, %5/z, [%2.s] + ld1\t%0.s, %5/z, [%2.s, #%1] + ld1\t%0.s, %5/z, [%1, %2.s, sxtw] + ld1\t%0.s, %5/z, [%1, %2.s, uxtw] + ld1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + ld1\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (VNx4BImode); + } ) -;; Note that the immediate (third) operand is the lane index not -;; the byte index. -(define_insn "*aarch64_sve_ext" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0") - (match_operand:SVE_ALL 2 "register_operand" "w") - (match_operand:SI 3 "const_int_operand")] - UNSPEC_EXT))] - "TARGET_SVE - && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (mode), 0, 255)" +;; Predicated extending gather loads for 64-bit elements. The value of +;; operand 3 doesn't matter in this case. +(define_insn_and_rewrite "@aarch64_gather_load_" + [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w") + (unspec:SVE_2HSDI + [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm") + (ANY_EXTEND:SVE_2HSDI + (unspec:SVE_2BHSI + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") + (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" + "@ + ld1\t%0.d, %5/z, [%2.d] + ld1\t%0.d, %5/z, [%2.d, #%1] + ld1\t%0.d, %5/z, [%1, %2.d] + ld1\t%0.d, %5/z, [%1, %2.d, lsl %p4]" + "&& !CONSTANT_P (operands[6])" { - operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (mode)); - return "ext\\t%0.b, %0.b, %2.b, #%3"; + operands[6] = CONSTM1_RTX (VNx2BImode); } ) -(define_insn "add3" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w") - (plus:SVE_I - (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w") - (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))] - "TARGET_SVE" +;; Likewise, but with the offset being extended from 32 bits. 
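+;;
+;; An illustrative source loop for the extending forms (names are
+;; placeholders): narrow elements are gathered and widened by the same load,
+;; here bytes sign-extended into 32-bit lanes (LD1SB):
+;;
+;;   void
+;;   gather_widen (int *restrict dst, const signed char *restrict src,
+;;                 const int *restrict idx, int n)
+;;   {
+;;     for (int i = 0; i < n; ++i)
+;;       dst[i] = src[idx[i]];
+;;   }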
+(define_insn_and_rewrite "*aarch64_gather_load__xtw_unpacked" + [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w") + (unspec:SVE_2HSDI + [(match_operand 6) + (ANY_EXTEND:SVE_2HSDI + (unspec:SVE_2BHSI + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") + (unspec:VNx2DI + [(match_operand 7) + (ANY_EXTEND2:VNx2DI + (match_operand:VNx2SI 2 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" "@ - add\t%0., %0., #%D2 - sub\t%0., %0., #%N2 - * return aarch64_output_sve_inc_dec_immediate (\"%0.\", operands[2]); - add\t%0., %1., %2." + ld1\t%0.d, %5/z, [%1, %2.d, xtw] + ld1\t%0.d, %5/z, [%1, %2.d, xtw %p4]" + "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" + { + operands[6] = CONSTM1_RTX (VNx2BImode); + operands[7] = CONSTM1_RTX (VNx2BImode); + } ) -(define_insn "sub3" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w") - (minus:SVE_I - (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa") - (match_operand:SVE_I 2 "register_operand" "w, 0")))] - "TARGET_SVE" +;; Likewise, but with the offset being truncated to 32 bits and then +;; sign-extended. +(define_insn_and_rewrite "*aarch64_gather_load__sxtw" + [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w") + (unspec:SVE_2HSDI + [(match_operand 6) + (ANY_EXTEND:SVE_2HSDI + (unspec:SVE_2BHSI + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") + (unspec:VNx2DI + [(match_operand 7) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 2 "register_operand" "w, w")))] + UNSPEC_PRED_X) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" "@ - sub\t%0., %1., %2. - subr\t%0., %0., #%D1" + ld1\t%0.d, %5/z, [%1, %2.d, sxtw] + ld1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" + "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" + { + operands[6] = CONSTM1_RTX (VNx2BImode); + operands[7] = CONSTM1_RTX (VNx2BImode); + } ) -;; Unpredicated multiplication. -(define_expand "mul3" - [(set (match_operand:SVE_I 0 "register_operand") - (unspec:SVE_I - [(match_dup 3) - (mult:SVE_I - (match_operand:SVE_I 1 "register_operand") - (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))] - UNSPEC_MERGE_PTRUE))] - "TARGET_SVE" +;; Likewise, but with the offset being truncated to 32 bits and then +;; zero-extended. 
+(define_insn_and_rewrite "*aarch64_gather_load__uxtw" + [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w") + (unspec:SVE_2HSDI + [(match_operand 7) + (ANY_EXTEND:SVE_2HSDI + (unspec:SVE_2BHSI + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") + (and:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w, w") + (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch))] + UNSPEC_LD1_GATHER))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" + "@ + ld1\t%0.d, %5/z, [%1, %2.d, uxtw] + ld1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" + "&& !CONSTANT_P (operands[7])" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + operands[7] = CONSTM1_RTX (VNx2BImode); } ) -;; Multiplication predicated with a PTRUE. We don't actually need the -;; predicate for the first alternative, but using Upa or X isn't likely -;; to gain much and would make the instruction seem less uniform to the -;; register allocator. -(define_insn "*mul3" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl, Upl") - (mult:SVE_I - (match_operand:SVE_I 2 "register_operand" "%0, 0") - (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- First-faulting gather loads +;; ------------------------------------------------------------------------- +;; Includes gather forms of: +;; - LDFF1D +;; - LDFF1W +;; ------------------------------------------------------------------------- + +;; Predicated first-faulting gather loads for 32-bit elements. Operand +;; 3 is true for unsigned extension and false for signed extension. +(define_insn "@aarch64_ldff1_gather" + [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w") + (unspec:SVE_FULL_S + [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") + (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") + (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") + (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] + "TARGET_SVE" + "@ + ldff1w\t%0.s, %5/z, [%2.s] + ldff1w\t%0.s, %5/z, [%2.s, #%1] + ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw] + ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw] + ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" +) + +;; Predicated first-faulting gather loads for 64-bit elements. The value +;; of operand 3 doesn't matter in this case. 
+(define_insn "@aarch64_ldff1_gather" + [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w") + (unspec:SVE_FULL_D + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") + (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] + "TARGET_SVE" + "@ + ldff1d\t%0.d, %5/z, [%2.d] + ldff1d\t%0.d, %5/z, [%2.d, #%1] + ldff1d\t%0.d, %5/z, [%1, %2.d] + ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" +) + +;; Likewise, but with the offset being sign-extended from 32 bits. +(define_insn_and_rewrite "*aarch64_ldff1_gather_sxtw" + [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") + (unspec:SVE_FULL_D + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "register_operand" "rk, rk") + (unspec:VNx2DI + [(match_operand 6) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 2 "register_operand" "w, w")))] + UNSPEC_PRED_X) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] "TARGET_SVE" "@ - mul\t%0., %0., #%3 - mul\t%0., %1/m, %0., %3." + ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw] + ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (VNx2BImode); + } ) -(define_insn "*madd" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w") - (plus:SVE_I - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl, Upl") - (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") - (match_operand:SVE_I 3 "register_operand" "w, w"))] - UNSPEC_MERGE_PTRUE) - (match_operand:SVE_I 4 "register_operand" "w, 0")))] +;; Likewise, but with the offset being zero-extended from 32 bits. +(define_insn "*aarch64_ldff1_gather_uxtw" + [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") + (unspec:SVE_FULL_D + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "register_operand" "rk, rk") + (and:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w, w") + (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] "TARGET_SVE" "@ - mad\t%0., %1/m, %3., %4. - mla\t%0., %1/m, %2., %3." + ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw] + ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" +) + +;; ------------------------------------------------------------------------- +;; ---- First-faulting extending gather loads +;; ------------------------------------------------------------------------- +;; Includes gather forms of: +;; - LDFF1B +;; - LDFF1H +;; - LDFF1SB +;; - LDFF1SH +;; - LDFF1SW +;; - LDFF1W +;; ------------------------------------------------------------------------- + +;; Predicated extending first-faulting gather loads for 32-bit elements. +;; Operand 3 is true for unsigned extension and false for signed extension. 
+(define_insn_and_rewrite "@aarch64_ldff1_gather_" + [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w") + (unspec:VNx4_WIDE + [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm") + (ANY_EXTEND:VNx4_WIDE + (unspec:VNx4_NARROW + [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") + (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w") + (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + ldff1\t%0.s, %5/z, [%2.s] + ldff1\t%0.s, %5/z, [%2.s, #%1] + ldff1\t%0.s, %5/z, [%1, %2.s, sxtw] + ldff1\t%0.s, %5/z, [%1, %2.s, uxtw] + ldff1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + ldff1\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (VNx4BImode); + } ) -(define_insn "*msub3" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w") - (minus:SVE_I - (match_operand:SVE_I 4 "register_operand" "w, 0") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl, Upl") - (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") - (match_operand:SVE_I 3 "register_operand" "w, w"))] - UNSPEC_MERGE_PTRUE)))] +;; Predicated extending first-faulting gather loads for 64-bit elements. +;; The value of operand 3 doesn't matter in this case. +(define_insn_and_rewrite "@aarch64_ldff1_gather_" + [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w") + (unspec:VNx2_WIDE + [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm") + (ANY_EXTEND:VNx2_WIDE + (unspec:VNx2_NARROW + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") + (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] + UNSPEC_PRED_X))] "TARGET_SVE" "@ - msb\t%0., %1/m, %3., %4. - mls\t%0., %1/m, %2., %3." + ldff1\t%0.d, %5/z, [%2.d] + ldff1\t%0.d, %5/z, [%2.d, #%1] + ldff1\t%0.d, %5/z, [%1, %2.d] + ldff1\t%0.d, %5/z, [%1, %2.d, lsl %p4]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (VNx2BImode); + } ) -;; Unpredicated highpart multiplication. -(define_expand "mul3_highpart" - [(set (match_operand:SVE_I 0 "register_operand") - (unspec:SVE_I - [(match_dup 3) - (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") - (match_operand:SVE_I 2 "register_operand")] - MUL_HIGHPART)] - UNSPEC_MERGE_PTRUE))] +;; Likewise, but with the offset being sign-extended from 32 bits. 
+(define_insn_and_rewrite "*aarch64_ldff1_gather__sxtw" + [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") + (unspec:VNx2_WIDE + [(match_operand 6) + (ANY_EXTEND:VNx2_WIDE + (unspec:VNx2_NARROW + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") + (unspec:VNx2DI + [(match_operand 7) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 2 "register_operand" "w, w")))] + UNSPEC_PRED_X) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] + UNSPEC_PRED_X))] "TARGET_SVE" + "@ + ldff1\t%0.d, %5/z, [%1, %2.d, sxtw] + ldff1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" + "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + operands[6] = CONSTM1_RTX (VNx2BImode); + operands[7] = CONSTM1_RTX (VNx2BImode); } ) -;; Predicated highpart multiplication. -(define_insn "*mul3_highpart" - [(set (match_operand:SVE_I 0 "register_operand" "=w") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl") - (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0") - (match_operand:SVE_I 3 "register_operand" "w")] - MUL_HIGHPART)] - UNSPEC_MERGE_PTRUE))] +;; Likewise, but with the offset being zero-extended from 32 bits. +(define_insn_and_rewrite "*aarch64_ldff1_gather__uxtw" + [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") + (unspec:VNx2_WIDE + [(match_operand 7) + (ANY_EXTEND:VNx2_WIDE + (unspec:VNx2_NARROW + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") + (and:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w, w") + (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM)] + UNSPEC_LDFF1_GATHER))] + UNSPEC_PRED_X))] "TARGET_SVE" - "mulh\t%0., %1/m, %0., %3." + "@ + ldff1\t%0.d, %5/z, [%1, %2.d, uxtw] + ldff1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" + "&& !CONSTANT_P (operands[7])" + { + operands[7] = CONSTM1_RTX (VNx2BImode); + } ) -;; Unpredicated NEG, NOT and POPCOUNT. -(define_expand "2" - [(set (match_operand:SVE_I 0 "register_operand") - (unspec:SVE_I - [(match_dup 2) - (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] - UNSPEC_MERGE_PTRUE))] +;; ========================================================================= +;; == Prefetches +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Contiguous prefetches +;; ------------------------------------------------------------------------- +;; Includes contiguous forms of: +;; - PRFB +;; - PRFD +;; - PRFH +;; - PRFW +;; ------------------------------------------------------------------------- + +;; Contiguous predicated prefetches. Operand 2 gives the real prefetch +;; operation (as an svprfop), with operands 3 and 4 providing distilled +;; information. 
+(define_insn "@aarch64_sve_prefetch" + [(prefetch (unspec:DI + [(match_operand: 0 "register_operand" "Upl") + (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP") + (match_operand:DI 2 "const_int_operand")] + UNSPEC_SVE_PREFETCH) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand"))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[1] = gen_rtx_MEM (mode, operands[1]); + return aarch64_output_sve_prefetch ("prf", operands[2], "%0, %1"); } ) -;; NEG, NOT and POPCOUNT predicated with a PTRUE. -(define_insn "*2" - [(set (match_operand:SVE_I 0 "register_operand" "=w") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl") - (SVE_INT_UNARY:SVE_I - (match_operand:SVE_I 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- Gather prefetches +;; ------------------------------------------------------------------------- +;; Includes gather forms of: +;; - PRFB +;; - PRFD +;; - PRFH +;; - PRFW +;; ------------------------------------------------------------------------- + +;; Predicated gather prefetches for 32-bit bases and offsets. The operands +;; are: +;; 0: the governing predicate +;; 1: the scalar component of the address +;; 2: the vector component of the address +;; 3: 1 for zero extension, 0 for sign extension +;; 4: the scale multiplier +;; 5: a vector zero that identifies the mode of data being accessed +;; 6: the prefetch operator (an svprfop) +;; 7: the normal RTL prefetch rw flag +;; 8: the normal RTL prefetch locality value +(define_insn "@aarch64_sve_gather_prefetch" + [(prefetch (unspec:DI + [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") + (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w") + (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") + (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") + (match_operand:DI 6 "const_int_operand")] + UNSPEC_SVE_PREFETCH_GATHER) + (match_operand:DI 7 "const_int_operand") + (match_operand:DI 8 "const_int_operand"))] "TARGET_SVE" - "\t%0., %1/m, %2." + { + static const char *const insns[][2] = { + "prf", "%0, [%2.s]", + "prf", "%0, [%2.s, #%1]", + "prfb", "%0, [%1, %2.s, sxtw]", + "prfb", "%0, [%1, %2.s, uxtw]", + "prf", "%0, [%1, %2.s, sxtw %p4]", + "prf", "%0, [%1, %2.s, uxtw %p4]" + }; + const char *const *parts = insns[which_alternative]; + return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); + } ) -;; Vector AND, ORR and XOR. -(define_insn "3" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w") - (LOGICAL:SVE_I - (match_operand:SVE_I 1 "register_operand" "%0, w") - (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))] +;; Predicated gather prefetches for 64-bit elements. The value of operand 3 +;; doesn't matter in this case. 
+(define_insn "@aarch64_sve_gather_prefetch" + [(prefetch (unspec:DI + [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") + (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") + (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") + (match_operand:DI 6 "const_int_operand")] + UNSPEC_SVE_PREFETCH_GATHER) + (match_operand:DI 7 "const_int_operand") + (match_operand:DI 8 "const_int_operand"))] "TARGET_SVE" - "@ - \t%0., %0., #%C2 - \t%0.d, %1.d, %2.d" + { + static const char *const insns[][2] = { + "prf", "%0, [%2.d]", + "prf", "%0, [%2.d, #%1]", + "prfb", "%0, [%1, %2.d]", + "prf", "%0, [%1, %2.d, lsl %p4]" + }; + const char *const *parts = insns[which_alternative]; + return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); + } ) -;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs -;; by providing this, but we need to use UNSPECs since rtx logical ops -;; aren't defined for floating-point modes. -(define_insn "*3" - [(set (match_operand:SVE_F 0 "register_operand" "=w") - (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w") - (match_operand:SVE_F 2 "register_operand" "w")] - LOGICALF))] +;; Likewise, but with the offset being sign-extended from 32 bits. +(define_insn_and_rewrite "*aarch64_sve_gather_prefetch_sxtw" + [(prefetch (unspec:DI + [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") + (match_operand:DI 1 "register_operand" "rk, rk") + (unspec:VNx2DI_ONLY + [(match_operand 9) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 2 "register_operand" "w, w")))] + UNSPEC_PRED_X) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") + (match_operand:DI 6 "const_int_operand")] + UNSPEC_SVE_PREFETCH_GATHER) + (match_operand:DI 7 "const_int_operand") + (match_operand:DI 8 "const_int_operand"))] "TARGET_SVE" - "\t%0.d, %1.d, %2.d" + { + static const char *const insns[][2] = { + "prfb", "%0, [%1, %2.d, sxtw]", + "prf", "%0, [%1, %2.d, sxtw %p4]" + }; + const char *const *parts = insns[which_alternative]; + return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); + } + "&& !rtx_equal_p (operands[0], operands[9])" + { + operands[9] = copy_rtx (operands[0]); + } ) -;; REG_EQUAL notes on "not3" should ensure that we can generate -;; this pattern even though the NOT instruction itself is predicated. -(define_insn "bic3" - [(set (match_operand:SVE_I 0 "register_operand" "=w") - (and:SVE_I - (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w")) - (match_operand:SVE_I 2 "register_operand" "w")))] +;; Likewise, but with the offset being zero-extended from 32 bits. 
+(define_insn "*aarch64_sve_gather_prefetch_uxtw" + [(prefetch (unspec:DI + [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") + (match_operand:DI 1 "register_operand" "rk, rk") + (and:VNx2DI_ONLY + (match_operand:VNx2DI 2 "register_operand" "w, w") + (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate")) + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") + (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") + (match_operand:DI 6 "const_int_operand")] + UNSPEC_SVE_PREFETCH_GATHER) + (match_operand:DI 7 "const_int_operand") + (match_operand:DI 8 "const_int_operand"))] "TARGET_SVE" - "bic\t%0.d, %2.d, %1.d" + { + static const char *const insns[][2] = { + "prfb", "%0, [%1, %2.d, uxtw]", + "prf", "%0, [%1, %2.d, uxtw %p4]" + }; + const char *const *parts = insns[which_alternative]; + return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); + } ) -;; Predicate AND. We can reuse one of the inputs as the GP. -(define_insn "and3" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") - (match_operand:PRED_ALL 2 "register_operand" "Upa")))] +;; ========================================================================= +;; == Stores +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Normal contiguous stores +;; ------------------------------------------------------------------------- +;; Includes contiguous forms of: +;; - ST1B +;; - ST1D +;; - ST1H +;; - ST1W +;; - ST2B +;; - ST2D +;; - ST2H +;; - ST2W +;; - ST3B +;; - ST3D +;; - ST3H +;; - ST3W +;; - ST4B +;; - ST4D +;; - ST4H +;; - ST4W +;; ------------------------------------------------------------------------- + +;; Predicated ST1. +(define_insn "maskstore" + [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") + (unspec:SVE_ALL + [(match_operand: 2 "register_operand" "Upl") + (match_operand:SVE_ALL 1 "register_operand" "w") + (match_dup 0)] + UNSPEC_ST1_SVE))] "TARGET_SVE" - "and\t%0.b, %1/z, %1.b, %2.b" + "st1\t%1., %2, %0" ) -;; Unpredicated predicate ORR and XOR. -(define_expand "3" - [(set (match_operand:PRED_ALL 0 "register_operand") - (and:PRED_ALL - (LOGICAL_OR:PRED_ALL - (match_operand:PRED_ALL 1 "register_operand") - (match_operand:PRED_ALL 2 "register_operand")) - (match_dup 3)))] +;; Unpredicated ST[234]. This is always a full update, so the dependence +;; on the old value of the memory location (via (match_dup 0)) is redundant. +;; There doesn't seem to be any obvious benefit to treating the all-true +;; case differently though. In particular, it's very unlikely that we'll +;; only find out during RTL that a store_lanes is dead. +(define_expand "vec_store_lanes" + [(set (match_operand:SVE_STRUCT 0 "memory_operand") + (unspec:SVE_STRUCT + [(match_dup 2) + (match_operand:SVE_STRUCT 1 "register_operand") + (match_dup 0)] + UNSPEC_STN))] "TARGET_SVE" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + operands[2] = aarch64_ptrue_reg (mode); } ) -;; Predicated predicate ORR and XOR. -(define_insn "pred_3" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (and:PRED_ALL - (LOGICAL:PRED_ALL - (match_operand:PRED_ALL 2 "register_operand" "Upa") - (match_operand:PRED_ALL 3 "register_operand" "Upa")) - (match_operand:PRED_ALL 1 "register_operand" "Upa")))] +;; Predicated ST[234]. 
+(define_insn "vec_mask_store_lanes" + [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") + (unspec:SVE_STRUCT + [(match_operand: 2 "register_operand" "Upl") + (match_operand:SVE_STRUCT 1 "register_operand" "w") + (match_dup 0)] + UNSPEC_STN))] "TARGET_SVE" - "\t%0.b, %1/z, %2.b, %3.b" + "st\t%1, %2, %0" ) -;; Perform a logical operation on operands 2 and 3, using operand 1 as -;; the GP (which is known to be a PTRUE). Store the result in operand 0 -;; and set the flags in the same way as for PTEST. The (and ...) in the -;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested -;; value is structurally equivalent to rhs of the second set. -(define_insn "*3_cc" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa") - (and:PRED_ALL - (LOGICAL:PRED_ALL - (match_operand:PRED_ALL 2 "register_operand" "Upa") - (match_operand:PRED_ALL 3 "register_operand" "Upa")) - (match_dup 1))] - UNSPEC_PTEST_PTRUE) - (const_int 0))) - (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) - (match_dup 1)))] +;; ------------------------------------------------------------------------- +;; ---- Truncating contiguous stores +;; ------------------------------------------------------------------------- +;; Includes: +;; - ST1B +;; - ST1H +;; - ST1W +;; ------------------------------------------------------------------------- + +;; Predicated truncate and store, with 8 elements per 128-bit block. +(define_insn "@aarch64_store_trunc" + [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m") + (unspec:VNx8_NARROW + [(match_operand:VNx8BI 2 "register_operand" "Upl") + (truncate:VNx8_NARROW + (match_operand:VNx8_WIDE 1 "register_operand" "w")) + (match_dup 0)] + UNSPEC_ST1_SVE))] "TARGET_SVE" - "s\t%0.b, %1/z, %2.b, %3.b" + "st1\t%1., %2, %0" ) -;; Unpredicated predicate inverse. -(define_expand "one_cmpl2" - [(set (match_operand:PRED_ALL 0 "register_operand") - (and:PRED_ALL - (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) - (match_dup 2)))] +;; Predicated truncate and store, with 4 elements per 128-bit block. +(define_insn "@aarch64_store_trunc" + [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m") + (unspec:VNx4_NARROW + [(match_operand:VNx4BI 2 "register_operand" "Upl") + (truncate:VNx4_NARROW + (match_operand:VNx4_WIDE 1 "register_operand" "w")) + (match_dup 0)] + UNSPEC_ST1_SVE))] "TARGET_SVE" - { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); - } + "st1\t%1., %2, %0" ) -;; Predicated predicate inverse. -(define_insn "*one_cmpl3" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (and:PRED_ALL - (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) - (match_operand:PRED_ALL 1 "register_operand" "Upa")))] +;; Predicated truncate and store, with 2 elements per 128-bit block. +(define_insn "@aarch64_store_trunc" + [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m") + (unspec:VNx2_NARROW + [(match_operand:VNx2BI 2 "register_operand" "Upl") + (truncate:VNx2_NARROW + (match_operand:VNx2_WIDE 1 "register_operand" "w")) + (match_dup 0)] + UNSPEC_ST1_SVE))] "TARGET_SVE" - "not\t%0.b, %1/z, %2.b" + "st1\t%1., %2, %0" ) -;; Predicated predicate BIC and ORN. 
-(define_insn "*3" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (and:PRED_ALL - (NLOGICAL:PRED_ALL - (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) - (match_operand:PRED_ALL 3 "register_operand" "Upa")) - (match_operand:PRED_ALL 1 "register_operand" "Upa")))] - "TARGET_SVE" - "\t%0.b, %1/z, %3.b, %2.b" -) +;; ------------------------------------------------------------------------- +;; ---- Non-temporal contiguous stores +;; ------------------------------------------------------------------------- +;; Includes: +;; - STNT1B +;; - STNT1D +;; - STNT1H +;; - STNT1W +;; ------------------------------------------------------------------------- -;; Predicated predicate NAND and NOR. -(define_insn "*3" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (and:PRED_ALL - (NLOGICAL:PRED_ALL - (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) - (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) - (match_operand:PRED_ALL 1 "register_operand" "Upa")))] +(define_insn "@aarch64_stnt1" + [(set (match_operand:SVE_FULL 0 "memory_operand" "+m") + (unspec:SVE_FULL + [(match_operand: 2 "register_operand" "Upl") + (match_operand:SVE_FULL 1 "register_operand" "w") + (match_dup 0)] + UNSPEC_STNT1_SVE))] "TARGET_SVE" - "\t%0.b, %1/z, %2.b, %3.b" + "stnt1\t%1., %2, %0" ) -;; Unpredicated LSL, LSR and ASR by a vector. -(define_expand "v3" - [(set (match_operand:SVE_I 0 "register_operand") - (unspec:SVE_I - [(match_dup 3) - (ASHIFT:SVE_I - (match_operand:SVE_I 1 "register_operand") - (match_operand:SVE_I 2 "aarch64_sve_shift_operand"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- Normal scatter stores +;; ------------------------------------------------------------------------- +;; Includes scatter forms of: +;; - ST1D +;; - ST1W +;; ------------------------------------------------------------------------- + +;; Unpredicated scatter stores. +(define_expand "scatter_store" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_dup 5) + (match_operand:DI 0 "aarch64_sve_gather_offset_") + (match_operand: 1 "register_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_") + (match_operand:SVE_24 4 "register_operand")] + UNSPEC_ST1_SCATTER))] "TARGET_SVE" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + operands[5] = aarch64_ptrue_reg (mode); } ) -;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't -;; actually need the predicate for the first alternative, but using Upa -;; or X isn't likely to gain much and would make the instruction seem -;; less uniform to the register allocator. -(define_insn "*v3" - [(set (match_operand:SVE_I 0 "register_operand" "=w, w") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl, Upl") - (ASHIFT:SVE_I - (match_operand:SVE_I 2 "register_operand" "w, 0") - (match_operand:SVE_I 3 "aarch64_sve_shift_operand" "D, w"))] - UNSPEC_MERGE_PTRUE))] +;; Predicated scatter stores for 32-bit elements. Operand 2 is true for +;; unsigned extension and false for signed extension. 
+(define_insn "mask_scatter_store" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vgw, rk, rk, rk, rk") + (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") + (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") + (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")] + UNSPEC_ST1_SCATTER))] "TARGET_SVE" "@ - \t%0., %2., #%3 - \t%0., %1/m, %0., %3." + st1\t%4.s, %5, [%1.s] + st1\t%4.s, %5, [%1.s, #%0] + st1\t%4.s, %5, [%0, %1.s, sxtw] + st1\t%4.s, %5, [%0, %1.s, uxtw] + st1\t%4.s, %5, [%0, %1.s, sxtw %p3] + st1\t%4.s, %5, [%0, %1.s, uxtw %p3]" ) -;; LSL, LSR and ASR by a scalar, which expands into one of the vector -;; shifts above. -(define_expand "3" - [(set (match_operand:SVE_I 0 "register_operand") - (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") - (match_operand: 2 "general_operand")))] +;; Predicated scatter stores for 64-bit elements. The value of operand 2 +;; doesn't matter in this case. +(define_insn "mask_scatter_store" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vgd, rk, rk") + (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w, w, w")] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.d, %5, [%1.d] + st1\t%4.d, %5, [%1.d, #%0] + st1\t%4.d, %5, [%0, %1.d] + st1\t%4.d, %5, [%0, %1.d, lsl %p3]" +) + +;; Likewise, but with the offset being extended from 32 bits. +(define_insn_and_rewrite "*mask_scatter_store_xtw_unpacked" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 0 "register_operand" "rk, rk") + (unspec:VNx2DI + [(match_operand 6) + (ANY_EXTEND:VNx2DI + (match_operand:VNx2SI 1 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w")] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.d, %5, [%0, %1.d, xtw] + st1\t%4.d, %5, [%0, %1.d, xtw %p3]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (mode); + } +) + +;; Likewise, but with the offset being truncated to 32 bits and then +;; sign-extended. +(define_insn_and_rewrite "*mask_scatter_store_sxtw" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 0 "register_operand" "rk, rk") + (unspec:VNx2DI + [(match_operand 6) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 1 "register_operand" "w, w")))] + UNSPEC_PRED_X) + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w")] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.d, %5, [%0, %1.d, sxtw] + st1\t%4.d, %5, [%0, %1.d, sxtw %p3]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (mode); + } +) + +;; Likewise, but with the offset being truncated to 32 bits and then +;; zero-extended. 
+(define_insn "*mask_scatter_store_uxtw" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") + (and:VNx2DI + (match_operand:VNx2DI 1 "register_operand" "w, w") + (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w")] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.d, %5, [%0, %1.d, uxtw] + st1\t%4.d, %5, [%0, %1.d, uxtw %p3]" +) + +;; ------------------------------------------------------------------------- +;; ---- Truncating scatter stores +;; ------------------------------------------------------------------------- +;; Includes scatter forms of: +;; - ST1B +;; - ST1H +;; - ST1W +;; ------------------------------------------------------------------------- + +;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is +;; true for unsigned extension and false for signed extension. +(define_insn "@aarch64_scatter_store_trunc" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") + (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") + (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") + (truncate:VNx4_NARROW + (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.s, %5, [%1.s] + st1\t%4.s, %5, [%1.s, #%0] + st1\t%4.s, %5, [%0, %1.s, sxtw] + st1\t%4.s, %5, [%0, %1.s, uxtw] + st1\t%4.s, %5, [%0, %1.s, sxtw %p3] + st1\t%4.s, %5, [%0, %1.s, uxtw %p3]" +) + +;; Predicated truncating scatter stores for 64-bit elements. The value of +;; operand 2 doesn't matter in this case. +(define_insn "@aarch64_scatter_store_trunc" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") + (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") + (truncate:VNx2_NARROW + (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.d, %5, [%1.d] + st1\t%4.d, %5, [%1.d, #%0] + st1\t%4.d, %5, [%0, %1.d] + st1\t%4.d, %5, [%0, %1.d, lsl %p3]" +) + +;; Likewise, but with the offset being sign-extended from 32 bits. +(define_insn_and_rewrite "*aarch64_scatter_store_trunc_sxtw" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 0 "register_operand" "rk, rk") + (unspec:VNx2DI + [(match_operand 6) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 1 "register_operand" "w, w")))] + UNSPEC_PRED_X) + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") + (truncate:VNx2_NARROW + (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.d, %5, [%0, %1.d, sxtw] + st1\t%4.d, %5, [%0, %1.d, sxtw %p3]" + "&& !rtx_equal_p (operands[5], operands[6])" + { + operands[6] = copy_rtx (operands[5]); + } +) + +;; Likewise, but with the offset being zero-extended from 32 bits. 
+(define_insn "*aarch64_scatter_store_trunc_uxtw" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") + (and:VNx2DI + (match_operand:VNx2DI 1 "register_operand" "w, w") + (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") + (truncate:VNx2_NARROW + (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1\t%4.d, %5, [%0, %1.d, uxtw] + st1\t%4.d, %5, [%0, %1.d, uxtw %p3]" +) + +;; ========================================================================= +;; == Vector creation +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Duplicate element +;; ------------------------------------------------------------------------- +;; Includes: +;; - DUP +;; - MOV +;; - LD1RB +;; - LD1RD +;; - LD1RH +;; - LD1RW +;; - LD1ROB (F64MM) +;; - LD1ROD (F64MM) +;; - LD1ROH (F64MM) +;; - LD1ROW (F64MM) +;; - LD1RQB +;; - LD1RQD +;; - LD1RQH +;; - LD1RQW +;; ------------------------------------------------------------------------- + +(define_expand "vec_duplicate" + [(parallel + [(set (match_operand:SVE_ALL 0 "register_operand") + (vec_duplicate:SVE_ALL + (match_operand: 1 "aarch64_sve_dup_operand"))) + (clobber (scratch:VNx16BI))])] + "TARGET_SVE" + { + if (MEM_P (operands[1])) + { + rtx ptrue = aarch64_ptrue_reg (mode); + emit_insn (gen_sve_ld1r (operands[0], ptrue, operands[1], + CONST0_RTX (mode))); + DONE; + } + } +) + +;; Accept memory operands for the benefit of combine, and also in case +;; the scalar input gets spilled to memory during RA. We want to split +;; the load at the first opportunity in order to allow the PTRUE to be +;; optimized with surrounding code. +(define_insn_and_split "*vec_duplicate_reg" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") + (vec_duplicate:SVE_ALL + (match_operand: 1 "aarch64_sve_dup_operand" "r, w, Uty"))) + (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))] + "TARGET_SVE" + "@ + mov\t%0., %1 + mov\t%0., %1 + #" + "&& MEM_P (operands[1])" + [(const_int 0)] + { + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (VNx16BImode); + emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); + rtx gp = gen_lowpart (mode, operands[2]); + emit_insn (gen_sve_ld1r (operands[0], gp, operands[1], + CONST0_RTX (mode))); + DONE; + } + [(set_attr "length" "4,4,8")] +) + +;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version). +(define_insn "@aarch64_vec_duplicate_vq_le" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (vec_duplicate:SVE_FULL + (match_operand: 1 "register_operand" "w")))] + "TARGET_SVE && !BYTES_BIG_ENDIAN" + { + operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); + return "dup\t%0.q, %1.q[0]"; + } +) + +;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version). +;; The SVE register layout puts memory lane N into (architectural) +;; register lane N, whereas the Advanced SIMD layout puts the memory +;; lsb into the register lsb. We therefore have to describe this in rtl +;; terms as a reverse of the V128 vector followed by a duplicate. 
+(define_insn "@aarch64_vec_duplicate_vq_be" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (vec_duplicate:SVE_FULL + (vec_select: + (match_operand: 1 "register_operand" "w") + (match_operand 2 "descending_int_parallel"))))] + "TARGET_SVE + && BYTES_BIG_ENDIAN + && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)), + GET_MODE_NUNITS (mode) - 1)" + { + operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); + return "dup\t%0.q, %1.q[0]"; + } +) + +;; This is used for vec_duplicates from memory, but can also +;; be used by combine to optimize selects of a vec_duplicate +;; with zero. +(define_insn "sve_ld1r" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w") + (unspec:SVE_ALL + [(match_operand: 1 "register_operand" "Upl") + (vec_duplicate:SVE_ALL + (match_operand: 2 "aarch64_sve_ld1r_operand" "Uty")) + (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + "ld1r\t%0., %1/z, %2" +) + +;; Load 128 bits from memory under predicate control and duplicate to +;; fill a vector. +(define_insn "@aarch64_sve_ld1rq" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand: 2 "register_operand" "Upl") + (match_operand: 1 "aarch64_sve_ld1rq_operand" "UtQ")] + UNSPEC_LD1RQ))] + "TARGET_SVE" + { + operands[1] = gen_rtx_MEM (mode, XEXP (operands[1], 0)); + return "ld1rq\t%0., %2/z, %1"; + } +) + +(define_insn "@aarch64_sve_ld1ro" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand: 2 "register_operand" "Upl") + (match_operand:OI 1 "aarch64_sve_ld1ro_operand_" + "UO")] + UNSPEC_LD1RO))] + "TARGET_SVE_F64MM" + { + operands[1] = gen_rtx_MEM (mode, XEXP (operands[1], 0)); + return "ld1ro\t%0., %2/z, %1"; + } +) + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Initialize from individual elements +;; ------------------------------------------------------------------------- +;; Includes: +;; - INSR +;; ------------------------------------------------------------------------- + +(define_expand "vec_init" + [(match_operand:SVE_FULL 0 "register_operand") + (match_operand 1 "")] + "TARGET_SVE" + { + aarch64_sve_expand_vector_init (operands[0], operands[1]); + DONE; + } +) + +;; Shift an SVE vector left and insert a scalar into element 0. +(define_insn "vec_shl_insert_" + [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w") + (match_operand: 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")] + UNSPEC_INSR))] + "TARGET_SVE" + "@ + insr\t%0., %2 + insr\t%0., %2 + movprfx\t%0, %1\;insr\t%0., %2 + movprfx\t%0, %1\;insr\t%0., %2" + [(set_attr "movprfx" "*,*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Linear series +;; ------------------------------------------------------------------------- +;; Includes: +;; - INDEX +;; ------------------------------------------------------------------------- + +(define_insn "vec_series" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") + (vec_series:SVE_I + (match_operand: 1 "aarch64_sve_index_operand" "Usi, r, r") + (match_operand: 2 "aarch64_sve_index_operand" "r, Usi, r")))] + "TARGET_SVE" + "@ + index\t%0., #%1, %2 + index\t%0., %1, #%2 + index\t%0., %1, %2" +) + +;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range +;; of an INDEX instruction. 
+(define_insn "*vec_series_plus" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (plus:SVE_I + (vec_duplicate:SVE_I + (match_operand: 1 "register_operand" "r")) + (match_operand:SVE_I 2 "immediate_operand")))] + "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" + { + operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); + return "index\t%0., %1, #%2"; + } +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Duplicate element +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +;; Implement a predicate broadcast by shifting the low bit of the scalar +;; input into the top bit and using a WHILELO. An alternative would be to +;; duplicate the input and do a compare with zero. +(define_expand "vec_duplicate" + [(set (match_operand:PRED_ALL 0 "register_operand") + (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))] + "TARGET_SVE" + { + rtx tmp = gen_reg_rtx (DImode); + rtx op1 = gen_lowpart (DImode, operands[1]); + emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); + emit_insn (gen_while_ultdi (operands[0], const0_rtx, tmp)); + DONE; + } +) + +;; ========================================================================= +;; == Vector decomposition +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Extract index +;; ------------------------------------------------------------------------- +;; Includes: +;; - DUP (Advanced SIMD) +;; - DUP (SVE) +;; - EXT (SVE) +;; - ST1 (Advanced SIMD) +;; - UMOV (Advanced SIMD) +;; ------------------------------------------------------------------------- + +(define_expand "vec_extract" + [(set (match_operand: 0 "register_operand") + (vec_select: + (match_operand:SVE_FULL 1 "register_operand") + (parallel [(match_operand:SI 2 "nonmemory_operand")])))] + "TARGET_SVE" + { + poly_int64 val; + if (poly_int_rtx_p (operands[2], &val) + && known_eq (val, GET_MODE_NUNITS (mode) - 1)) + { + /* The last element can be extracted with a LASTB and a false + predicate. */ + rtx sel = aarch64_pfalse_reg (mode); + emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); + DONE; + } + if (!CONST_INT_P (operands[2])) + { + /* Create an index with operand[2] as the base and -1 as the step. + It will then be zero for the element we care about. */ + rtx index = gen_lowpart (mode, operands[2]); + index = force_reg (mode, index); + rtx series = gen_reg_rtx (mode); + emit_insn (gen_vec_series (series, index, constm1_rtx)); + + /* Get a predicate that is true for only that element. */ + rtx zero = CONST0_RTX (mode); + rtx cmp = gen_rtx_EQ (mode, series, zero); + rtx sel = gen_reg_rtx (mode); + emit_insn (gen_vec_cmp (sel, cmp, series, zero)); + + /* Select the element using LASTB. */ + emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); + DONE; + } + } +) + +;; Extract element zero. This is a special case because we want to force +;; the registers to be the same for the second alternative, and then +;; split the instruction into nothing after RA. 
+(define_insn_and_split "*vec_extract_0" + [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") + (vec_select: + (match_operand:SVE_FULL 1 "register_operand" "w, 0, w") + (parallel [(const_int 0)])))] + "TARGET_SVE" + { + operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); + switch (which_alternative) + { + case 0: + return "umov\\t%0, %1.[0]"; + case 1: + return "#"; + case 2: + return "st1\\t{%1.}[0], %0"; + default: + gcc_unreachable (); + } + } + "&& reload_completed + && REG_P (operands[0]) + && REGNO (operands[0]) == REGNO (operands[1])" + [(const_int 0)] + { + emit_note (NOTE_INSN_DELETED); + DONE; + } + [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] +) + +;; Extract an element from the Advanced SIMD portion of the register. +;; We don't just reuse the aarch64-simd.md pattern because we don't +;; want any change in lane number on big-endian targets. +(define_insn "*vec_extract_v128" + [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") + (vec_select: + (match_operand:SVE_FULL 1 "register_operand" "w, w, w") + (parallel [(match_operand:SI 2 "const_int_operand")])))] + "TARGET_SVE + && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 1, 15)" + { + operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); + switch (which_alternative) + { + case 0: + return "umov\\t%0, %1.[%2]"; + case 1: + return "dup\\t%0, %1.[%2]"; + case 2: + return "st1\\t{%1.}[%2], %0"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] +) + +;; Extract an element in the range of DUP. This pattern allows the +;; source and destination to be different. +(define_insn "*vec_extract_dup" + [(set (match_operand: 0 "register_operand" "=w") + (vec_select: + (match_operand:SVE_FULL 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "const_int_operand")])))] + "TARGET_SVE + && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 16, 63)" + { + operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); + return "dup\t%0., %1.[%2]"; + } +) + +;; Extract an element outside the range of DUP. This pattern requires the +;; source and destination to be the same. +(define_insn "*vec_extract_ext" + [(set (match_operand: 0 "register_operand" "=w, ?&w") + (vec_select: + (match_operand:SVE_FULL 1 "register_operand" "0, w") + (parallel [(match_operand:SI 2 "const_int_operand")])))] + "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (mode) >= 64" + { + operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); + operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (mode)); + return (which_alternative == 0 + ? "ext\t%0.b, %0.b, %0.b, #%2" + : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2"); + } + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Extract active element +;; ------------------------------------------------------------------------- +;; Includes: +;; - LASTA +;; - LASTB +;; ------------------------------------------------------------------------- + +;; Extract the last active element of operand 1 into operand 0. +;; If no elements are active, extract the last inactive element instead. +(define_insn "@extract__" + [(set (match_operand: 0 "register_operand" "=?r, w") + (unspec: + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SVE_FULL 2 "register_operand" "w, w")] + LAST))] + "TARGET_SVE" + "@ + last\t%0, %1, %2. + last\t%0, %1, %2." 
+) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Extract index +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +;; Handle extractions from a predicate by converting to an integer vector +;; and extracting from there. +(define_expand "vec_extract" + [(match_operand: 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand") + ;; Dummy operand to which we can attach the iterator. + (reg:SVE_FULL_I V0_REGNUM)] + "TARGET_SVE" + { + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_vcond_mask_ (tmp, operands[1], + CONST1_RTX (mode), + CONST0_RTX (mode))); + emit_insn (gen_vec_extract (operands[0], tmp, operands[2])); + DONE; + } +) + +;; ========================================================================= +;; == Unary arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] General unary arithmetic corresponding to rtx codes +;; ------------------------------------------------------------------------- +;; Includes: +;; - ABS +;; - CLS (= clrsb) +;; - CLZ +;; - CNT (= popcount) +;; - NEG +;; - NOT +;; ------------------------------------------------------------------------- + +;; Unpredicated integer unary arithmetic. +(define_expand "2" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 2) + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 1 "register_operand"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + { + operands[2] = aarch64_ptrue_reg (mode); + } +) + +;; Integer unary arithmetic predicated with a PTRUE. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + \t%0., %1/m, %2. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer unary arithmetic with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_operand: 1 "register_operand") + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand")) + (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated integer unary arithmetic, merging with the first input. +(define_insn "*cond__2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer unary arithmetic, merging with an independent value. +;; +;; The earlyclobber isn't needed for the first alternative, but omitting +;; it would only help the case in which operands 2 and 3 are the same, +;; which is handled above rather than here. Marking all the alternatives +;; as earlyclobber helps to make the instruction more regular to the +;; register allocator. 
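+;;
+;; An illustrative conditional unary operation of the kind the merging forms
+;; serve (names are placeholders; the exact merging alternative chosen is up
+;; to the vectorizer and register allocator):
+;;
+;;   void
+;;   cond_neg (int *restrict x, const int *restrict a,
+;;             const int *restrict p, int n)
+;;   {
+;;     for (int i = 0; i < n; ++i)
+;;       if (p[i])
+;;         x[i] = -a[i];   /* candidate for a predicated NEG.  */
+;;   }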
+(define_insn "*cond__any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, w")) + (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] General unary arithmetic corresponding to unspecs +;; ------------------------------------------------------------------------- +;; Includes +;; - RBIT +;; - REVB +;; - REVH +;; - REVW +;; ------------------------------------------------------------------------- + +;; Predicated integer unary operations. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")] + SVE_INT_UNARY)] + UNSPEC_PRED_X))] + "TARGET_SVE && >= " + "@ + \t%0., %1/m, %2. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Another way of expressing the REVB, REVH and REVW patterns, with this +;; form being easier for permutes. The predicate mode determines the number +;; of lanes and the data mode decides the granularity of the reversal within +;; each lane. +(define_insn "@aarch64_sve_revbhw_" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w") + (unspec:SVE_ALL + [(match_operand:PRED_HSD 1 "register_operand" "Upl, Upl") + (unspec:SVE_ALL + [(match_operand:SVE_ALL 2 "register_operand" "0, w")] + UNSPEC_REVBHW)] + UNSPEC_PRED_X))] + "TARGET_SVE && > " + "@ + rev\t%0., %1/m, %2. + movprfx\t%0, %2\;rev\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer unary operations with merging. +(define_insn "@cond_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")] + SVE_INT_UNARY) + (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Sign and zero extension +;; ------------------------------------------------------------------------- +;; Includes: +;; - SXTB +;; - SXTH +;; - SXTW +;; - UXTB +;; - UXTH +;; - UXTW +;; ------------------------------------------------------------------------- + +;; Unpredicated sign and zero extension from a narrower mode. +(define_expand "2" + [(set (match_operand:SVE_HSDI 0 "register_operand") + (unspec:SVE_HSDI + [(match_dup 2) + (ANY_EXTEND:SVE_HSDI + (match_operand:SVE_PARTIAL_I 1 "register_operand"))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" + { + operands[2] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated sign and zero extension from a narrower mode. 
+(define_insn "*2" + [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl") + (ANY_EXTEND:SVE_HSDI + (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~ & ) == 0" + "@ + xt\t%0., %1/m, %2. + movprfx\t%0, %2\;xt\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated truncate-and-sign-extend operations. +(define_insn "@aarch64_pred_sxt" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl") + (sign_extend:SVE_FULL_HSDI + (truncate:SVE_PARTIAL_I + (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))] + UNSPEC_PRED_X))] + "TARGET_SVE + && (~ & ) == 0" + "@ + sxt\t%0., %1/m, %2. + movprfx\t%0, %2\;sxt\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated truncate-and-sign-extend operations with merging. +(define_insn "@aarch64_cond_sxt" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (sign_extend:SVE_FULL_HSDI + (truncate:SVE_PARTIAL_I + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w"))) + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE + && (~ & ) == 0" + "@ + sxt\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;sxt\t%0., %1/m, %2. + movprfx\t%0, %3\;sxt\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; Predicated truncate-and-zero-extend operations, merging with the +;; first input. +;; +;; The canonical form of this operation is an AND of a constant rather +;; than (zero_extend (truncate ...)). +(define_insn "*cond_uxt_2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (and:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + uxt%e3\t%0., %1/m, %0. + movprfx\t%0, %2\;uxt%e3\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated truncate-and-zero-extend operations, merging with an +;; independent value. +;; +;; The earlyclobber isn't needed for the first alternative, but omitting +;; it would only help the case in which operands 2 and 4 are the same, +;; which is handled above rather than here. Marking all the alternatives +;; as early-clobber helps to make the instruction more regular to the +;; register allocator. +(define_insn "*cond_uxt_any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (and:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, w") + (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + uxt%e3\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;uxt%e3\t%0., %1/m, %2. + movprfx\t%0, %4\;uxt%e3\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Truncation +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. 
+;; ------------------------------------------------------------------------- + +;; Truncate to a partial SVE vector from either a full vector or a +;; wider partial vector. This is a no-op, because we can just ignore +;; the unused upper bits of the source. +(define_insn_and_split "trunc2" + [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w") + (truncate:SVE_PARTIAL_I + (match_operand:SVE_HSDI 1 "register_operand" "w")))] + "TARGET_SVE && (~ & ) == 0" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] + { + operands[1] = aarch64_replace_reg_mode (operands[1], + mode); + } +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Logical inverse +;; ------------------------------------------------------------------------- +;; Includes: +;; - CNOT +;; ------------------------------------------------------------------------- + +;; Predicated logical inverse. +(define_expand "@aarch64_pred_cnot" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(unspec: + [(match_operand: 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (eq: + (match_operand:SVE_FULL_I 3 "register_operand") + (match_dup 4))] + UNSPEC_PRED_Z) + (match_dup 5) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + { + operands[4] = CONST0_RTX (mode); + operands[5] = CONST1_RTX (mode); + } +) + +(define_insn "*cnot" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(unspec: + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (eq: + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] + UNSPEC_PRED_Z) + (match_operand:SVE_I 4 "aarch64_simd_imm_one") + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + cnot\t%0., %1/m, %2. + movprfx\t%0, %2\;cnot\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated logical inverse with merging. +(define_expand "@cond_cnot" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_I + [(unspec: + [(match_dup 4) + (const_int SVE_KNOWN_PTRUE) + (eq: + (match_operand:SVE_FULL_I 2 "register_operand") + (match_dup 5))] + UNSPEC_PRED_Z) + (match_dup 6) + (match_dup 5)] + UNSPEC_SEL) + (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + { + operands[4] = CONSTM1_RTX (mode); + operands[5] = CONST0_RTX (mode); + operands[6] = CONST1_RTX (mode); + } +) + +;; Predicated logical inverse, merging with the first input. +(define_insn_and_rewrite "*cond_cnot_2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + ;; Logical inverse of operand 2 (as above). + (unspec:SVE_I + [(unspec: + [(match_operand 5) + (const_int SVE_KNOWN_PTRUE) + (eq: + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] + UNSPEC_PRED_Z) + (match_operand:SVE_I 4 "aarch64_simd_imm_one") + (match_dup 3)] + UNSPEC_SEL) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + cnot\t%0., %1/m, %0. + movprfx\t%0, %2\;cnot\t%0., %1/m, %2." + "&& !CONSTANT_P (operands[5])" + { + operands[5] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated logical inverse, merging with an independent value. 
+;; +;; The earlyclobber isn't needed for the first alternative, but omitting +;; it would only help the case in which operands 2 and 6 are the same, +;; which is handled above rather than here. Marking all the alternatives +;; as earlyclobber helps to make the instruction more regular to the +;; register allocator. +(define_insn_and_rewrite "*cond_cnot_any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + ;; Logical inverse of operand 2 (as above). + (unspec:SVE_I + [(unspec: + [(match_operand 5) + (const_int SVE_KNOWN_PTRUE) + (eq: + (match_operand:SVE_I 2 "register_operand" "w, w, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] + UNSPEC_PRED_Z) + (match_operand:SVE_I 4 "aarch64_simd_imm_one") + (match_dup 3)] + UNSPEC_SEL) + (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" + "@ + cnot\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;cnot\t%0., %1/m, %2. + movprfx\t%0, %6\;cnot\t%0., %1/m, %2." + "&& !CONSTANT_P (operands[5])" + { + operands[5] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP<-INT] General unary arithmetic that maps to unspecs +;; ------------------------------------------------------------------------- +;; Includes: +;; - FEXPA +;; ------------------------------------------------------------------------- + +;; Unpredicated unary operations that take an integer and return a float. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "w")] + SVE_FP_UNARY_INT))] + "TARGET_SVE" + "\t%0., %1." +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] General unary arithmetic corresponding to unspecs +;; ------------------------------------------------------------------------- +;; Includes: +;; - FABS +;; - FNEG +;; - FRECPE +;; - FRECPX +;; - FRINTA +;; - FRINTI +;; - FRINTM +;; - FRINTN +;; - FRINTP +;; - FRINTX +;; - FRINTZ +;; - FRSQRTE +;; - FSQRT +;; ------------------------------------------------------------------------- + +;; Unpredicated floating-point unary operations. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand" "w")] + SVE_FP_UNARY))] + "TARGET_SVE" + "\t%0., %1." +) + +;; Unpredicated floating-point unary operations. +(define_expand "2" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 2) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand")] + SVE_COND_FP_UNARY_OPTAB))] + "TARGET_SVE" + { + operands[2] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated floating-point unary operations. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE_COND_FP_UNARY))] + "TARGET_SVE" + "@ + \t%0., %1/m, %2. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point unary arithmetic with merging. 
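+;; As a rough illustration (a hand-written sketch, not taken from the GCC
+;; testsuite), scalar loops like these are the sort of source the
+;; vectorizer could map onto the floating-point unary patterns above
+;; (FABS, FNEG and friends) when SVE is enabled:
+;;
+;;   void fabs_loop (float *restrict r, const float *restrict a, int n)
+;;   {
+;;     for (int i = 0; i < n; ++i)
+;;       r[i] = __builtin_fabsf (a[i]);   /* candidate for FABS */
+;;   }
+;;
+;;   void fneg_loop (float *restrict r, const float *restrict a, int n)
+;;   {
+;;     for (int i = 0; i < n; ++i)
+;;       r[i] = -a[i];                    /* candidate for FNEG */
+;;   }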
+(define_expand "@cond_" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand")] + SVE_COND_FP_UNARY) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated floating-point unary arithmetic, merging with the first input. +(define_insn_and_rewrite "*cond__2_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 3) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE_COND_FP_UNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0. + movprfx\t%0, %2\;\t%0., %1/m, %2." + "&& !rtx_equal_p (operands[1], operands[3])" + { + operands[3] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE_COND_FP_UNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point unary arithmetic, merging with an independent +;; value. +;; +;; The earlyclobber isn't needed for the first alternative, but omitting +;; it would only help the case in which operands 2 and 3 are the same, +;; which is handled above rather than here. Marking all the alternatives +;; as earlyclobber helps to make the instruction more regular to the +;; register allocator. +(define_insn_and_rewrite "*cond__any_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FP_UNARY) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes,yes")] +) + +(define_insn "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FP_UNARY) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." 
+ [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Square root +;; ------------------------------------------------------------------------- + +(define_expand "sqrt2" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 2) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand")] + UNSPEC_COND_FSQRT))] + "TARGET_SVE" +{ + if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) + DONE; + operands[2] = aarch64_ptrue_reg (mode); +}) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Reciprocal square root +;; ------------------------------------------------------------------------- + +(define_expand "rsqrt2" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand") + (unspec:SVE_FULL_SDF + [(match_operand:SVE_FULL_SDF 1 "register_operand")] + UNSPEC_RSQRT))] + "TARGET_SVE" +{ + aarch64_emit_approx_sqrt (operands[0], operands[1], true); + DONE; +}) + +(define_expand "@aarch64_rsqrte" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand") + (unspec:SVE_FULL_SDF + [(match_operand:SVE_FULL_SDF 1 "register_operand")] + UNSPEC_RSQRTE))] + "TARGET_SVE" +) + +(define_expand "@aarch64_rsqrts" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand") + (unspec:SVE_FULL_SDF + [(match_operand:SVE_FULL_SDF 1 "register_operand") + (match_operand:SVE_FULL_SDF 2 "register_operand")] + UNSPEC_RSQRTS))] + "TARGET_SVE" +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Inverse +;; ------------------------------------------------------------------------- +;; Includes: +;; - NOT +;; ------------------------------------------------------------------------- + +;; Unpredicated predicate inverse. +(define_expand "one_cmpl2" + [(set (match_operand:PRED_ALL 0 "register_operand") + (and:PRED_ALL + (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) + (match_dup 2)))] + "TARGET_SVE" + { + operands[2] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated predicate inverse. +(define_insn "*one_cmpl3" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL + (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) + (match_operand:PRED_ALL 1 "register_operand" "Upa")))] + "TARGET_SVE" + "not\t%0.b, %1/z, %2.b" +) + +;; ========================================================================= +;; == Binary arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] General binary arithmetic corresponding to rtx codes +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADD (merging form only) +;; - AND (merging form only) +;; - ASR (merging form only) +;; - EOR (merging form only) +;; - LSL (merging form only) +;; - LSR (merging form only) +;; - MUL +;; - ORR (merging form only) +;; - SMAX +;; - SMIN +;; - SQADD (SVE2 merging form only) +;; - SQSUB (SVE2 merging form only) +;; - SUB (merging form only) +;; - UMAX +;; - UMIN +;; - UQADD (SVE2 merging form only) +;; - UQSUB (SVE2 merging form only) +;; ------------------------------------------------------------------------- + +;; Unpredicated integer binary operations that have an immediate form. 
+(define_expand "3" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 3) + (SVE_INT_BINARY_IMM:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "aarch64_sve__operand"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Integer binary operations that have an immediate form, predicated +;; with a PTRUE. We don't actually need the predicate for the first +;; and third alternatives, but using Upa or X isn't likely to gain much +;; and would make the instruction seem less uniform to the register +;; allocator. +(define_insn_and_split "@aarch64_pred_" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (SVE_INT_BINARY_IMM:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w") + (match_operand:SVE_I 3 "aarch64_sve__operand" ", w, , w"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + # + \t%0., %1/m, %0., %3. + # + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + ; Split the unpredicated form after reload, so that we don't have + ; the unnecessary PTRUE. + "&& reload_completed + && !register_operand (operands[3], mode)" + [(set (match_dup 0) + (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))] + "" + [(set_attr "movprfx" "*,*,yes,yes")] +) + +;; Unpredicated binary operations with a constant (post-RA only). +;; These are generated by splitting a predicated instruction whose +;; predicate is unused. +(define_insn "*post_ra_3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (SVE_INT_BINARY_IMM:SVE_I + (match_operand:SVE_I 1 "register_operand" "0, w") + (match_operand:SVE_I 2 "aarch64_sve__immediate")))] + "TARGET_SVE && reload_completed" + "@ + \t%0., %0., #%2 + movprfx\t%0, %1\;\t%0., %0., #%2" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer operations with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_operand: 1 "register_operand") + (SVE_INT_BINARY:SVE_I + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "")) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated integer operations, merging with the first input. +(define_insn "*cond__2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (SVE_INT_BINARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer operations, merging with the second input. +(define_insn "*cond__3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (SVE_INT_BINARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "0, w")) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %2. + movprfx\t%0, %3\;\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer operations, merging with an independent value. 
+(define_insn_and_rewrite "*cond__any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (SVE_INT_BINARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w")) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Addition +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADD +;; - DECB +;; - DECD +;; - DECH +;; - DECW +;; - INCB +;; - INCD +;; - INCH +;; - INCW +;; - SUB +;; ------------------------------------------------------------------------- + +(define_insn "add3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w") + (plus:SVE_I + (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w") + (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))] + "TARGET_SVE" + "@ + add\t%0., %0., #%D2 + sub\t%0., %0., #%N2 + * return aarch64_output_sve_vector_inc_dec (\"%0.\", operands[2]); + movprfx\t%0, %1\;add\t%0., %0., #%D2 + movprfx\t%0, %1\;sub\t%0., %0., #%N2 + add\t%0., %1., %2." + [(set_attr "movprfx" "*,*,*,yes,yes,*")] +) + +;; Merging forms are handled through SVE_INT_BINARY. + +;; ------------------------------------------------------------------------- +;; ---- [INT] Subtraction +;; ------------------------------------------------------------------------- +;; Includes: +;; - SUB +;; - SUBR +;; ------------------------------------------------------------------------- + +(define_insn "sub3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") + (minus:SVE_I + (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa") + (match_operand:SVE_I 2 "register_operand" "w, 0, w")))] + "TARGET_SVE" + "@ + sub\t%0., %1., %2. + subr\t%0., %0., #%D1 + movprfx\t%0, %2\;subr\t%0., %0., #%D1" + [(set_attr "movprfx" "*,*,yes")] +) + +;; Merging forms are handled through SVE_INT_BINARY. + +;; ------------------------------------------------------------------------- +;; ---- [INT] Take address +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADR +;; ------------------------------------------------------------------------- + +;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD, +;; but the svadrb intrinsics should preserve the user's choice. +(define_insn "@aarch64_adr" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") + (unspec:SVE_FULL_SDI + [(match_operand:SVE_FULL_SDI 1 "register_operand" "w") + (match_operand:SVE_FULL_SDI 2 "register_operand" "w")] + UNSPEC_ADR))] + "TARGET_SVE" + "adr\t%0., [%1., %2.]" +) + +;; Same, but with the offset being sign-extended from the low 32 bits. 
+(define_insn_and_rewrite "*aarch64_adr_sxtw" + [(set (match_operand:VNx2DI 0 "register_operand" "=w") + (unspec:VNx2DI + [(match_operand:VNx2DI 1 "register_operand" "w") + (unspec:VNx2DI + [(match_operand 3) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 2 "register_operand" "w")))] + UNSPEC_PRED_X)] + UNSPEC_ADR))] + "TARGET_SVE" + "adr\t%0.d, [%1.d, %2.d, sxtw]" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (VNx2BImode); + } +) + +;; Same, but with the offset being zero-extended from the low 32 bits. +(define_insn "*aarch64_adr_uxtw_unspec" + [(set (match_operand:VNx2DI 0 "register_operand" "=w") + (unspec:VNx2DI + [(match_operand:VNx2DI 1 "register_operand" "w") + (and:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w") + (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))] + UNSPEC_ADR))] + "TARGET_SVE" + "adr\t%0.d, [%1.d, %2.d, uxtw]" +) + +;; Same, matching as a PLUS rather than unspec. +(define_insn "*aarch64_adr_uxtw_and" + [(set (match_operand:VNx2DI 0 "register_operand" "=w") + (plus:VNx2DI + (and:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w") + (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate")) + (match_operand:VNx2DI 1 "register_operand" "w")))] + "TARGET_SVE" + "adr\t%0.d, [%1.d, %2.d, uxtw]" +) + +;; ADR with a nonzero shift. +(define_expand "@aarch64_adr_shift" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand") + (plus:SVE_FULL_SDI + (unspec:SVE_FULL_SDI + [(match_dup 4) + (ashift:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 2 "register_operand") + (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_SDI 1 "register_operand")))] + "TARGET_SVE" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +(define_insn_and_rewrite "*aarch64_adr_shift" + [(set (match_operand:SVE_24I 0 "register_operand" "=w") + (plus:SVE_24I + (unspec:SVE_24I + [(match_operand 4) + (ashift:SVE_24I + (match_operand:SVE_24I 2 "register_operand" "w") + (match_operand:SVE_24I 3 "const_1_to_3_operand"))] + UNSPEC_PRED_X) + (match_operand:SVE_24I 1 "register_operand" "w")))] + "TARGET_SVE" + "adr\t%0., [%1., %2., lsl %3]" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +;; Same, but with the index being sign-extended from the low 32 bits. +(define_insn_and_rewrite "*aarch64_adr_shift_sxtw" + [(set (match_operand:VNx2DI 0 "register_operand" "=w") + (plus:VNx2DI + (unspec:VNx2DI + [(match_operand 4) + (ashift:VNx2DI + (unspec:VNx2DI + [(match_operand 5) + (sign_extend:VNx2DI + (truncate:VNx2SI + (match_operand:VNx2DI 2 "register_operand" "w")))] + UNSPEC_PRED_X) + (match_operand:VNx2DI 3 "const_1_to_3_operand"))] + UNSPEC_PRED_X) + (match_operand:VNx2DI 1 "register_operand" "w")))] + "TARGET_SVE" + "adr\t%0.d, [%1.d, %2.d, sxtw %3]" + "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" + { + operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode); + } +) + +;; Same, but with the index being zero-extended from the low 32 bits. 
+(define_insn_and_rewrite "*aarch64_adr_shift_uxtw" + [(set (match_operand:VNx2DI 0 "register_operand" "=w") + (plus:VNx2DI + (unspec:VNx2DI + [(match_operand 5) + (ashift:VNx2DI + (and:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w") + (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate")) + (match_operand:VNx2DI 3 "const_1_to_3_operand"))] + UNSPEC_PRED_X) + (match_operand:VNx2DI 1 "register_operand" "w")))] + "TARGET_SVE" + "adr\t%0.d, [%1.d, %2.d, uxtw %3]" + "&& !CONSTANT_P (operands[5])" + { + operands[5] = CONSTM1_RTX (VNx2BImode); + } +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Absolute difference +;; ------------------------------------------------------------------------- +;; Includes: +;; - SABD +;; - UABD +;; ------------------------------------------------------------------------- + +;; Unpredicated integer absolute difference. +(define_expand "abd_3" + [(use (match_operand:SVE_I 0 "register_operand")) + (USMAX:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "register_operand"))] + "TARGET_SVE" + { + rtx pred = aarch64_ptrue_reg (mode); + emit_insn (gen_aarch64_pred_abd (operands[0], pred, operands[1], + operands[2])); + DONE; + } +) + +;; Predicated integer absolute difference. +(define_insn "@aarch64_pred_abd" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (minus:SVE_I + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (unspec:SVE_I + [(match_dup 1) + (:SVE_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)))] + "TARGET_SVE" + "@ + abd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +(define_expand "@aarch64_cond_abd" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand") + (minus:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_dup 1) + (USMAX:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "register_operand"))] + UNSPEC_PRED_X) + (unspec:SVE_FULL_I + [(match_dup 1) + (:SVE_FULL_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)) + (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +{ + if (rtx_equal_p (operands[3], operands[4])) + std::swap (operands[2], operands[3]); +}) + +;; Predicated integer absolute difference, merging with the first input. +(define_insn_and_rewrite "*aarch64_cond_abd_2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (minus:SVE_I + (unspec:SVE_I + [(match_operand 4) + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (unspec:SVE_I + [(match_operand 5) + (:SVE_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + abd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3." + "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" + { + operands[4] = operands[5] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer absolute difference, merging with the second input. 
+(define_insn_and_rewrite "*aarch64_cond_abd_3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (minus:SVE_I + (unspec:SVE_I + [(match_operand 4) + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "0, w"))] + UNSPEC_PRED_X) + (unspec:SVE_I + [(match_operand 5) + (:SVE_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + abd\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;abd\t%0., %1/m, %0., %2." + "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" + { + operands[4] = operands[5] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer absolute difference, merging with an independent value. +(define_insn_and_rewrite "*aarch64_cond_abd_any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (minus:SVE_I + (unspec:SVE_I + [(match_operand 5) + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))] + UNSPEC_PRED_X) + (unspec:SVE_I + [(match_operand 6) + (:SVE_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;abd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;abd\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;abd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;abd\t%0., %1/m, %0., %3. + #" + "&& 1" + { + if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6])) + operands[5] = operands[6] = CONSTM1_RTX (mode); + else if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Saturating addition and subtraction +;; ------------------------------------------------------------------------- +;; - SQADD +;; - SQSUB +;; - UQADD +;; - UQSUB +;; ------------------------------------------------------------------------- + +;; Unpredicated saturating signed addition and subtraction. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w") + (SBINQOPS:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w") + (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))] + "TARGET_SVE" + "@ + \t%0., %0., #%D2 + \t%0., %0., #%N2 + movprfx\t%0, %1\;\t%0., %0., #%D2 + movprfx\t%0, %1\;\t%0., %0., #%N2 + \t%0., %1., %2." + [(set_attr "movprfx" "*,*,yes,yes,*")] +) + +;; Unpredicated saturating unsigned addition and subtraction. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w") + (UBINQOPS:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w") + (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))] + "TARGET_SVE" + "@ + \t%0., %0., #%D2 + movprfx\t%0, %1\;\t%0., %0., #%D2 + \t%0., %1., %2." 
+ [(set_attr "movprfx" "*,yes,*")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Highpart multiplication +;; ------------------------------------------------------------------------- +;; Includes: +;; - SMULH +;; - UMULH +;; ------------------------------------------------------------------------- + +;; Unpredicated highpart multiplication. +(define_expand "mul3_highpart" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 3) + (unspec:SVE_I + [(match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "register_operand")] + MUL_HIGHPART)] + UNSPEC_PRED_X))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated highpart multiplication. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_I + [(match_operand:SVE_I 2 "register_operand" "%0, w") + (match_operand:SVE_I 3 "register_operand" "w, w")] + MUL_HIGHPART)] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + mulh\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;mulh\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated highpart multiplications with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "register_operand")] + MUL_HIGHPART) + (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +{ + /* Only target code is aware of these operations, so we don't need + to handle the fully-general case. */ + gcc_assert (rtx_equal_p (operands[2], operands[4]) + || CONSTANT_P (operands[4])); +}) + +;; Predicated highpart multiplications, merging with the first input. +(define_insn "*cond__2" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] + MUL_HIGHPART) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")]) + +;; Predicated highpart multiplications, merging with zero. +(define_insn "*cond__z" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] + MUL_HIGHPART) + (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "yes")]) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Division +;; ------------------------------------------------------------------------- +;; Includes: +;; - SDIV +;; - SDIVR +;; - UDIV +;; - UDIVR +;; ------------------------------------------------------------------------- + +;; Unpredicated integer division. 
+(define_expand "3" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand") + (unspec:SVE_FULL_SDI + [(match_dup 3) + (SVE_INT_BINARY_SD:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 1 "register_operand") + (match_operand:SVE_FULL_SDI 2 "register_operand"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Integer division predicated with a PTRUE. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w") + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (SVE_INT_BINARY_SD:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w") + (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + r\t%0., %1/m, %0., %2. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,*,yes")] +) + +;; Predicated integer division with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand") + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand") + (SVE_INT_BINARY_SD:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 2 "register_operand") + (match_operand:SVE_FULL_SDI 3 "register_operand")) + (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated integer division, merging with the first input. +(define_insn "*cond__2" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "Upl, Upl") + (SVE_INT_BINARY_SD:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w") + (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer division, merging with the second input. +(define_insn "*cond__3" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "Upl, Upl") + (SVE_INT_BINARY_SD:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w") + (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %2. + movprfx\t%0, %3\;\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer division, merging with an independent value. +(define_insn_and_rewrite "*cond__any" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (SVE_INT_BINARY_SD:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w")) + (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. 
+ #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Binary logical operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - AND +;; - EOR +;; - ORR +;; ------------------------------------------------------------------------- + +;; Unpredicated integer binary logical operations. +(define_insn "3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w") + (LOGICAL:SVE_I + (match_operand:SVE_I 1 "register_operand" "%0, w, w") + (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] + "TARGET_SVE" + "@ + \t%0., %0., #%C2 + movprfx\t%0, %1\;\t%0., %0., #%C2 + \t%0.d, %1.d, %2.d" + [(set_attr "movprfx" "*,yes,*")] +) + +;; Merging forms are handled through SVE_INT_BINARY. + +;; ------------------------------------------------------------------------- +;; ---- [INT] Binary logical operations (inverted second input) +;; ------------------------------------------------------------------------- +;; Includes: +;; - BIC +;; ------------------------------------------------------------------------- + +;; Unpredicated BIC. +(define_expand "@aarch64_bic" + [(set (match_operand:SVE_I 0 "register_operand") + (and:SVE_I + (unspec:SVE_I + [(match_dup 3) + (not:SVE_I (match_operand:SVE_I 2 "register_operand"))] + UNSPEC_PRED_X) + (match_operand:SVE_I 1 "register_operand")))] + "TARGET_SVE" + { + operands[3] = CONSTM1_RTX (mode); + } +) + +;; Predicated BIC. +(define_insn_and_rewrite "*bic3" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (and:SVE_I + (unspec:SVE_I + [(match_operand 3) + (not:SVE_I + (match_operand:SVE_I 2 "register_operand" "w"))] + UNSPEC_PRED_X) + (match_operand:SVE_I 1 "register_operand" "w")))] + "TARGET_SVE" + "bic\t%0.d, %1.d, %2.d" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } +) + +;; Predicated BIC with merging. +(define_expand "@cond_bic" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand") + (and:SVE_FULL_I + (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand")) + (match_operand:SVE_FULL_I 2 "register_operand")) + (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated integer BIC, merging with the first input. +(define_insn "*cond_bic_2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (and:SVE_I + (not:SVE_I + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_operand:SVE_I 2 "register_operand" "0, w")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + bic\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;bic\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer BIC, merging with an independent value. 
+(define_insn_and_rewrite "*cond_bic_any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (and:SVE_I + (not:SVE_I + (match_operand:SVE_I 3 "register_operand" "w, w, w, w")) + (match_operand:SVE_I 2 "register_operand" "0, w, w, w")) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;bic\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %2.\;bic\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;bic\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Shifts (rounding towards -Inf) +;; ------------------------------------------------------------------------- +;; Includes: +;; - ASR +;; - ASRR +;; - LSL +;; - LSLR +;; - LSR +;; - LSRR +;; ------------------------------------------------------------------------- + +;; Unpredicated shift by a scalar, which expands into one of the vector +;; shifts below. +(define_expand "3" + [(set (match_operand:SVE_I 0 "register_operand") + (ASHIFT:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand: 2 "general_operand")))] + "TARGET_SVE" + { + rtx amount; + if (CONST_INT_P (operands[2])) + { + amount = gen_const_vec_duplicate (mode, operands[2]); + if (!aarch64_sve_shift_operand (operands[2], mode)) + amount = force_reg (mode, amount); + } + else + { + amount = convert_to_mode (mode, operands[2], 0); + amount = expand_vector_broadcast (mode, amount); + } + emit_insn (gen_v3 (operands[0], operands[1], amount)); + DONE; + } +) + +;; Unpredicated shift by a vector. +(define_expand "v3" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 3) + (ASHIFT:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "aarch64_sve_shift_operand"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Shift by a vector, predicated with a PTRUE. We don't actually need +;; the predicate for the first alternative, but using Upa or X isn't +;; likely to gain much and would make the instruction seem less uniform +;; to the register allocator. +(define_insn_and_split "@aarch64_pred_" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (ASHIFT:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, 0, w, w") + (match_operand:SVE_I 3 "aarch64_sve_shift_operand" "D, w, 0, w"))] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + # + \t%0., %1/m, %0., %3. + r\t%0., %1/m, %3., %2. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + "&& reload_completed + && !register_operand (operands[3], mode)" + [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] + "" + [(set_attr "movprfx" "*,*,*,yes")] +) + +;; Unpredicated shift operations by a constant (post-RA only). +;; These are generated by splitting a predicated instruction whose +;; predicate is unused. 
+(define_insn "*post_ra_v3" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (ASHIFT:SVE_I + (match_operand:SVE_I 1 "register_operand" "w") + (match_operand:SVE_I 2 "aarch64_simd_shift_imm")))] + "TARGET_SVE && reload_completed" + "\t%0., %1., #%2" +) + +;; Predicated integer shift, merging with the first input. +(define_insn "*cond__2_const" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (ASHIFT:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_shift_imm")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer shift, merging with an independent value. +(define_insn_and_rewrite "*cond__any_const" + [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (ASHIFT:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, w") + (match_operand:SVE_I 3 "aarch64_simd_shift_imm")) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; Unpredicated shifts of narrow elements by 64-bit amounts. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") + (unspec:SVE_FULL_BHSI + [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w") + (match_operand:VNx2DI 2 "register_operand" "w")] + SVE_SHIFT_WIDE))] + "TARGET_SVE" + "\t%0., %1., %2.d" +) + +;; Merging predicated shifts of narrow elements by 64-bit amounts. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") + (unspec:SVE_FULL_BHSI + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_BHSI + [(match_operand:SVE_FULL_BHSI 2 "register_operand") + (match_operand:VNx2DI 3 "register_operand")] + SVE_SHIFT_WIDE) + (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated shifts of narrow elements by 64-bit amounts, merging with +;; the first input. +(define_insn "*cond__m" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_BHSI + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_BHSI + [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") + (match_operand:VNx2DI 3 "register_operand" "w, w")] + SVE_SHIFT_WIDE) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3.d + movprfx\t%0, %2\;\t%0., %1/m, %0., %3.d" + [(set_attr "movprfx" "*, yes")]) + +;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero. 
+(define_insn "*cond__z" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w") + (unspec:SVE_FULL_BHSI + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_BHSI + [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") + (match_operand:VNx2DI 3 "register_operand" "w, w")] + SVE_SHIFT_WIDE) + (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3.d + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3.d" + [(set_attr "movprfx" "yes")]) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Shifts (rounding towards 0) +;; ------------------------------------------------------------------------- +;; Includes: +;; - ASRD +;; - SQSHLU (SVE2) +;; - SRSHR (SVE2) +;; - URSHR (SVE2) +;; ------------------------------------------------------------------------- + +;; Unpredicated ASRD. +(define_expand "sdiv_pow23" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 3) + (unspec:SVE_I + [(match_operand:SVE_I 1 "register_operand") + (match_operand 2 "aarch64_simd_rshift_imm")] + UNSPEC_ASRD)] + UNSPEC_PRED_X))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated ASRD. +(define_insn "*sdiv_pow23" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_I + [(match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")] + UNSPEC_ASRD)] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + asrd\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;asrd\t%0., %1/m, %0., #%3" + [(set_attr "movprfx" "*,yes")]) + +;; Predicated shift with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_operand: 1 "register_operand") + (unspec:SVE_I + [(match_dup 5) + (unspec:SVE_I + [(match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "aarch64_simd_shift_imm")] + SVE_INT_SHIFT_IMM)] + UNSPEC_PRED_X) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated shift, merging with the first input. +(define_insn_and_rewrite "*cond__2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_I + [(match_operand 4) + (unspec:SVE_I + [(match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_shift_imm")] + SVE_INT_SHIFT_IMM)] + UNSPEC_PRED_X) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")]) + +;; Predicated shift, merging with an independent value. 
+(define_insn_and_rewrite "*cond__any" + [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_I + [(match_operand 5) + (unspec:SVE_I + [(match_operand:SVE_I 2 "register_operand" "w, w, w") + (match_operand:SVE_I 3 "aarch64_simd_shift_imm")] + SVE_INT_SHIFT_IMM)] + UNSPEC_PRED_X) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs +;; ------------------------------------------------------------------------- +;; Includes: +;; - FSCALE +;; - FTSMUL +;; - FTSSEL +;; ------------------------------------------------------------------------- + +;; Unpredicated floating-point binary operations that take an integer as +;; their second operand. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + SVE_FP_BINARY_INT))] + "TARGET_SVE" + "\t%0., %1., %2." +) + +;; Predicated floating-point binary operations that take an integer +;; as their second operand. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand: 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY_INT))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point binary operations with merging, taking an +;; integer as their second operand. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand: 3 "register_operand")] + SVE_COND_FP_BINARY_INT) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated floating-point binary operations that take an integer as their +;; second operand, with inactive lanes coming from the first operand. +(define_insn_and_rewrite "*cond__2_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand: 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY_INT) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." 
+ "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand: 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY_INT) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point binary operations that take an integer as +;; their second operand, with the values of inactive lanes being distinct +;; from the other inputs. +(define_insn_and_rewrite "*cond__any_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") + (match_operand: 3 "register_operand" "w, w, w, w")] + SVE_COND_FP_BINARY_INT) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5])) + operands[5] = copy_rtx (operands[1]); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") + (match_operand: 3 "register_operand" "w, w, w, w")] + SVE_COND_FP_BINARY_INT) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] General binary arithmetic corresponding to rtx codes +;; ------------------------------------------------------------------------- +;; Includes post-RA forms of: +;; - FADD +;; - FMUL +;; - FSUB +;; ------------------------------------------------------------------------- + +;; Unpredicated floating-point binary operations (post-RA only). +;; These are generated by splitting a predicated instruction whose +;; predicate is unused. 
+(define_insn "*post_ra_3" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (SVE_UNPRED_FP_BINARY:SVE_FULL_F + (match_operand:SVE_FULL_F 1 "register_operand" "w") + (match_operand:SVE_FULL_F 2 "register_operand" "w")))] + "TARGET_SVE && reload_completed" + "\t%0., %1., %2.") + +;; ------------------------------------------------------------------------- +;; ---- [FP] General binary arithmetic corresponding to unspecs +;; ------------------------------------------------------------------------- +;; Includes merging forms of: +;; - FADD (constant forms handled in the "Addition" section) +;; - FDIV +;; - FDIVR +;; - FMAX +;; - FMAXNM (including #0.0 and #1.0) +;; - FMIN +;; - FMINNM (including #0.0 and #1.0) +;; - FMUL (including #0.5 and #2.0) +;; - FMULX +;; - FRECPS +;; - FRSQRTS +;; - FSUB (constant forms handled in the "Addition" section) +;; - FSUBR (constant forms handled in the "Subtraction" section) +;; ------------------------------------------------------------------------- + +;; Unpredicated floating-point binary operations. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand" "w") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + SVE_FP_BINARY))] + "TARGET_SVE" + "\t%0., %1., %2." +) + +;; Unpredicated floating-point binary operations that need to be predicated +;; for SVE. +(define_expand "3" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "") + (match_operand:SVE_FULL_F 2 "")] + SVE_COND_FP_BINARY_OPTAB))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated floating-point binary operations that have no immediate forms. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")] + SVE_COND_FP_BINARY_REG))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + \t%0., %1/m, %0., %2. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,*,yes")] +) + +;; Predicated floating-point operations with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated floating-point operations, merging with the first input. +(define_insn_and_rewrite "*cond__2_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." 
+ "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Same for operations that take a 1-bit constant. +(define_insn_and_rewrite "*cond__2_const_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point operations, merging with the second input. +(define_insn_and_rewrite "*cond__3_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + SVE_COND_FP_BINARY) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %2. + movprfx\t%0, %3\;\t%0., %1/m, %0., %2." + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + SVE_COND_FP_BINARY) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %2. + movprfx\t%0, %3\;\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point operations, merging with an independent value. 
+(define_insn_and_rewrite "*cond__any_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + SVE_COND_FP_BINARY) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5])) + operands[5] = copy_rtx (operands[1]); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + SVE_COND_FP_BINARY) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; Same for operations that take a 1-bit constant. 
+(define_insn_and_rewrite "*cond__any_const_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5])) + operands[5] = copy_rtx (operands[1]); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond__any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Addition +;; ------------------------------------------------------------------------- +;; Includes: +;; - FADD +;; - FSUB +;; ------------------------------------------------------------------------- + +;; Predicated floating-point addition. +(define_insn_and_split "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1") + (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")] + SVE_COND_FP_ADD))] + "TARGET_SVE" + "@ + fadd\t%0., %1/m, %0., #%3 + fsub\t%0., %1/m, %0., #%N3 + # + fadd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0, %2\;fadd\t%0., %1/m, %0., %3." + ; Split the unpredicated form after reload, so that we don't have + ; the unnecessary PTRUE. + "&& reload_completed + && register_operand (operands[3], mode) + && INTVAL (operands[4]) == SVE_RELAXED_GP" + [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))] + "" + [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] +) + +;; Predicated floating-point addition of a constant, merging with the +;; first input. 
+(define_insn_and_rewrite "*cond_add_2_const_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fadd\t%0., %1/m, %0., #%3 + fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3" + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,*,yes,yes")] +) + +(define_insn "*cond_add_2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fadd\t%0., %1/m, %0., #%3 + fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3" + [(set_attr "movprfx" "*,*,yes,yes")] +) + +;; Predicated floating-point addition of a constant, merging with an +;; independent value. +(define_insn_and_rewrite "*cond_add_any_const_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0., %1/m, %2.\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;fsub\t%0., %1/m, %0., #%N3 + # + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5])) + operands[5] = copy_rtx (operands[1]); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond_add_any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fadd\t%0., 
%1/m, %0., #%3 + movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0., %1/m, %2.\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;fsub\t%0., %1/m, %0., #%N3 + # + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; Register merging forms are handled through SVE_COND_FP_BINARY. + +;; ------------------------------------------------------------------------- +;; ---- [FP] Complex addition +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCADD +;; ------------------------------------------------------------------------- + +;; Predicated FCADD. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FCADD))] + "TARGET_SVE" + "@ + fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated FCADD with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "register_operand")] + SVE_COND_FCADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer +(define_expand "@cadd3" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + SVE_COND_FCADD))] + "TARGET_SVE" +{ + operands[3] = aarch64_ptrue_reg (mode); +}) + +;; Predicated FCADD, merging with the first input. +(define_insn_and_rewrite "*cond__2_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FCADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FCADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated FCADD, merging with an independent value. 
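For the FCADD patterns above, a hedged sketch of the complex-arithmetic idiom they are intended to accelerate. Recognition relies on the vectorizer's complex-number pattern matching, so this is an expectation rather than a promise, and the function name is an assumption.

#include <complex.h>

/* Sketch: adding a value rotated by 90 degrees (b * I) is the idiom
   that maps onto FCADD with rotation #90; subtracting it instead
   targets rotation #270.  */
void
cadd_rot90 (float _Complex *restrict out,
            const float _Complex *restrict a,
            const float _Complex *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i] + b[i] * I;
}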
+(define_insn_and_rewrite "*cond__any_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] + SVE_COND_FCADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/m, %2.\;fcadd\t%0., %1/m, %0., %3., # + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5])) + operands[5] = copy_rtx (operands[1]); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] + SVE_COND_FCADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/m, %2.\;fcadd\t%0., %1/m, %0., %3., # + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Subtraction +;; ------------------------------------------------------------------------- +;; Includes: +;; - FSUB +;; - FSUBR +;; ------------------------------------------------------------------------- + +;; Predicated floating-point subtraction. +(define_insn_and_split "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1") + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")] + SVE_COND_FP_SUB))] + "TARGET_SVE" + "@ + fsubr\t%0., %1/m, %0., #%2 + # + fsub\t%0., %1/m, %0., %3. + fsubr\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2 + movprfx\t%0, %2\;fsub\t%0., %1/m, %0., %3." + ; Split the unpredicated form after reload, so that we don't have + ; the unnecessary PTRUE. 
+ "&& reload_completed + && register_operand (operands[2], mode) + && INTVAL (operands[4]) == SVE_RELAXED_GP" + [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))] + "" + [(set_attr "movprfx" "*,*,*,*,yes,yes")] +) + +;; Predicated floating-point subtraction from a constant, merging with the +;; second input. +(define_insn_and_rewrite "*cond_sub_3_const_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fsubr\t%0., %1/m, %0., #%2 + movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond_sub_3_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fsubr\t%0., %1/m, %0., #%2 + movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point subtraction from a constant, merging with an +;; independent value. +(define_insn_and_rewrite "*cond_sub_const_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] + UNSPEC_COND_FSUB) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %3.\;fsubr\t%0., %1/m, %0., #%2 + movprfx\t%0., %1/m, %3.\;fsubr\t%0., %1/m, %0., #%2 + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[3], + operands[4], operands[1])); + operands[4] = operands[3] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5])) + operands[5] = copy_rtx (operands[1]); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond_sub_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] + UNSPEC_COND_FSUB) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %3.\;fsubr\t%0., %1/m, %0., #%2 + movprfx\t%0., %1/m, %3.\;fsubr\t%0., %1/m, %0., #%2 + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn 
(gen_vcond_mask_ (operands[0], operands[3], + operands[4], operands[1])); + operands[4] = operands[3] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) +;; Register merging forms are handled through SVE_COND_FP_BINARY. + +;; ------------------------------------------------------------------------- +;; ---- [FP] Absolute difference +;; ------------------------------------------------------------------------- +;; Includes: +;; - FABD +;; ------------------------------------------------------------------------- + +;; Predicated floating-point absolute difference. +(define_expand "@aarch64_pred_abd" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_dup 4) + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "register_operand")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS))] + "TARGET_SVE" +) + +;; Predicated floating-point absolute difference. +(define_insn_and_rewrite "*aarch64_pred_abd_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + "&& !rtx_equal_p (operands[1], operands[5])" + { + operands[5] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*aarch64_pred_abd_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +(define_expand "@aarch64_cond_abd" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "register_operand")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +{ + if (rtx_equal_p (operands[3], operands[4])) + std::swap (operands[2], operands[3]); +}) + +;; Predicated floating-point absolute difference, merging with the first +;; input. 
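The abd patterns above match an FABS applied to an FSUB; a minimal C sketch of that idiom (function name and flags are assumptions) before the merging variants that follow:

#include <math.h>

/* Sketch: abs(a - b) is exactly what FABD computes, so the combined
   FSUB+FABS unspecs above can be matched into a single FABD per
   vector of elements.  */
void
absdiff (float *restrict out, const float *restrict a,
         const float *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = fabsf (a[i] - b[i]);
}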
+(define_insn_and_rewrite "*aarch64_cond_abd_2_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + "&& (!rtx_equal_p (operands[1], operands[4]) + || !rtx_equal_p (operands[1], operands[5]))" + { + operands[4] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*aarch64_cond_abd_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point absolute difference, merging with the second +;; input. +(define_insn_and_rewrite "*aarch64_cond_abd_3_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." + "&& (!rtx_equal_p (operands[1], operands[4]) + || !rtx_equal_p (operands[1], operands[5]))" + { + operands[4] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*aarch64_cond_abd_3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point absolute difference, merging with an +;; independent value. 
+(define_insn_and_rewrite "*aarch64_cond_abd_any_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (unspec:SVE_FULL_F + [(match_operand 6) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;fabd\t%0., %1/m, %0., %3. + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[3], + operands[4], operands[1])); + operands[4] = operands[3] = operands[0]; + } + else if (!rtx_equal_p (operands[1], operands[5]) + || !rtx_equal_p (operands[1], operands[6])) + { + operands[5] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); + } + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*aarch64_cond_abd_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;fabd\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[3], + operands[4], operands[1])); + operands[4] = operands[3] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Multiplication +;; ------------------------------------------------------------------------- +;; Includes: +;; - FMUL +;; ------------------------------------------------------------------------- + +;; Predicated floating-point multiplication. 
+(define_insn_and_split "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1") + (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")] + SVE_COND_FP_MUL))] + "TARGET_SVE" + "@ + fmul\t%0., %1/m, %0., #%3 + # + fmul\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fmul\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;fmul\t%0., %1/m, %0., %3." + ; Split the unpredicated form after reload, so that we don't have + ; the unnecessary PTRUE. + "&& reload_completed + && register_operand (operands[3], mode) + && INTVAL (operands[4]) == SVE_RELAXED_GP" + [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))] + "" + [(set_attr "movprfx" "*,*,*,yes,yes")] +) + +;; Merging forms are handled through SVE_COND_FP_BINARY and +;; SVE_COND_FP_BINARY_I1. + +;; Unpredicated multiplication by selected lanes. +(define_insn "@aarch64_mul_lane_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (mult:SVE_FULL_F + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:SVE_FULL_F 1 "register_operand" "w")))] + "TARGET_SVE" + "fmul\t%0., %1., %2.[%3]" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Division +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +(define_expand "div3" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "nonmemory_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + UNSPEC_COND_FDIV))] + "TARGET_SVE" + { + if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) + DONE; + + operands[1] = force_reg (mode, operands[1]); + operands[3] = aarch64_ptrue_reg (mode); + } +) + +(define_expand "@aarch64_frecpe" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand")] + UNSPEC_FRECPE))] + "TARGET_SVE" +) + +(define_expand "@aarch64_frecps" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + UNSPEC_FRECPS))] + "TARGET_SVE" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Binary logical operations +;; ------------------------------------------------------------------------- +;; Includes +;; - AND +;; - EOR +;; - ORR +;; ------------------------------------------------------------------------- + +;; Binary logical operations on floating-point modes. We avoid subregs +;; by providing this, but we need to use UNSPECs since rtx logical ops +;; aren't defined for floating-point modes. 
+(define_insn "*3" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand" "w") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + LOGICALF))] + "TARGET_SVE" + "\t%0.d, %1.d, %2.d" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Sign copying +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +(define_expand "copysign3" + [(match_operand:SVE_FULL_F 0 "register_operand") + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + "TARGET_SVE" + { + rtx sign = gen_reg_rtx (mode); + rtx mant = gen_reg_rtx (mode); + rtx int_res = gen_reg_rtx (mode); + int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; + + rtx arg1 = lowpart_subreg (mode, operands[1], mode); + rtx arg2 = lowpart_subreg (mode, operands[2], mode); + + emit_insn (gen_and3 + (sign, arg2, + aarch64_simd_gen_const_vector_dup (mode, + HOST_WIDE_INT_M1U + << bits))); + emit_insn (gen_and3 + (mant, arg1, + aarch64_simd_gen_const_vector_dup (mode, + ~(HOST_WIDE_INT_M1U + << bits)))); + emit_insn (gen_ior3 (int_res, sign, mant)); + emit_move_insn (operands[0], gen_lowpart (mode, int_res)); + DONE; + } +) + +(define_expand "xorsign3" + [(match_operand:SVE_FULL_F 0 "register_operand") + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + "TARGET_SVE" + { + rtx sign = gen_reg_rtx (mode); + rtx int_res = gen_reg_rtx (mode); + int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; + + rtx arg1 = lowpart_subreg (mode, operands[1], mode); + rtx arg2 = lowpart_subreg (mode, operands[2], mode); + + emit_insn (gen_and3 + (sign, arg2, + aarch64_simd_gen_const_vector_dup (mode, + HOST_WIDE_INT_M1U + << bits))); + emit_insn (gen_xor3 (int_res, arg1, sign)); + emit_move_insn (operands[0], gen_lowpart (mode, int_res)); + DONE; + } +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Maximum and minimum +;; ------------------------------------------------------------------------- +;; Includes: +;; - FMAX +;; - FMAXNM +;; - FMIN +;; - FMINNM +;; ------------------------------------------------------------------------- + +;; Unpredicated fmax/fmin (the libm functions). The optabs for the +;; smin/smax rtx codes are handled in the generic section above. +(define_expand "3" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")] + SVE_COND_FP_MAXMIN_PUBLIC))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated floating-point maximum/minimum. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")] + SVE_COND_FP_MAXMIN))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." 
+ [(set_attr "movprfx" "*,*,yes,yes")] +) + +;; Merging forms are handled through SVE_COND_FP_BINARY and +;; SVE_COND_FP_BINARY_I1. + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Binary logical operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - AND +;; - ANDS +;; - EOR +;; - EORS +;; - ORR +;; - ORRS +;; ------------------------------------------------------------------------- + +;; Predicate AND. We can reuse one of the inputs as the GP. +;; Doubling the second operand is the preferred implementation +;; of the MOV alias, so we use that instead of %1/z, %1, %2. +(define_insn "and3" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") + (match_operand:PRED_ALL 2 "register_operand" "Upa")))] + "TARGET_SVE" + "and\t%0.b, %1/z, %2.b, %2.b" +) + +;; Unpredicated predicate EOR and ORR. +(define_expand "3" + [(set (match_operand:PRED_ALL 0 "register_operand") + (and:PRED_ALL + (LOGICAL_OR:PRED_ALL + (match_operand:PRED_ALL 1 "register_operand") + (match_operand:PRED_ALL 2 "register_operand")) + (match_dup 3)))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated predicate AND, EOR and ORR. +(define_insn "@aarch64_pred__z" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL + (LOGICAL:PRED_ALL + (match_operand:PRED_ALL 2 "register_operand" "Upa") + (match_operand:PRED_ALL 3 "register_operand" "Upa")) + (match_operand:PRED_ALL 1 "register_operand" "Upa")))] + "TARGET_SVE" + "\t%0.b, %1/z, %2.b, %3.b" +) + +;; Perform a logical operation on operands 2 and 3, using operand 1 as +;; the GP. Store the result in operand 0 and set the flags in the same +;; way as for PTEST. +(define_insn "*3_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (and:PRED_ALL + (LOGICAL:PRED_ALL + (match_operand:PRED_ALL 2 "register_operand" "Upa") + (match_operand:PRED_ALL 3 "register_operand" "Upa")) + (match_dup 4))] + UNSPEC_PTEST)) + (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) + (match_dup 4)))] + "TARGET_SVE" + "s\t%0.b, %1/z, %2.b, %3.b" +) + +;; Same with just the flags result. +(define_insn "*3_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (and:PRED_ALL + (LOGICAL:PRED_ALL + (match_operand:PRED_ALL 2 "register_operand" "Upa") + (match_operand:PRED_ALL 3 "register_operand" "Upa")) + (match_dup 4))] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" + "s\t%0.b, %1/z, %2.b, %3.b" +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Binary logical operations (inverted second input) +;; ------------------------------------------------------------------------- +;; Includes: +;; - BIC +;; - ORN +;; ------------------------------------------------------------------------- + +;; Predicated predicate BIC and ORN. 
+(define_insn "aarch64_pred__z" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL + (NLOGICAL:PRED_ALL + (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")) + (match_operand:PRED_ALL 2 "register_operand" "Upa")) + (match_operand:PRED_ALL 1 "register_operand" "Upa")))] + "TARGET_SVE" + "\t%0.b, %1/z, %2.b, %3.b" +) + +;; Same, but set the flags as a side-effect. +(define_insn "*3_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (and:PRED_ALL + (NLOGICAL:PRED_ALL + (not:PRED_ALL + (match_operand:PRED_ALL 3 "register_operand" "Upa")) + (match_operand:PRED_ALL 2 "register_operand" "Upa")) + (match_dup 4))] + UNSPEC_PTEST)) + (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL (NLOGICAL:PRED_ALL + (not:PRED_ALL (match_dup 3)) + (match_dup 2)) + (match_dup 4)))] + "TARGET_SVE" + "s\t%0.b, %1/z, %2.b, %3.b" +) + +;; Same with just the flags result. +(define_insn "*3_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (and:PRED_ALL + (NLOGICAL:PRED_ALL + (not:PRED_ALL + (match_operand:PRED_ALL 3 "register_operand" "Upa")) + (match_operand:PRED_ALL 2 "register_operand" "Upa")) + (match_dup 4))] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" + "s\t%0.b, %1/z, %2.b, %3.b" +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Binary logical operations (inverted result) +;; ------------------------------------------------------------------------- +;; Includes: +;; - NAND +;; - NOR +;; ------------------------------------------------------------------------- + +;; Predicated predicate NAND and NOR. +(define_insn "aarch64_pred__z" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL + (NLOGICAL:PRED_ALL + (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) + (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) + (match_operand:PRED_ALL 1 "register_operand" "Upa")))] + "TARGET_SVE" + "\t%0.b, %1/z, %2.b, %3.b" +) + +;; Same, but set the flags as a side-effect. +(define_insn "*3_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (and:PRED_ALL + (NLOGICAL:PRED_ALL + (not:PRED_ALL + (match_operand:PRED_ALL 2 "register_operand" "Upa")) + (not:PRED_ALL + (match_operand:PRED_ALL 3 "register_operand" "Upa"))) + (match_dup 4))] + UNSPEC_PTEST)) + (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (and:PRED_ALL (NLOGICAL:PRED_ALL + (not:PRED_ALL (match_dup 2)) + (not:PRED_ALL (match_dup 3))) + (match_dup 4)))] + "TARGET_SVE" + "s\t%0.b, %1/z, %2.b, %3.b" +) + +;; Same with just the flags result. 
+(define_insn "*3_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (and:PRED_ALL + (NLOGICAL:PRED_ALL + (not:PRED_ALL + (match_operand:PRED_ALL 2 "register_operand" "Upa")) + (not:PRED_ALL + (match_operand:PRED_ALL 3 "register_operand" "Upa"))) + (match_dup 4))] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" + "s\t%0.b, %1/z, %2.b, %3.b" +) + +;; ========================================================================= +;; == Ternary arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] MLA and MAD +;; ------------------------------------------------------------------------- +;; Includes: +;; - MAD +;; - MLA +;; ------------------------------------------------------------------------- + +;; Unpredicated integer addition of product. +(define_expand "fma4" + [(set (match_operand:SVE_I 0 "register_operand") + (plus:SVE_I + (unspec:SVE_I + [(match_dup 4) + (mult:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "nonmemory_operand"))] + UNSPEC_PRED_X) + (match_operand:SVE_I 3 "register_operand")))] + "TARGET_SVE" + { + if (aarch64_prepare_sve_int_fma (operands, PLUS)) + DONE; + operands[4] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated integer addition of product. +(define_insn "@aarch64_pred_fma" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") + (plus:SVE_I + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w"))] + UNSPEC_PRED_X) + (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] + "TARGET_SVE" + "@ + mad\t%0., %1/m, %3., %4. + mla\t%0., %1/m, %2., %3. + movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,*,yes")] +) + +;; Predicated integer addition of product with merging. +(define_expand "cond_fma" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_operand: 1 "register_operand") + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "general_operand")) + (match_operand:SVE_I 4 "register_operand")) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + { + if (aarch64_prepare_sve_cond_int_fma (operands, PLUS)) + DONE; + /* Swap the multiplication operands if the fallback value is the + second of the two. */ + if (rtx_equal_p (operands[3], operands[5])) + std::swap (operands[2], operands[3]); + } +) + +;; Predicated integer addition of product, merging with the first input. +(define_insn "*cond_fma_2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_operand:SVE_I 4 "register_operand" "w, w")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + mad\t%0., %1/m, %3., %4. + movprfx\t%0, %2\;mad\t%0., %1/m, %3., %4." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer addition of product, merging with the third input. 
+(define_insn "*cond_fma_4" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_operand:SVE_I 4 "register_operand" "0, w")) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + mla\t%0., %1/m, %2., %3. + movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer addition of product, merging with an independent value. +(define_insn_and_rewrite "*cond_fma_any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")) + (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[5]) + && !rtx_equal_p (operands[3], operands[5]) + && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;mla\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;mla\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;mad\t%0., %1/m, %3., %4. + movprfx\t%0., %1/z, %0.\;mad\t%0., %1/m, %2., %4. + movprfx\t%0., %1/m, %4.\;mla\t%0., %1/m, %2., %3. + #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] MLS and MSB +;; ------------------------------------------------------------------------- +;; Includes: +;; - MLS +;; - MSB +;; ------------------------------------------------------------------------- + +;; Unpredicated integer subtraction of product. +(define_expand "fnma4" + [(set (match_operand:SVE_I 0 "register_operand") + (minus:SVE_I + (match_operand:SVE_I 3 "register_operand") + (unspec:SVE_I + [(match_dup 4) + (mult:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "general_operand"))] + UNSPEC_PRED_X)))] + "TARGET_SVE" + { + if (aarch64_prepare_sve_int_fma (operands, MINUS)) + DONE; + operands[4] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated integer subtraction of product. +(define_insn "@aarch64_pred_fnma" + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, 0, w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w"))] + UNSPEC_PRED_X)))] + "TARGET_SVE" + "@ + msb\t%0., %1/m, %3., %4. + mls\t%0., %1/m, %2., %3. + movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,*,yes")] +) + +;; Predicated integer subtraction of product with merging. 
+(define_expand "cond_fnma" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_operand: 1 "register_operand") + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "general_operand"))) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + { + if (aarch64_prepare_sve_cond_int_fma (operands, MINUS)) + DONE; + /* Swap the multiplication operands if the fallback value is the + second of the two. */ + if (rtx_equal_p (operands[3], operands[5])) + std::swap (operands[2], operands[3]); + } +) + +;; Predicated integer subtraction of product, merging with the first input. +(define_insn "*cond_fnma_2" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + msb\t%0., %1/m, %3., %4. + movprfx\t%0, %2\;msb\t%0., %1/m, %3., %4." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer subtraction of product, merging with the third input. +(define_insn "*cond_fnma_4" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "0, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))) + (match_dup 4)] + UNSPEC_SEL))] "TARGET_SVE" + "@ + mls\t%0., %1/m, %2., %3. + movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer subtraction of product, merging with an +;; independent value. +(define_insn_and_rewrite "*cond_fnma_any" + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[5]) + && !rtx_equal_p (operands[3], operands[5]) + && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;mls\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;mls\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;msb\t%0., %1/m, %3., %4. + movprfx\t%0., %1/z, %0.\;msb\t%0., %1/m, %2., %4. + movprfx\t%0., %1/m, %4.\;mls\t%0., %1/m, %2., %3. 
+ #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" { - rtx amount; - if (CONST_INT_P (operands[2])) + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Dot product +;; ------------------------------------------------------------------------- +;; Includes: +;; - SDOT +;; - SUDOT (I8MM) +;; - UDOT +;; - USDOT (I8MM) +;; ------------------------------------------------------------------------- + +;; Four-element integer dot-product with accumulation. +(define_insn "dot_prod" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_SDI + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "w, w") + (match_operand: 2 "register_operand" "w, w")] + DOTPROD) + (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))] + "TARGET_SVE" + "@ + dot\\t%0., %1., %2. + movprfx\t%0, %3\;dot\\t%0., %1., %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Four-element integer dot-product by selected lanes with accumulation. +(define_insn "@aarch64_dot_prod_lane" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_SDI + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "w, w") + (unspec: + [(match_operand: 2 "register_operand" ", ") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + DOTPROD) + (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))] + "TARGET_SVE" + "@ + dot\\t%0., %1., %2.[%3] + movprfx\t%0, %4\;dot\\t%0., %1., %2.[%3]" + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "@aarch64_dot_prod" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") + (plus:VNx4SI_ONLY + (unspec:VNx4SI_ONLY + [(match_operand: 1 "register_operand" "w, w") + (match_operand: 2 "register_operand" "w, w")] + DOTPROD_US_ONLY) + (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))] + "TARGET_SVE_I8MM" + "@ + dot\\t%0.s, %1.b, %2.b + movprfx\t%0, %3\;dot\\t%0.s, %1.b, %2.b" + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "@aarch64_dot_prod_lane" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") + (plus:VNx4SI_ONLY + (unspec:VNx4SI_ONLY + [(match_operand: 1 "register_operand" "w, w") + (unspec: + [(match_operand: 2 "register_operand" "y, y") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + DOTPROD_I8MM) + (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))] + "TARGET_SVE_I8MM" + "@ + dot\\t%0.s, %1.b, %2.b[%3] + movprfx\t%0, %4\;dot\\t%0.s, %1.b, %2.b[%3]" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Sum of absolute differences +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in +;; operands 1 and 2. The sequence also has to perform a widening reduction of +;; the difference into a vector and accumulate that into operand 3 before +;; copying that into the result operand 0. 
+;; Perform that with a sequence of: +;; MOV ones.b, #1 +;; [SU]ABD diff.b, p0/m, op1.b, op2.b +;; MOVPRFX op0, op3 // If necessary +;; UDOT op0.s, diff.b, ones.b +(define_expand "sad" + [(use (match_operand:SVE_FULL_SDI 0 "register_operand")) + (unspec: [(use (match_operand: 1 "register_operand")) + (use (match_operand: 2 "register_operand"))] ABAL) + (use (match_operand:SVE_FULL_SDI 3 "register_operand"))] + "TARGET_SVE" + { + rtx ones = force_reg (mode, CONST1_RTX (mode)); + rtx diff = gen_reg_rtx (mode); + emit_insn (gen_abd_3 (diff, operands[1], operands[2])); + emit_insn (gen_udot_prod (operands[0], diff, ones, operands[3])); + DONE; + } +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Matrix multiply-accumulate +;; ------------------------------------------------------------------------- +;; Includes: +;; - SMMLA (I8MM) +;; - UMMLA (I8MM) +;; - USMMLA (I8MM) +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_add_" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") + (plus:VNx4SI_ONLY + (unspec:VNx4SI_ONLY + [(match_operand: 2 "register_operand" "w, w") + (match_operand: 3 "register_operand" "w, w")] + MATMUL) + (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] + "TARGET_SVE_I8MM" + "@ + mmla\\t%0.s, %2.b, %3.b + movprfx\t%0, %1\;mmla\\t%0.s, %2.b, %3.b" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] General ternary arithmetic corresponding to unspecs +;; ------------------------------------------------------------------------- +;; Includes merging patterns for: +;; - FMAD +;; - FMLA +;; - FMLS +;; - FMSB +;; - FNMAD +;; - FNMLA +;; - FNMLS +;; - FNMSB +;; ------------------------------------------------------------------------- + +;; Unpredicated floating-point ternary operations. +(define_expand "4" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "register_operand")] + SVE_COND_FP_TERNARY))] + "TARGET_SVE" + { + operands[4] = aarch64_ptrue_reg (mode); + } +) + +;; Predicated floating-point ternary operations. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")] + SVE_COND_FP_TERNARY))] + "TARGET_SVE" + "@ + \t%0., %1/m, %2., %3. + \t%0., %1/m, %3., %4. + movprfx\t%0, %4\;\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,*,yes")] +) + +;; Predicated floating-point ternary operations with merging. 
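Stepping back to the sum-of-absolute-differences expander above: its comment already lists the MOV/[SU]ABD/UDOT sequence, and the net effect, with four narrow elements feeding each wide accumulator element, can be modelled in scalar code as below.  This is an illustrative reference only, not part of the patch; uint8_t inputs and a uint32_t accumulator are assumed for concreteness (the unsigned case).

#include <cstdint>
#include <cstdio>

// One wide accumulator element: absolute differences of four narrow pairs,
// widened and accumulated (this is what UDOT of |a-b| with all-ones does).
static uint32_t
sad_group (const uint8_t *a, const uint8_t *b, uint32_t acc)
{
  for (int j = 0; j < 4; ++j)
    {
      // [SU]ABD: absolute difference of one pair of narrow elements.
      uint8_t diff = a[j] > b[j] ? a[j] - b[j] : b[j] - a[j];
      // UDOT with an all-ones vector: widen and accumulate the four diffs.
      acc += diff;
    }
  return acc;
}

int
main ()
{
  uint8_t a[4] = { 10, 20, 30, 40 };
  uint8_t b[4] = { 12, 15, 30, 50 };
  printf ("%u\n", (unsigned) sad_group (a, b, 100));   // 100 + 2 + 5 + 0 + 10 = 117
  return 0;
}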
+(define_expand "@cond_" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "register_operand") + (match_operand:SVE_FULL_F 4 "register_operand")] + SVE_COND_FP_TERNARY) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +{ + /* Swap the multiplication operands if the fallback value is the + second of the two. */ + if (rtx_equal_p (operands[3], operands[5])) + std::swap (operands[2], operands[3]); +}) + +;; Predicated floating-point ternary operations, merging with the +;; first input. +(define_insn_and_rewrite "*cond__2_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] + SVE_COND_FP_TERNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %3., %4. + movprfx\t%0, %2\;\t%0., %1/m, %3., %4." + "&& !rtx_equal_p (operands[1], operands[5])" + { + operands[5] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] + SVE_COND_FP_TERNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %3., %4. + movprfx\t%0, %2\;\t%0., %1/m, %3., %4." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point ternary operations, merging with the +;; third input. +(define_insn_and_rewrite "*cond__4_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FP_TERNARY) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %2., %3. + movprfx\t%0, %4\;\t%0., %1/m, %2., %3." + "&& !rtx_equal_p (operands[1], operands[5])" + { + operands[5] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FP_TERNARY) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %2., %3. + movprfx\t%0, %4\;\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated floating-point ternary operations, merging with an +;; independent value. 
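The "merging with an independent value" patterns below (and the earlier integer *cond_fma_any/*cond_fnma_any patterns) all follow the same scheme: a zero fallback is handled with a zeroing MOVPRFX, a fallback tied to one of the inputs with a merging MOVPRFX, and a general register fallback is deferred (the "#" alternative) and rewritten after reload into a vector select of the addend/fallback into the destination, followed by the accumulator-tied operation.  As I read that rewrite, it relies on the equivalence sketched below; this is a scalar illustration of the transformation, not the port's code, and int32_t is used only for concreteness.

#include <cstdint>
#include <cstdio>

// Two ways to get: p ? (a * b + c) : other
static int32_t
way1 (bool p, int32_t a, int32_t b, int32_t c, int32_t other)
{
  int32_t tmp = a * b + c;        // compute the operation into a temporary
  return p ? tmp : other;         // then select the result
}

static int32_t
way2 (bool p, int32_t a, int32_t b, int32_t c, int32_t other)
{
  int32_t dest = p ? c : other;   // SEL: addend where active, fallback elsewhere
  if (p)
    dest = a * b + dest;          // predicated op, merging with the destination
  return dest;
}

int
main ()
{
  for (int p = 0; p < 2; ++p)
    printf ("%d %d\n", (int) way1 (p, 3, 4, 5, -1), (int) way2 (p, 3, 4, 5, -1));
  return 0;   // both columns agree: -1 -1, then 17 17
}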
+(define_insn_and_rewrite "*cond__any_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 6) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] + SVE_COND_FP_TERNARY) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[5]) + && !rtx_equal_p (operands[3], operands[5]) + && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3., %4. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %4. + movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])) { - amount = gen_const_vec_duplicate (mode, operands[2]); - if (!aarch64_sve_shift_operand (operands[2], mode)) - amount = force_reg (mode, amount); + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; } + else if (!rtx_equal_p (operands[1], operands[6])) + operands[6] = copy_rtx (operands[1]); else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] + SVE_COND_FP_TERNARY) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[5]) + && !rtx_equal_p (operands[3], operands[5]) + && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3., %4. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %4. + movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. + #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using +;; (fma ...) since target-independent code won't understand the indexing. 
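For the lane-selected forms below, the second multiplicand is not used element by element: every element of the first input is multiplied by one selected element of the indexed input (to the best of my understanding, selected from within each 128-bit segment; the precise rule is whatever UNSPEC_SVE_LANE_SELECT is defined to mean elsewhere in the port).  That is why the target-independent (fma ...) rtx cannot describe it.  A rough scalar sketch of one 128-bit segment of 32-bit floats, under those assumptions:

#include <cstdio>

// One 128-bit segment (4 floats): every element of 'a' is multiplied by
// element 'lane' of the corresponding segment of 'b' and added to 'acc'.
static void
fmla_lane_segment (float *acc, const float *a, const float *b, int lane)
{
  for (int i = 0; i < 4; ++i)
    acc[i] += a[i] * b[lane];
}

int
main ()
{
  float acc[4] = { 0, 0, 0, 0 };
  float a[4] = { 1, 2, 3, 4 };
  float b[4] = { 10, 20, 30, 40 };
  fmla_lane_segment (acc, a, b, 1);   // multiply everything by b[1] == 20
  for (int i = 0; i < 4; ++i)
    printf ("%g ", acc[i]);           // 20 40 60 80
  printf ("\n");
  return 0;
}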
+(define_insn "@aarch64__lane_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand" "w, w") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 2 "register_operand" ", ") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_FP_TERNARY_LANE))] + "TARGET_SVE" + "@ + \t%0., %1., %2.[%3] + movprfx\t%0, %4\;\t%0., %1., %2.[%3]" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Complex multiply-add +;; ------------------------------------------------------------------------- +;; Includes merging patterns for: +;; - FCMLA +;; ------------------------------------------------------------------------- + +;; Predicated FCMLA. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FCMLA))] + "TARGET_SVE" + "@ + fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" + [(set_attr "movprfx" "*,yes")] +) + +;; unpredicated optab pattern for auto-vectorizer +;; The complex mla/mls operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. Because of this, expand early. +(define_expand "cml4" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 4) + (match_dup 5) + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "register_operand")] + FCMLA_OP))] + "TARGET_SVE" +{ + operands[4] = aarch64_ptrue_reg (mode); + operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode); + rtx tmp = gen_reg_rtx (mode); + emit_insn + (gen_aarch64_pred_fcmla (tmp, operands[4], + operands[3], operands[2], + operands[1], operands[5])); + emit_insn + (gen_aarch64_pred_fcmla (operands[0], operands[4], + operands[3], operands[2], + tmp, operands[5])); + DONE; +}) + +;; unpredicated optab pattern for auto-vectorizer +;; The complex mul operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. Because of this, expand early. +(define_expand "cmul3" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + FCMUL_OP))] + "TARGET_SVE" +{ + rtx pred_reg = aarch64_ptrue_reg (mode); + rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode); + rtx accum = force_reg (mode, CONST0_RTX (mode)); + rtx tmp = gen_reg_rtx (mode); + emit_insn + (gen_aarch64_pred_fcmla (tmp, pred_reg, + operands[2], operands[1], + accum, gp_mode)); + emit_insn + (gen_aarch64_pred_fcmla (operands[0], pred_reg, + operands[2], operands[1], + tmp, gp_mode)); + DONE; +}) + +;; Predicated FCMLA with merging. 
+(define_expand "@cond_" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "register_operand") + (match_operand:SVE_FULL_F 4 "register_operand")] + SVE_COND_FCMLA) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +) + +;; Predicated FCMLA, merging with the third input. +(define_insn_and_rewrite "*cond__4_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FCMLA) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" + "&& !rtx_equal_p (operands[1], operands[5])" + { + operands[5] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "*cond__4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FCMLA) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated FCMLA, merging with an independent value. 
+(define_insn_and_rewrite "*cond__any_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 6) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] + SVE_COND_FCMLA) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/m, %4.\;fcmla\t%0., %1/m, %2., %3., # + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])) { - amount = gen_reg_rtx (mode); - emit_insn (gen_vec_duplicate (amount, - convert_to_mode (mode, - operands[2], 0))); + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; } - emit_insn (gen_v3 (operands[0], operands[1], amount)); + else if (!rtx_equal_p (operands[1], operands[6])) + operands[6] = copy_rtx (operands[1]); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] + SVE_COND_FCMLA) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/m, %4.\;fcmla\t%0., %1/m, %2., %3., # + #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + +;; Unpredicated FCMLA with indexing. 
+(define_insn "@aarch64__lane_" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_HSF + [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w") + (unspec:SVE_FULL_HSF + [(match_operand:SVE_FULL_HSF 2 "register_operand" ", ") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")] + FCMLA))] + "TARGET_SVE" + "@ + fcmla\t%0., %1., %2.[%3], # + movprfx\t%0, %4\;fcmla\t%0., %1., %2.[%3], #" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Trigonometric multiply-add +;; ------------------------------------------------------------------------- +;; Includes: +;; - FTMAD +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_tmad" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:SVE_FULL_F 1 "register_operand" "0, w") + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:DI 3 "const_int_operand")] + UNSPEC_FTMAD))] + "TARGET_SVE" + "@ + ftmad\t%0., %0., %2., #%3 + movprfx\t%0, %1\;ftmad\t%0., %0., %2., #%3" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) +;; ------------------------------------------------------------------------- +;; Includes: +;; - BFDOT (BF16) +;; - BFMLALB (BF16) +;; - BFMLALT (BF16) +;; - BFMMLA (BF16) +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_vnx4sf" + [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") + (unspec:VNx4SF + [(match_operand:VNx4SF 1 "register_operand" "0, w") + (match_operand:VNx8BF 2 "register_operand" "w, w") + (match_operand:VNx8BF 3 "register_operand" "w, w")] + SVE_BFLOAT_TERNARY_LONG))] + "TARGET_SVE_BF16" + "@ + \t%0.s, %2.h, %3.h + movprfx\t%0, %1\;\t%0.s, %2.h, %3.h" + [(set_attr "movprfx" "*,yes")] +) + +;; The immediate range is enforced before generating the instruction. +(define_insn "@aarch64_sve__lanevnx4sf" + [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") + (unspec:VNx4SF + [(match_operand:VNx4SF 1 "register_operand" "0, w") + (match_operand:VNx8BF 2 "register_operand" "w, w") + (match_operand:VNx8BF 3 "register_operand" "y, y") + (match_operand:SI 4 "const_int_operand")] + SVE_BFLOAT_TERNARY_LONG_LANE))] + "TARGET_SVE_BF16" + "@ + \t%0.s, %2.h, %3.h[%4] + movprfx\t%0, %1\;\t%0.s, %2.h, %3.h[%4]" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Matrix multiply-accumulate +;; ------------------------------------------------------------------------- +;; Includes: +;; - FMMLA (F32MM,F64MM) +;; ------------------------------------------------------------------------- + +;; The mode iterator enforces the target requirements. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w") + (unspec:SVE_MATMULF + [(match_operand:SVE_MATMULF 2 "register_operand" "w, w") + (match_operand:SVE_MATMULF 3 "register_operand" "w, w") + (match_operand:SVE_MATMULF 1 "register_operand" "0, w")] + FMMLA))] + "TARGET_SVE" + "@ + \\t%0., %2., %3. + movprfx\t%0, %1\;\\t%0., %2., %3." 
+ [(set_attr "movprfx" "*,yes")] +) + +;; ========================================================================= +;; == Comparisons and selects +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Select based on predicates +;; ------------------------------------------------------------------------- +;; Includes merging patterns for: +;; - FMOV +;; - MOV +;; - SEL +;; ------------------------------------------------------------------------- + +;; vcond_mask operand order: true, false, mask +;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) +;; SEL operand order: mask, true, false +(define_expand "@vcond_mask_" + [(set (match_operand:SVE_ALL 0 "register_operand") + (unspec:SVE_ALL + [(match_operand: 3 "register_operand") + (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") + (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + { + if (register_operand (operands[1], mode)) + operands[2] = force_reg (mode, operands[2]); + } +) + +;; Selects between: +;; - two registers +;; - a duplicated immediate and a register +;; - a duplicated immediate and zero +;; +;; For unpacked vectors, it doesn't really matter whether SEL uses the +;; the container size or the element size. If SEL used the container size, +;; it would ignore undefined bits of the predicate but would copy the +;; upper (undefined) bits of each container along with the defined bits. +;; If SEL used the element size, it would use undefined bits of the predicate +;; to select between undefined elements in each input vector. Thus the only +;; difference is whether the undefined bits in a container always come from +;; the same input as the defined bits, or whether the choice can vary +;; independently of the defined bits. +;; +;; For the other instructions, using the element size is more natural, +;; so we do that for SEL as well. +(define_insn "*vcond_mask_" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w") + (unspec:SVE_ALL + [(match_operand: 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl") + (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc") + (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")] + UNSPEC_SEL))] + "TARGET_SVE + && (!register_operand (operands[1], mode) + || register_operand (operands[2], mode))" + "@ + sel\t%0., %3, %1., %2. + mov\t%0., %3/m, #%I1 + mov\t%0., %3/z, #%I1 + fmov\t%0., %3/m, #%1 + movprfx\t%0., %3/z, %0.\;fmov\t%0., %3/m, #%1 + movprfx\t%0, %2\;mov\t%0., %3/m, #%I1 + movprfx\t%0, %2\;fmov\t%0., %3/m, #%1" + [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] +) + +;; Optimize selects between a duplicated scalar variable and another vector, +;; the latter of which can be a zero constant or a variable. Treat duplicates +;; of GPRs as being more expensive than duplicates of FPRs, since they +;; involve a cross-file move. 
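As a reminder of the semantics being selected between here and in the pattern that follows (in UNSPEC_SEL operand order: mask first, then the value for active lanes, then the value for inactive lanes), a trivial scalar sketch; int32_t elements are assumed purely for illustration and none of this is part of the patch.

#include <cstdint>
#include <cstdio>

// vcond_mask / SEL: res[i] = mask[i] ? t[i] : f[i]
static void
sel (int32_t *res, const bool *mask, const int32_t *t, const int32_t *f, int n)
{
  for (int i = 0; i < n; ++i)
    res[i] = mask[i] ? t[i] : f[i];
}

// sel_dup: the "active" input is a single scalar broadcast to every lane.
static void
sel_dup (int32_t *res, const bool *mask, int32_t scalar, const int32_t *f, int n)
{
  for (int i = 0; i < n; ++i)
    res[i] = mask[i] ? scalar : f[i];
}

int
main ()
{
  bool m[4] = { true, false, false, true };
  int32_t t[4] = { 1, 2, 3, 4 }, f[4] = { 9, 9, 9, 9 }, r[4];
  sel (r, m, t, f, 4);
  printf ("%d %d %d %d\n", (int) r[0], (int) r[1], (int) r[2], (int) r[3]);   // 1 9 9 4
  sel_dup (r, m, 7, f, 4);
  printf ("%d %d %d %d\n", (int) r[0], (int) r[1], (int) r[2], (int) r[3]);   // 7 9 9 7
  return 0;
}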
+(define_insn "@aarch64_sel_dup" + [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") + (unspec:SVE_ALL + [(match_operand: 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (vec_duplicate:SVE_ALL + (match_operand: 1 "register_operand" "r, w, r, w, r, w")) + (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + mov\t%0., %3/m, %1 + mov\t%0., %3/m, %1 + movprfx\t%0., %3/z, %0.\;mov\t%0., %3/m, %1 + movprfx\t%0., %3/z, %0.\;mov\t%0., %3/m, %1 + movprfx\t%0, %2\;mov\t%0., %3/m, %1 + movprfx\t%0, %2\;mov\t%0., %3/m, %1" + [(set_attr "movprfx" "*,*,yes,yes,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Compare and select +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +;; Integer (signed) vcond. Don't enforce an immediate range here, since it +;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. +(define_expand "vcond" + [(set (match_operand:SVE_ALL 0 "register_operand") + (if_then_else:SVE_ALL + (match_operator 3 "comparison_operator" + [(match_operand:SVE_I 4 "register_operand") + (match_operand:SVE_I 5 "nonmemory_operand")]) + (match_operand:SVE_ALL 1 "nonmemory_operand") + (match_operand:SVE_ALL 2 "nonmemory_operand")))] + "TARGET_SVE && == " + { + aarch64_expand_sve_vcond (mode, mode, operands); + DONE; + } +) + +;; Integer vcondu. Don't enforce an immediate range here, since it +;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. +(define_expand "vcondu" + [(set (match_operand:SVE_ALL 0 "register_operand") + (if_then_else:SVE_ALL + (match_operator 3 "comparison_operator" + [(match_operand:SVE_I 4 "register_operand") + (match_operand:SVE_I 5 "nonmemory_operand")]) + (match_operand:SVE_ALL 1 "nonmemory_operand") + (match_operand:SVE_ALL 2 "nonmemory_operand")))] + "TARGET_SVE && == " + { + aarch64_expand_sve_vcond (mode, mode, operands); + DONE; + } +) + +;; Floating-point vcond. All comparisons except FCMUO allow a zero operand; +;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero. +(define_expand "vcond" + [(set (match_operand:SVE_FULL_HSD 0 "register_operand") + (if_then_else:SVE_FULL_HSD + (match_operator 3 "comparison_operator" + [(match_operand: 4 "register_operand") + (match_operand: 5 "aarch64_simd_reg_or_zero")]) + (match_operand:SVE_FULL_HSD 1 "nonmemory_operand") + (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))] + "TARGET_SVE" + { + aarch64_expand_sve_vcond (mode, mode, operands); + DONE; + } +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Comparisons +;; ------------------------------------------------------------------------- +;; Includes: +;; - CMPEQ +;; - CMPGE +;; - CMPGT +;; - CMPHI +;; - CMPHS +;; - CMPLE +;; - CMPLO +;; - CMPLS +;; - CMPLT +;; - CMPNE +;; ------------------------------------------------------------------------- + +;; Signed integer comparisons. Don't enforce an immediate range here, since +;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int +;; instead. 
+(define_expand "vec_cmp" + [(parallel + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "comparison_operator" + [(match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "nonmemory_operand")])) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; + } +) + +;; Unsigned integer comparisons. Don't enforce an immediate range here, since +;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int +;; instead. +(define_expand "vec_cmpu" + [(parallel + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "comparison_operator" + [(match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "nonmemory_operand")])) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); DONE; } ) -;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. -;; -;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP -;; is a PTRUE even if the optimizers haven't yet been able to propagate -;; the constant. We would use a separate unspec code for PTESTs involving -;; GPs that might not be PTRUEs. -(define_insn "ptest_ptrue" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa") - (match_operand:PRED_ALL 1 "register_operand" "Upa")] - UNSPEC_PTEST_PTRUE) - (const_int 0)))] +;; Predicated integer comparisons. +;; +;; For unpacked vectors, only the lowpart element in each input container +;; has a defined value, and only the predicate bits associated with +;; those elements are defined. For example, when comparing two VNx2SIs: +;; +;; - The VNx2SIs can be seem as VNx2DIs in which the low halves of each +;; DI container store an SI element. The upper bits of each DI container +;; are undefined. +;; +;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the +;; even elements are defined and the odd elements are undefined. +;; +;; - The associated predicate mode is VNx2BI. This means that only the +;; low bit in each predicate byte is defined (on input and on output). +;; +;; - We use a .s comparison to compare VNx2SIs, under the control of a +;; VNx2BI governing predicate, to produce a VNx2BI result. If we view +;; the .s operation as operating on VNx4SIs then for odd lanes: +;; +;; - the input governing predicate bit is undefined +;; - the SI elements being compared are undefined +;; - the predicate result bit is therefore undefined, but +;; - the predicate result bit is in the undefined part of a VNx2BI, +;; so its value doesn't matter anyway. +(define_insn "@aarch64_pred_cmp" + [(set (match_operand: 0 "register_operand" "=Upa, Upa") + (unspec: + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP: + (match_operand:SVE_I 3 "register_operand" "w, w") + (match_operand:SVE_I 4 "aarch64_sve_cmp__operand" ", w"))] + UNSPEC_PRED_Z)) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + "@ + cmp\t%0., %1/z, %3., #%4 + cmp\t%0., %1/z, %3., %4." +) + +;; Predicated integer comparisons in which both the flag and predicate +;; results are interesting. 
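The _cc and _ptest patterns below fold a following PTEST into the comparison itself, which, as I understand the architecture, works because SVE's predicated compares set the flags exactly as a PTEST of their result under the same governing predicate would (hence the same-predicate check in the insn conditions).  The sketch below is my reading of that flag convention (N: value of the first active element, Z: no active element set, C: last active element clear, V: clear); the authoritative definition is the CC_NZC/UNSPEC_PTEST description elsewhere in the port, not this example.

#include <cstdio>

struct flags { bool n, z, c, v; };

// PTEST-style flags for predicate 'pred' under governing predicate 'gp'.
static flags
ptest (const bool *gp, const bool *pred, int nelems)
{
  flags f = { false, true, true, false };
  bool seen_first = false;
  for (int i = 0; i < nelems; ++i)
    if (gp[i])
      {
        if (!seen_first)
          {
            f.n = pred[i];     // N: value of the first active element
            seen_first = true;
          }
        if (pred[i])
          f.z = false;         // Z: set only if no active element is true
        f.c = !pred[i];        // C: last active element is clear
      }
  return f;
}

int
main ()
{
  bool gp[4] = { true, true, true, true };
  bool cmp_result[4] = { false, true, true, false };
  flags f = ptest (gp, cmp_result, 4);
  printf ("N=%d Z=%d C=%d V=%d\n", f.n, f.z, f.c, f.v);   // N=0 Z=0 C=1 V=0
  return 0;
}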
+(define_insn_and_rewrite "*cmp_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP: + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (set (match_operand: 0 "register_operand" "=Upa, Upa") + (unspec: + [(match_dup 6) + (match_dup 7) + (SVE_INT_CMP: + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_Z))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + "@ + cmp\t%0., %1/z, %2., #%3 + cmp\t%0., %1/z, %2., %3." + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + +;; Predicated integer comparisons in which only the flags result is +;; interesting. +(define_insn_and_rewrite "*cmp_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP: + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (clobber (match_scratch: 0 "=Upa, Upa"))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + "@ + cmp\t%0., %1/z, %2., #%3 + cmp\t%0., %1/z, %2., %3." + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + +;; Predicated integer comparisons, formed by combining a PTRUE-predicated +;; comparison with an AND. Split the instruction into its preferred form +;; at the earliest opportunity, in order to get rid of the redundant +;; operand 4. +(define_insn_and_split "*cmp_and" + [(set (match_operand: 0 "register_operand" "=Upa, Upa") + (and: + (unspec: + [(match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (SVE_INT_CMP: + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] + UNSPEC_PRED_Z) + (match_operand: 1 "register_operand" "Upl, Upl"))) + (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE" - "ptest\t%0, %1.b" -) - -;; Set element I of the result if operand1 + J < operand2 for all J in [0, I]. -;; with the comparison being unsigned. -(define_insn "while_ult" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (unspec: + [(match_dup 1) + (const_int SVE_MAYBE_NOT_PTRUE) + (SVE_INT_CMP: + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_Z)) + (clobber (reg:CC_NZC CC_REGNUM))])] +) + +;; Predicated integer wide comparisons. +(define_insn "@aarch64_pred_cmp_wide" + [(set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w") + (match_operand:VNx2DI 4 "register_operand" "w")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z)) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + "cmp\t%0., %1/z, %3., %4.d" +) + +;; Predicated integer wide comparisons in which both the flag and +;; predicate results are interesting. 
+(define_insn "*aarch64_pred_cmp_wide_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:VNx16BI 6 "register_operand" "Upl") + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") + (match_operand:VNx2DI 3 "register_operand" "w")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_dup 6) + (match_dup 7) + (unspec: + [(match_dup 2) + (match_dup 3)] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + "cmp\t%0., %1/z, %2., %3.d" +) + +;; Predicated integer wide comparisons in which only the flags result +;; is interesting. +(define_insn "*aarch64_pred_cmp_wide_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:VNx16BI 6 "register_operand" "Upl") + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") + (match_operand:VNx2DI 3 "register_operand" "w")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (clobber (match_scratch: 0 "=Upa"))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + "cmp\t%0., %1/z, %2., %3.d" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] While tests +;; ------------------------------------------------------------------------- +;; Includes: +;; - WHILEGE (SVE2) +;; - WHILEGT (SVE2) +;; - WHILEHI (SVE2) +;; - WHILEHS (SVE2) +;; - WHILELE +;; - WHILELO +;; - WHILELS +;; - WHILELT +;; - WHILERW (SVE2) +;; - WHILEWR (SVE2) +;; ------------------------------------------------------------------------- + +;; Set element I of the result if (cmp (plus operand1 J) operand2) is +;; true for all J in [0, I]. +(define_insn "@while_" [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] - UNSPEC_WHILE_LO)) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SVE" - "whilelo\t%0., %1, %2" -) - -;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. -;; Handle the case in which both results are useful. The GP operand -;; to the PTEST isn't needed, so we allow it to be anything. -(define_insn_and_split "while_ult_cc" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI [(match_operand:PRED_ALL 1) - (unspec:PRED_ALL - [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") - (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] - UNSPEC_WHILE_LO)] - UNSPEC_PTEST_PTRUE) - (const_int 0))) + SVE_WHILE)) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + "while\t%0., %1, %2" +) + +;; The WHILE instructions set the flags in the same way as a PTEST with +;; a PTRUE GP. Handle the case in which both results are useful. The GP +;; operands to the PTEST aren't needed, so we allow them to be anything. 
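Following the definition given above the @while_ pattern (element I is set iff the comparison holds for (operand 1 + J, operand 2) for every J in [0, I]), here is a scalar sketch specialised to WHILELO, i.e. an unsigned lower-than comparison; the choice of WHILELO and of 64-bit scalar operands is purely for concreteness.

#include <cstdint>
#include <cstdio>

// WHILELO: pred[i] is set while (op1 + j) < op2 has held for every j <= i.
static void
whilelo (bool *pred, int nelems, uint64_t op1, uint64_t op2)
{
  bool all_so_far = true;
  for (int i = 0; i < nelems; ++i)
    {
      all_so_far = all_so_far && (op1 + i < op2);
      pred[i] = all_so_far;
    }
}

int
main ()
{
  bool pred[8];
  whilelo (pred, 8, 5, 9);        // indices 5..8: only 5,6,7,8 < 9 fails at 9
  for (int i = 0; i < 8; ++i)
    printf ("%d", pred[i]);
  printf ("\n");                  // expected: 11110000
  return 0;
}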
+(define_insn_and_rewrite "*while__cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 3) + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (unspec:PRED_ALL + [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] + SVE_WHILE)] + UNSPEC_PTEST)) (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (unspec:PRED_ALL [(match_dup 2) - (match_dup 3)] - UNSPEC_WHILE_LO))] + (unspec:PRED_ALL [(match_dup 1) + (match_dup 2)] + SVE_WHILE))] "TARGET_SVE" - "whilelo\t%0., %2, %3" + "while\t%0., %1, %2" ;; Force the compiler to drop the unused predicate operand, so that we ;; don't have an unnecessary PTRUE. - "&& !CONSTANT_P (operands[1])" - [(const_int 0)] + "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" + { + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (mode); + } +) + +;; Same, but handle the case in which only the flags result is useful. +(define_insn_and_rewrite "@while__ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 3) + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (unspec:PRED_ALL + [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] + SVE_WHILE)] + UNSPEC_PTEST)) + (clobber (match_scratch:PRED_ALL 0 "=Upa"))] + "TARGET_SVE" + "while\t%0., %1, %2" + ;; Force the compiler to drop the unused predicate operand, so that we + ;; don't have an unnecessary PTRUE. + "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" + { + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (mode); + } +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Direct comparisons +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCMEQ +;; - FCMGE +;; - FCMGT +;; - FCMLE +;; - FCMLT +;; - FCMNE +;; - FCMUO +;; ------------------------------------------------------------------------- + +;; Floating-point comparisons. All comparisons except FCMUO allow a zero +;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO +;; with zero. +(define_expand "vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "comparison_operator" + [(match_operand:SVE_FULL_F 2 "register_operand") + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))] + "TARGET_SVE" { - emit_insn (gen_while_ult_cc - (operands[0], CONSTM1_RTX (mode), - operands[2], operands[3])); + aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); DONE; } ) -;; Predicated integer comparison. -(define_insn "*vec_cmp_" +;; Predicated floating-point comparisons. +(define_insn "@aarch64_pred_fcm" [(set (match_operand: 0 "register_operand" "=Upa, Upa") (unspec: [(match_operand: 1 "register_operand" "Upl, Upl") - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w")] - SVE_COND_INT_CMP)) - (clobber (reg:CC CC_REGNUM))] + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")] + SVE_COND_FP_CMP_I0))] "TARGET_SVE" "@ - cmp\t%0., %1/z, %2., #%3 - cmp\t%0., %1/z, %2., %3." + fcm\t%0., %1/z, %3., #0.0 + fcm\t%0., %1/z, %3., %4." ) -;; Predicated integer comparison in which only the flags result is interesting. 
-(define_insn "*vec_cmp__ptest" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI - [(match_operand: 1 "register_operand" "Upl, Upl") - (unspec: - [(match_dup 1) - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w")] - SVE_COND_INT_CMP)] - UNSPEC_PTEST_PTRUE) - (const_int 0))) - (clobber (match_scratch: 0 "=Upa, Upa"))] +;; Same for unordered comparisons. +(define_insn "@aarch64_pred_fcmuo" + [(set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_FULL_F 3 "register_operand" "w") + (match_operand:SVE_FULL_F 4 "register_operand" "w")] + UNSPEC_COND_FCMUO))] "TARGET_SVE" - "@ - cmp\t%0., %1/z, %2., #%3 - cmp\t%0., %1/z, %2., %3." + "fcmuo\t%0., %1/z, %3., %4." ) -;; Predicated comparison in which both the flag and predicate results -;; are interesting. -(define_insn "*vec_cmp__cc" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI - [(match_operand: 1 "register_operand" "Upl, Upl") - (unspec: - [(match_dup 1) - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w")] - SVE_COND_INT_CMP)] - UNSPEC_PTEST_PTRUE) - (const_int 0))) - (set (match_operand: 0 "register_operand" "=Upa, Upa") +;; Floating-point comparisons predicated on a PTRUE, with the results ANDed +;; with another predicate P. This does not have the same trapping behavior +;; as predicating the comparison itself on P, but it's a legitimate fold, +;; since we can drop any potentially-trapping operations whose results +;; are not needed. +;; +;; Split the instruction into its preferred form (below) at the earliest +;; opportunity, in order to get rid of the redundant operand 1. +(define_insn_and_split "*fcm_and_combine" + [(set (match_operand: 0 "register_operand" "=Upa, Upa") + (and: + (unspec: + [(match_operand: 1) + (const_int SVE_KNOWN_PTRUE) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] + SVE_COND_FP_CMP_I0) + (match_operand: 4 "register_operand" "Upl, Upl")))] + "TARGET_SVE" + "#" + "&& 1" + [(set (match_dup 0) (unspec: - [(match_dup 1) + [(match_dup 4) + (const_int SVE_MAYBE_NOT_PTRUE) (match_dup 2) (match_dup 3)] - SVE_COND_INT_CMP))] + SVE_COND_FP_CMP_I0))] +) + +;; Same for unordered comparisons. +(define_insn_and_split "*fcmuo_and_combine" + [(set (match_operand: 0 "register_operand" "=Upa") + (and: + (unspec: + [(match_operand: 1) + (const_int SVE_KNOWN_PTRUE) + (match_operand:SVE_FULL_F 2 "register_operand" "w") + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FCMUO) + (match_operand: 4 "register_operand" "Upl")))] "TARGET_SVE" - "@ - cmp\t%0., %1/z, %2., #%3 - cmp\t%0., %1/z, %2., %3." + "#" + "&& 1" + [(set (match_dup 0) + (unspec: + [(match_dup 4) + (const_int SVE_MAYBE_NOT_PTRUE) + (match_dup 2) + (match_dup 3)] + UNSPEC_COND_FCMUO))] ) -;; Predicated floating-point comparison (excluding FCMUO, which doesn't -;; allow #0.0 as an operand). 
-(define_insn "*vec_fcm" - [(set (match_operand: 0 "register_operand" "=Upa, Upa") +;; ------------------------------------------------------------------------- +;; ---- [FP] Absolute comparisons +;; ------------------------------------------------------------------------- +;; Includes: +;; - FACGE +;; - FACGT +;; - FACLE +;; - FACLT +;; ------------------------------------------------------------------------- + +;; Predicated floating-point absolute comparisons. +(define_expand "@aarch64_pred_fac" + [(set (match_operand: 0 "register_operand") (unspec: - [(match_operand: 1 "register_operand" "Upl, Upl") - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] - SVE_COND_FP_CMP))] - "TARGET_SVE" - "@ - fcm\t%0., %1/z, %2., #0.0 - fcm\t%0., %1/z, %2., %3." + [(match_operand: 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_dup 2) + (match_operand:SVE_FULL_F 3 "register_operand")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_dup 2) + (match_operand:SVE_FULL_F 4 "register_operand")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP))] + "TARGET_SVE" +) + +(define_insn_and_rewrite "*aarch64_pred_fac_relaxed" + [(set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_operand 6) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP))] + "TARGET_SVE" + "fac\t%0., %1/z, %2., %3." + "&& (!rtx_equal_p (operands[1], operands[5]) + || !rtx_equal_p (operands[1], operands[6]))" + { + operands[5] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); + } ) -;; Predicated FCMUO. -(define_insn "*vec_fcmuo" +(define_insn "*aarch64_pred_fac_strict" [(set (match_operand: 0 "register_operand" "=Upa") (unspec: [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_F 2 "register_operand" "w") - (match_operand:SVE_F 3 "register_operand" "w")] - UNSPEC_COND_UO))] + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP))] + "TARGET_SVE" + "fac\t%0., %1/z, %2., %3." +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Select +;; ------------------------------------------------------------------------- +;; Includes: +;; - SEL +;; ------------------------------------------------------------------------- + +(define_insn "@vcond_mask_" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (ior:PRED_ALL + (and:PRED_ALL + (match_operand:PRED_ALL 3 "register_operand" "Upa") + (match_operand:PRED_ALL 1 "register_operand" "Upa")) + (and:PRED_ALL + (not (match_dup 3)) + (match_operand:PRED_ALL 2 "register_operand" "Upa"))))] "TARGET_SVE" - "fcmuo\t%0., %1/z, %2., %3." 
+ "sel\t%0.b, %3, %1.b, %2.b" ) -;; vcond_mask operand order: true, false, mask -;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) -;; SEL operand order: mask, true, false -(define_insn "vcond_mask_" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w") - (unspec:SVE_ALL - [(match_operand: 3 "register_operand" "Upa") - (match_operand:SVE_ALL 1 "register_operand" "w") - (match_operand:SVE_ALL 2 "register_operand" "w")] - UNSPEC_SEL))] +;; ------------------------------------------------------------------------- +;; ---- [PRED] Test bits +;; ------------------------------------------------------------------------- +;; Includes: +;; - PTEST +;; ------------------------------------------------------------------------- + +;; Branch based on predicate equality or inequality. +(define_expand "cbranch4" + [(set (pc) + (if_then_else + (match_operator 0 "aarch64_equality_operator" + [(match_operand:PRED_ALL 1 "register_operand") + (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" + { + rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all ()); + rtx cast_ptrue = gen_lowpart (mode, ptrue); + rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); + rtx pred; + if (operands[2] == CONST0_RTX (mode)) + pred = operands[1]; + else + { + pred = gen_reg_rtx (mode); + emit_insn (gen_aarch64_pred_xor_z (pred, cast_ptrue, operands[1], + operands[2])); + } + emit_insn (gen_aarch64_ptest (ptrue, cast_ptrue, ptrue_flag, pred)); + operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + operands[2] = const0_rtx; + } +) + +;; See "Description of UNSPEC_PTEST" above for details. +(define_insn "aarch64_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") + (match_operand 1) + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_ALL 3 "register_operand" "Upa")] + UNSPEC_PTEST))] "TARGET_SVE" - "sel\t%0., %3, %1., %2." + "ptest\t%0, %3.b" ) -;; Selects between a duplicated immediate and zero. -(define_insn "aarch64_sve_dup_const" - [(set (match_operand:SVE_I 0 "register_operand" "=w") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") - (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] - UNSPEC_SEL))] +;; ========================================================================= +;; == Reductions +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Conditional reductions +;; ------------------------------------------------------------------------- +;; Includes: +;; - CLASTA +;; - CLASTB +;; ------------------------------------------------------------------------- + +;; Set operand 0 to the last active element in operand 3, or to tied +;; operand 1 if no elements are active. +(define_insn "@fold_extract__" + [(set (match_operand: 0 "register_operand" "=?r, w") + (unspec: + [(match_operand: 1 "register_operand" "0, 0") + (match_operand: 2 "register_operand" "Upl, Upl") + (match_operand:SVE_FULL 3 "register_operand" "w, w")] + CLAST))] "TARGET_SVE" - "mov\t%0., %1/z, #%2" + "@ + clast\t%0, %2, %0, %3. + clast\t%0, %2, %0, %3." ) -;; Integer (signed) vcond. Don't enforce an immediate range here, since it -;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. 
-(define_expand "vcond" - [(set (match_operand:SVE_ALL 0 "register_operand") - (if_then_else:SVE_ALL - (match_operator 3 "comparison_operator" - [(match_operand: 4 "register_operand") - (match_operand: 5 "nonmemory_operand")]) - (match_operand:SVE_ALL 1 "register_operand") - (match_operand:SVE_ALL 2 "register_operand")))] +(define_insn "@aarch64_fold_extract_vector__" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 1 "register_operand" "0, w") + (match_operand: 2 "register_operand" "Upl, Upl") + (match_operand:SVE_FULL 3 "register_operand" "w, w")] + CLAST))] + "TARGET_SVE" + "@ + clast\t%0., %2, %0., %3. + movprfx\t%0, %1\;clast\t%0., %2, %0., %3." +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Tree reductions +;; ------------------------------------------------------------------------- +;; Includes: +;; - ANDV +;; - EORV +;; - ORV +;; - SADDV +;; - SMAXV +;; - SMINV +;; - UADDV +;; - UMAXV +;; - UMINV +;; ------------------------------------------------------------------------- + +;; Unpredicated integer add reduction. +(define_expand "reduc_plus_scal_" + [(match_operand: 0 "register_operand") + (match_operand:SVE_FULL_I 1 "register_operand")] "TARGET_SVE" { - aarch64_expand_sve_vcond (mode, mode, operands); + rtx pred = aarch64_ptrue_reg (mode); + rtx tmp = mode == DImode ? operands[0] : gen_reg_rtx (DImode); + emit_insn (gen_aarch64_pred_reduc_uadd_ (tmp, pred, operands[1])); + if (tmp != operands[0]) + emit_move_insn (operands[0], gen_lowpart (mode, tmp)); DONE; } ) -;; Integer vcondu. Don't enforce an immediate range here, since it -;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. -(define_expand "vcondu" - [(set (match_operand:SVE_ALL 0 "register_operand") - (if_then_else:SVE_ALL - (match_operator 3 "comparison_operator" - [(match_operand: 4 "register_operand") - (match_operand: 5 "nonmemory_operand")]) - (match_operand:SVE_ALL 1 "register_operand") - (match_operand:SVE_ALL 2 "register_operand")))] +;; Predicated integer add reduction. The result is always 64-bits. +(define_insn "@aarch64_pred_reduc__" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand: 1 "register_operand" "Upl") + (match_operand:SVE_FULL_I 2 "register_operand" "w")] + SVE_INT_ADDV))] + "TARGET_SVE && >= " + "addv\t%d0, %1, %2." +) + +;; Unpredicated integer reductions. +(define_expand "reduc__scal_" + [(set (match_operand: 0 "register_operand") + (unspec: [(match_dup 2) + (match_operand:SVE_FULL_I 1 "register_operand")] + SVE_INT_REDUCTION))] "TARGET_SVE" { - aarch64_expand_sve_vcond (mode, mode, operands); - DONE; + operands[2] = aarch64_ptrue_reg (mode); } ) -;; Floating-point vcond. All comparisons except FCMUO allow a zero -;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO -;; with zero. -(define_expand "vcond" - [(set (match_operand:SVE_SD 0 "register_operand") - (if_then_else:SVE_SD - (match_operator 3 "comparison_operator" - [(match_operand: 4 "register_operand") - (match_operand: 5 "aarch64_simd_reg_or_zero")]) - (match_operand:SVE_SD 1 "register_operand") - (match_operand:SVE_SD 2 "register_operand")))] +;; Predicated integer reductions. +(define_insn "@aarch64_pred_reduc__" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "Upl") + (match_operand:SVE_FULL_I 2 "register_operand" "w")] + SVE_INT_REDUCTION))] + "TARGET_SVE" + "\t%0, %1, %2." 
+) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Tree reductions +;; ------------------------------------------------------------------------- +;; Includes: +;; - FADDV +;; - FMAXNMV +;; - FMAXV +;; - FMINNMV +;; - FMINV +;; ------------------------------------------------------------------------- + +;; Unpredicated floating-point tree reductions. +(define_expand "reduc__scal_" + [(set (match_operand: 0 "register_operand") + (unspec: [(match_dup 2) + (match_operand:SVE_FULL_F 1 "register_operand")] + SVE_FP_REDUCTION))] "TARGET_SVE" { - aarch64_expand_sve_vcond (mode, mode, operands); - DONE; + operands[2] = aarch64_ptrue_reg (mode); } ) -;; Signed integer comparisons. Don't enforce an immediate range here, since -;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int -;; instead. -(define_expand "vec_cmp" - [(parallel - [(set (match_operand: 0 "register_operand") - (match_operator: 1 "comparison_operator" - [(match_operand:SVE_I 2 "register_operand") - (match_operand:SVE_I 3 "nonmemory_operand")])) - (clobber (reg:CC CC_REGNUM))])] +;; Predicated floating-point tree reductions. +(define_insn "@aarch64_pred_reduc__" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "Upl") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + SVE_FP_REDUCTION))] + "TARGET_SVE" + "\t%0, %1, %2." +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Left-to-right reductions +;; ------------------------------------------------------------------------- +;; Includes: +;; - FADDA +;; ------------------------------------------------------------------------- + +;; Unpredicated in-order FP reductions. +(define_expand "fold_left_plus_" + [(set (match_operand: 0 "register_operand") + (unspec: [(match_dup 3) + (match_operand: 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + UNSPEC_FADDA))] "TARGET_SVE" { - aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), - operands[2], operands[3]); - DONE; + operands[3] = aarch64_ptrue_reg (mode); } ) -;; Unsigned integer comparisons. Don't enforce an immediate range here, since -;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int -;; instead. -(define_expand "vec_cmpu" - [(parallel - [(set (match_operand: 0 "register_operand") - (match_operator: 1 "comparison_operator" - [(match_operand:SVE_I 2 "register_operand") - (match_operand:SVE_I 3 "nonmemory_operand")])) - (clobber (reg:CC CC_REGNUM))])] +;; Predicated in-order FP reductions. +(define_insn "mask_fold_left_plus_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 3 "register_operand" "Upl") + (match_operand: 1 "register_operand" "0") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_FADDA))] "TARGET_SVE" + "fadda\t%0, %3, %0, %2." 
+) + +;; ========================================================================= +;; == Permutes +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] General permutes +;; ------------------------------------------------------------------------- +;; Includes: +;; - TBL +;; ------------------------------------------------------------------------- + +(define_expand "vec_perm" + [(match_operand:SVE_FULL 0 "register_operand") + (match_operand:SVE_FULL 1 "register_operand") + (match_operand:SVE_FULL 2 "register_operand") + (match_operand: 3 "aarch64_sve_vec_perm_operand")] + "TARGET_SVE && GET_MODE_NUNITS (mode).is_constant ()" { - aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), - operands[2], operands[3]); + aarch64_expand_sve_vec_perm (operands[0], operands[1], + operands[2], operands[3]); DONE; } ) -;; Floating-point comparisons. All comparisons except FCMUO allow a zero -;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO -;; with zero. -(define_expand "vec_cmp" - [(set (match_operand: 0 "register_operand") - (match_operator: 1 "comparison_operator" - [(match_operand:SVE_F 2 "register_operand") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] +(define_insn "@aarch64_sve_tbl" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SVE" + "tbl\t%0., %1., %2." +) + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Special-purpose unary permutes +;; ------------------------------------------------------------------------- +;; Includes: +;; - COMPACT +;; - DUP +;; - REV +;; ------------------------------------------------------------------------- + +;; Compact active elements and pad with zeros. +(define_insn "@aarch64_sve_compact" + [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w") + (unspec:SVE_FULL_SD + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SVE_FULL_SD 2 "register_operand" "w")] + UNSPEC_SVE_COMPACT))] "TARGET_SVE" + "compact\t%0., %1, %2." +) + +;; Duplicate one element of a vector. +(define_insn "@aarch64_sve_dup_lane" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w") + (vec_duplicate:SVE_ALL + (vec_select: + (match_operand:SVE_ALL 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "const_int_operand")]))))] + "TARGET_SVE + && IN_RANGE (INTVAL (operands[2]) * / 8, 0, 63)" + "dup\t%0., %1.[%2]" +) + +;; Use DUP.Q to duplicate a 128-bit segment of a register. +;; +;; The vec_select: sets memory lane number N of the V128 to lane +;; number op2 + N of op1. (We don't need to distinguish between memory +;; and architectural register lane numbering for op1 or op0, since the +;; two numbering schemes are the same for SVE.) +;; +;; The vec_duplicate:SVE_FULL then copies memory lane number N of the +;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP +;; of op0. We therefore get the correct result for both endiannesses. +;; +;; The wrinkle is that for big-endian V128 registers, memory lane numbering +;; is in the opposite order to architectural register lane numbering. +;; Thus if we were to do this operation via a V128 temporary register, +;; the vec_select and vec_duplicate would both involve a reverse operation +;; for big-endian targets. 
In this fused pattern the two reverses cancel +;; each other out. +(define_insn "@aarch64_sve_dupq_lane" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (vec_duplicate:SVE_FULL + (vec_select: + (match_operand:SVE_FULL 1 "register_operand" "w") + (match_operand 2 "ascending_int_parallel"))))] + "TARGET_SVE + && (INTVAL (XVECEXP (operands[2], 0, 0)) + * GET_MODE_SIZE (mode)) % 16 == 0 + && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0)) + * GET_MODE_SIZE (mode), 0, 63)" { - aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), - operands[2], operands[3], false); - DONE; + unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0)) + * GET_MODE_SIZE (mode)); + operands[2] = gen_int_mode (byte / 16, DImode); + return "dup\t%0.q, %1.q[%2]"; } ) -;; Branch based on predicate equality or inequality. -(define_expand "cbranch4" - [(set (pc) - (if_then_else - (match_operator 0 "aarch64_equality_operator" - [(match_operand:PRED_ALL 1 "register_operand") - (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) - (label_ref (match_operand 3 "")) - (pc)))] - "" +;; Reverse the order of elements within a full vector. +(define_insn "@aarch64_sve_rev" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w") + (unspec:SVE_ALL + [(match_operand:SVE_ALL 1 "register_operand" "w")] + UNSPEC_REV))] + "TARGET_SVE" + "rev\t%0., %1.") + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Special-purpose binary permutes +;; ------------------------------------------------------------------------- +;; Includes: +;; - EXT +;; - SPLICE +;; - TRN1 +;; - TRN2 +;; - UZP1 +;; - UZP2 +;; - ZIP1 +;; - ZIP2 +;; ------------------------------------------------------------------------- + +;; Like EXT, but start at the first active element. +(define_insn "@aarch64_sve_splice" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SVE_FULL 2 "register_operand" "0, w") + (match_operand:SVE_FULL 3 "register_operand" "w, w")] + UNSPEC_SVE_SPLICE))] + "TARGET_SVE" + "@ + splice\t%0., %1, %0., %3. + movprfx\t%0, %2\;splice\t%0., %1, %0., %3." + [(set_attr "movprfx" "*, yes")] +) + +;; Permutes that take half the elements from one vector and half the +;; elements from the other. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w") + (unspec:SVE_ALL + [(match_operand:SVE_ALL 1 "register_operand" "w") + (match_operand:SVE_ALL 2 "register_operand" "w")] + PERMUTE))] + "TARGET_SVE" + "\t%0., %1., %2." +) + +;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns +;; doesn't depend on the mode. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 1 "register_operand" "w") + (match_operand:SVE_FULL 2 "register_operand" "w")] + PERMUTEQ))] + "TARGET_SVE_F64MM" + "\t%0.q, %1.q, %2.q" +) + +;; Concatenate two vectors and extract a subvector. Note that the +;; immediate (third) operand is the lane index not the byte index. 
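The remark above about the EXT immediate being a lane index rather than a byte index is worth a concrete example; the define_insn that follows rescales the element index into the byte offset that EXT itself encodes. A minimal sketch, assuming the usual arm_sve.h spelling:

#include <arm_sve.h>

/* Sketch: the result is the elements of A starting at index 2,
   followed by the first two elements of B; the index is given in
   elements, and the pattern below multiplies it up to EXT's byte
   immediate.  */
svfloat32_t
concat_extract (svfloat32_t a, svfloat32_t b)
{
  return svext (a, b, 2);
}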
+(define_insn "@aarch64_sve_ext" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w") + (unspec:SVE_ALL + [(match_operand:SVE_ALL 1 "register_operand" "0, w") + (match_operand:SVE_ALL 2 "register_operand" "w, w") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_EXT))] + "TARGET_SVE + && IN_RANGE (INTVAL (operands[3]) * / 8, 0, 255)" { - rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); - rtx pred; - if (operands[2] == CONST0_RTX (mode)) - pred = operands[1]; - else - { - pred = gen_reg_rtx (mode); - emit_insn (gen_pred_xor3 (pred, ptrue, operands[1], - operands[2])); - } - emit_insn (gen_ptest_ptrue (ptrue, pred)); - operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); - operands[2] = const0_rtx; + operands[3] = GEN_INT (INTVAL (operands[3]) * / 8); + return (which_alternative == 0 + ? "ext\\t%0.b, %0.b, %2.b, #%3" + : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3"); } + [(set_attr "movprfx" "*,yes")] ) -;; Unpredicated integer MIN/MAX. -(define_expand "3" - [(set (match_operand:SVE_I 0 "register_operand") - (unspec:SVE_I - [(match_dup 3) - (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand") - (match_operand:SVE_I 2 "register_operand"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- [PRED] Special-purpose unary permutes +;; ------------------------------------------------------------------------- +;; Includes: +;; - REV +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_rev" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")] + UNSPEC_REV))] "TARGET_SVE" - { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); - } + "rev\t%0., %1.") + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Special-purpose binary permutes +;; ------------------------------------------------------------------------- +;; Includes: +;; - TRN1 +;; - TRN2 +;; - UZP1 +;; - UZP2 +;; - ZIP1 +;; - ZIP2 +;; ------------------------------------------------------------------------- + +;; Permutes that take half the elements from one vector and half the +;; elements from the other. +(define_insn "@aarch64_sve_" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + PERMUTE))] + "TARGET_SVE" + "\t%0., %1., %2." ) -;; Integer MIN/MAX predicated with a PTRUE. -(define_insn "*3" - [(set (match_operand:SVE_I 0 "register_operand" "=w") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl") - (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0") - (match_operand:SVE_I 3 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +;; Special purpose permute used by the predicate generation instructions. +;; Unlike the normal permute patterns, these instructions operate on VNx16BI +;; regardless of the element size, so that all input and output bits are +;; well-defined. Operand 3 then indicates the size of the permute. +(define_insn "@aarch64_sve_trn1_conv" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand:VNx16BI 2 "register_operand" "Upa") + (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")] + UNSPEC_TRN1_CONV))] "TARGET_SVE" - "\t%0., %1/m, %0., %3." + "trn1\t%0., %1., %2." ) -;; Unpredicated floating-point MIN/MAX. 
-(define_expand "3" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 3) - (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand") - (match_operand:SVE_F 2 "register_operand"))] - UNSPEC_MERGE_PTRUE))] +;; ========================================================================= +;; == Conversions +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT<-INT] Packs +;; ------------------------------------------------------------------------- +;; Includes: +;; - UZP1 +;; ------------------------------------------------------------------------- + +;; Integer pack. Use UZP1 on the narrower type, which discards +;; the high part of each wide element. +(define_insn "vec_pack_trunc_" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") + (unspec:SVE_FULL_BHSI + [(match_operand: 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + UNSPEC_PACK))] + "TARGET_SVE" + "uzp1\t%0., %1., %2." +) + +;; ------------------------------------------------------------------------- +;; ---- [INT<-INT] Unpacks +;; ------------------------------------------------------------------------- +;; Includes: +;; - SUNPKHI +;; - SUNPKLO +;; - UUNPKHI +;; - UUNPKLO +;; ------------------------------------------------------------------------- + +;; Unpack the low or high half of a vector, where "high" refers to +;; the low-numbered lanes for big-endian and the high-numbered lanes +;; for little-endian. +(define_expand "vec_unpack__" + [(match_operand: 0 "register_operand") + (unspec: + [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)] "TARGET_SVE" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + emit_insn (( + ? gen_aarch64_sve_unpkhi_ + : gen_aarch64_sve_unpklo_) + (operands[0], operands[1])); + DONE; } ) -;; Floating-point MIN/MAX predicated with a PTRUE. -(define_insn "*3" - [(set (match_operand:SVE_F 0 "register_operand" "=w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl") - (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0") - (match_operand:SVE_F 3 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +(define_insn "@aarch64_sve_unpk_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")] + UNPACK))] "TARGET_SVE" - "fnm\t%0., %1/m, %0., %3." + "unpk\t%0., %1." ) -;; Unpredicated fmin/fmax. -(define_expand "3" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 3) - (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand") - (match_operand:SVE_F 2 "register_operand")] - FMAXMIN_UNS)] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- [INT<-FP] Conversions +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCVTZS +;; - FCVTZU +;; ------------------------------------------------------------------------- + +;; Unpredicated conversion of floats to integers of the same size (HF to HI, +;; SF to SI or DF to DI). +(define_expand "2" + [(set (match_operand: 0 "register_operand") + (unspec: + [(match_dup 2) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand")] + SVE_COND_FCVTI))] "TARGET_SVE" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + operands[2] = aarch64_ptrue_reg (mode); } ) -;; fmin/fmax predicated with a PTRUE. 
-(define_insn "*3" - [(set (match_operand:SVE_F 0 "register_operand" "=w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl") - (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0") - (match_operand:SVE_F 3 "register_operand" "w")] - FMAXMIN_UNS)] - UNSPEC_MERGE_PTRUE))] +;; Predicated float-to-integer conversion, either to the same width or wider. +(define_insn "@aarch64_sve__nontrunc" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE_COND_FCVTI))] + "TARGET_SVE && >= " + "@ + fcvtz\t%0., %1/m, %2. + movprfx\t%0, %2\;fcvtz\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated narrowing float-to-integer conversion. +(define_insn "@aarch64_sve__trunc" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") + (unspec:VNx4SI_ONLY + [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")] + SVE_COND_FCVTI))] "TARGET_SVE" - "\t%0., %1/m, %0., %3." + "@ + fcvtz\t%0., %1/m, %2. + movprfx\t%0, %2\;fcvtz\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated float-to-integer conversion with merging, either to the same +;; width or wider. +(define_expand "@cond__nontrunc" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_HSDI + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand")] + SVE_COND_FCVTI) + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE && >= " ) -;; Predicated integer operations. -(define_insn "cond_" - [(set (match_operand:SVE_I 0 "register_operand" "=w") - (unspec:SVE_I - [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_I 2 "register_operand" "0") - (match_operand:SVE_I 3 "register_operand" "w")] - SVE_COND_INT_OP))] +;; The first alternative doesn't need the earlyclobber, but the only case +;; it would help is the uninteresting one in which operands 2 and 3 are +;; the same register (despite having different modes). Making all the +;; alternatives earlyclobber makes things more consistent for the +;; register allocator. +(define_insn_and_rewrite "*cond__nontrunc_relaxed" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_HSDI + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FCVTI) + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + fcvtz\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. + movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." 
+ "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes,yes")] +) + +(define_insn "*cond__nontrunc_strict" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_HSDI + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FCVTI) + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + fcvtz\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. + movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; Predicated narrowing float-to-integer conversion with merging. +(define_expand "@cond__trunc" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand") + (unspec:VNx4SI_ONLY + [(match_operand:VNx2BI 1 "register_operand") + (unspec:VNx4SI_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:VNx2DF_ONLY 2 "register_operand")] + SVE_COND_FCVTI) + (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] "TARGET_SVE" - "\t%0., %1/m, %0., %3." ) -;; Set operand 0 to the last active element in operand 3, or to tied -;; operand 1 if no elements are active. -(define_insn "fold_extract_last_" - [(set (match_operand: 0 "register_operand" "=r, w") - (unspec: - [(match_operand: 1 "register_operand" "0, 0") - (match_operand: 2 "register_operand" "Upl, Upl") - (match_operand:SVE_ALL 3 "register_operand" "w, w")] - UNSPEC_CLASTB))] +(define_insn "*cond__trunc" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w") + (unspec:VNx4SI_ONLY + [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx4SI_ONLY + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")] + SVE_COND_FCVTI) + (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] "TARGET_SVE" "@ - clastb\t%0, %2, %0, %3. - clastb\t%0, %2, %0, %3." + fcvtz\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. + movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] ) -;; Unpredicated integer add reduction. -(define_expand "reduc_plus_scal_" - [(set (match_operand: 0 "register_operand") - (unspec: [(match_dup 2) - (match_operand:SVE_I 1 "register_operand")] - UNSPEC_ADDV))] +;; ------------------------------------------------------------------------- +;; ---- [INT<-FP] Packs +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +;; Convert two vectors of DF to SI and pack the results into a single vector. 
+(define_expand "vec_pack_fix_trunc_vnx2df" + [(set (match_dup 4) + (unspec:VNx4SI + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:VNx2DF 1 "register_operand")] + SVE_COND_FCVTI)) + (set (match_dup 5) + (unspec:VNx4SI + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:VNx2DF 2 "register_operand")] + SVE_COND_FCVTI)) + (set (match_operand:VNx4SI 0 "register_operand") + (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[3] = aarch64_ptrue_reg (VNx2BImode); + operands[4] = gen_reg_rtx (VNx4SImode); + operands[5] = gen_reg_rtx (VNx4SImode); } ) -;; Predicated integer add reduction. The result is always 64-bits. -(define_insn "*reduc_plus_scal_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_I 2 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SVE" - "uaddv\t%d0, %1, %2." -) +;; ------------------------------------------------------------------------- +;; ---- [INT<-FP] Unpacks +;; ------------------------------------------------------------------------- +;; No patterns here yet! +;; ------------------------------------------------------------------------- -;; Unpredicated floating-point add reduction. -(define_expand "reduc_plus_scal_" - [(set (match_operand: 0 "register_operand") - (unspec: [(match_dup 2) - (match_operand:SVE_F 1 "register_operand")] - UNSPEC_FADDV))] +;; ------------------------------------------------------------------------- +;; ---- [FP<-INT] Conversions +;; ------------------------------------------------------------------------- +;; Includes: +;; - SCVTF +;; - UCVTF +;; ------------------------------------------------------------------------- + +;; Unpredicated conversion of integers to floats of the same size +;; (HI to HF, SI to SF or DI to DF). +(define_expand "2" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 2) + (const_int SVE_RELAXED_GP) + (match_operand: 1 "register_operand")] + SVE_COND_ICVTF))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[2] = aarch64_ptrue_reg (mode); } ) -;; Predicated floating-point add reduction. -(define_insn "*reduc_plus_scal_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_F 2 "register_operand" "w")] - UNSPEC_FADDV))] - "TARGET_SVE" - "faddv\t%0, %1, %2." +;; Predicated integer-to-float conversion, either to the same width or +;; narrower. +(define_insn "@aarch64_sve__nonextend" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")] + SVE_COND_ICVTF))] + "TARGET_SVE && >= " + "@ + cvtf\t%0., %1/m, %2. + movprfx\t%0, %2\;cvtf\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] ) -;; Unpredicated integer MIN/MAX reduction. -(define_expand "reduc__scal_" - [(set (match_operand: 0 "register_operand") - (unspec: [(match_dup 2) - (match_operand:SVE_I 1 "register_operand")] - MAXMINV))] +;; Predicated widening integer-to-float conversion. 
+(define_insn "@aarch64_sve__extend" + [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w") + (unspec:VNx2DF_ONLY + [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")] + SVE_COND_ICVTF))] "TARGET_SVE" + "@ + cvtf\t%0., %1/m, %2. + movprfx\t%0, %2\;cvtf\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer-to-float conversion with merging, either to the same +;; width or narrower. +(define_expand "@cond__nonextend" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_HSDI 2 "register_operand")] + SVE_COND_ICVTF) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE && >= " +) + +;; The first alternative doesn't need the earlyclobber, but the only case +;; it would help is the uninteresting one in which operands 2 and 3 are +;; the same register (despite having different modes). Making all the +;; alternatives earlyclobber makes things more consistent for the +;; register allocator. +(define_insn_and_rewrite "*cond__nonextend_relaxed" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] + SVE_COND_ICVTF) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + cvtf\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." + "&& !rtx_equal_p (operands[1], operands[4])" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[4] = copy_rtx (operands[1]); } + [(set_attr "movprfx" "*,yes,yes")] +) + +(define_insn "*cond__nonextend_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] + SVE_COND_ICVTF) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + cvtf\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; Predicated widening integer-to-float conversion with merging. +(define_expand "@cond__extend" + [(set (match_operand:VNx2DF_ONLY 0 "register_operand") + (unspec:VNx2DF_ONLY + [(match_operand:VNx2BI 1 "register_operand") + (unspec:VNx2DF_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:VNx4SI_ONLY 2 "register_operand")] + SVE_COND_ICVTF) + (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" ) -;; Predicated integer MIN/MAX reduction. 
-(define_insn "*reduc__scal_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_I 2 "register_operand" "w")] - MAXMINV))] +(define_insn "*cond__extend" + [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w") + (unspec:VNx2DF_ONLY + [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx2DF_ONLY + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] + SVE_COND_ICVTF) + (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] "TARGET_SVE" - "v\t%0, %1, %2." + "@ + cvtf\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] ) -;; Unpredicated floating-point MIN/MAX reduction. -(define_expand "reduc__scal_" - [(set (match_operand: 0 "register_operand") - (unspec: [(match_dup 2) - (match_operand:SVE_F 1 "register_operand")] - FMAXMINV))] +;; ------------------------------------------------------------------------- +;; ---- [FP<-INT] Packs +;; ------------------------------------------------------------------------- +;; No patterns here yet! +;; ------------------------------------------------------------------------- + +;; ------------------------------------------------------------------------- +;; ---- [FP<-INT] Unpacks +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI +;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the +;; unpacked VNx4SI to VNx2DF. +(define_expand "vec_unpack_float__vnx4si" + [(match_operand:VNx2DF 0 "register_operand") + (FLOATUORS:VNx2DF + (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] + UNPACK_UNSIGNED))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + /* Use ZIP to do the unpack, since we don't care about the upper halves + and since it has the nice property of not needing any subregs. + If using UUNPK* turns out to be preferable, we could model it as + a ZIP whose first operand is zero. */ + rtx temp = gen_reg_rtx (VNx4SImode); + emit_insn (( + ? gen_aarch64_sve_zip2vnx4si + : gen_aarch64_sve_zip1vnx4si) + (temp, operands[1], operands[1])); + rtx ptrue = aarch64_ptrue_reg (VNx2BImode); + rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); + emit_insn (gen_aarch64_sve__extendvnx4sivnx2df + (operands[0], ptrue, temp, strictness)); + DONE; } ) -;; Predicated floating-point MIN/MAX reduction. -(define_insn "*reduc__scal_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_F 2 "register_operand" "w")] - FMAXMINV))] - "TARGET_SVE" - "v\t%0, %1, %2." -) +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Packs +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCVT +;; ------------------------------------------------------------------------- -(define_expand "reduc__scal_" - [(set (match_operand: 0 "register_operand") - (unspec: [(match_dup 2) - (match_operand:SVE_I 1 "register_operand")] - BITWISEV))] +;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack +;; the results into a single vector. 
+(define_expand "vec_pack_trunc_" + [(set (match_dup 4) + (unspec:SVE_FULL_HSF + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand: 1 "register_operand")] + UNSPEC_COND_FCVT)) + (set (match_dup 5) + (unspec:SVE_FULL_HSF + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand: 2 "register_operand")] + UNSPEC_COND_FCVT)) + (set (match_operand:SVE_FULL_HSF 0 "register_operand") + (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[3] = aarch64_ptrue_reg (mode); + operands[4] = gen_reg_rtx (mode); + operands[5] = gen_reg_rtx (mode); } ) -(define_insn "*reduc__scal_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_I 2 "register_operand" "w")] - BITWISEV))] - "TARGET_SVE" - "\t%0, %1, %2." +;; Predicated float-to-float truncation. +(define_insn "@aarch64_sve__trunc" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_HSF + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")] + SVE_COND_FCVT))] + "TARGET_SVE && > " + "@ + fcvt\t%0., %1/m, %2. + movprfx\t%0, %2\;fcvt\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated float-to-float truncation with merging. +(define_expand "@cond__trunc" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand") + (unspec:SVE_FULL_HSF + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_HSF + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_SDF 2 "register_operand")] + SVE_COND_FCVT) + (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE && > " +) + +(define_insn "*cond__trunc" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w") + (unspec:SVE_FULL_HSF + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_HSF + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")] + SVE_COND_FCVT) + (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && > " + "@ + fcvt\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. + movprfx\t%0, %3\;fcvt\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Packs (bfloat16) +;; ------------------------------------------------------------------------- +;; Includes: +;; - BFCVT (BF16) +;; - BFCVTNT (BF16) +;; ------------------------------------------------------------------------- + +;; Predicated BFCVT. +(define_insn "@aarch64_sve__trunc" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")] + SVE_COND_FCVT))] + "TARGET_SVE_BF16" + "@ + bfcvt\t%0.h, %1/m, %2.s + movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated BFCVT with merging. 
+(define_expand "@cond__trunc" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 1 "register_operand") + (unspec:VNx8BF_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:VNx4SF_ONLY 2 "register_operand")] + SVE_COND_FCVT) + (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE_BF16" +) + +(define_insn "*cond__trunc" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx8BF_ONLY + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")] + SVE_COND_FCVT) + (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE_BF16" + "@ + bfcvt\t%0.h, %1/m, %2.s + movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s + movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s" + [(set_attr "movprfx" "*,yes,yes")] ) -;; Unpredicated in-order FP reductions. -(define_expand "fold_left_plus_" - [(set (match_operand: 0 "register_operand") - (unspec: [(match_dup 3) - (match_operand: 1 "register_operand") - (match_operand:SVE_F 2 "register_operand")] - UNSPEC_FADDA))] +;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_* +;; pair because the even elements always have to be supplied for active +;; elements, even if the inactive elements don't matter. +;; +;; This instructions does not take MOVPRFX. +(define_insn "@aarch64_sve_cvtnt" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 2 "register_operand" "Upl") + (const_int SVE_STRICT_GP) + (match_operand:VNx8BF_ONLY 1 "register_operand" "0") + (match_operand:VNx4SF 3 "register_operand" "w")] + UNSPEC_COND_FCVTNT))] + "TARGET_SVE_BF16" + "bfcvtnt\t%0.h, %2/m, %3.s" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Unpacks +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCVT +;; ------------------------------------------------------------------------- + +;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. +;; First unpack the source without conversion, then float-convert the +;; unpacked source. +(define_expand "vec_unpacks__" + [(match_operand: 0 "register_operand") + (unspec:SVE_FULL_HSF + [(match_operand:SVE_FULL_HSF 1 "register_operand")] + UNPACK_UNSIGNED)] "TARGET_SVE" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + /* Use ZIP to do the unpack, since we don't care about the upper halves + and since it has the nice property of not needing any subregs. + If using UUNPK* turns out to be preferable, we could model it as + a ZIP whose first operand is zero. */ + rtx temp = gen_reg_rtx (mode); + emit_insn (( + ? gen_aarch64_sve_zip2 + : gen_aarch64_sve_zip1) + (temp, operands[1], operands[1])); + rtx ptrue = aarch64_ptrue_reg (mode); + rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); + emit_insn (gen_aarch64_sve_fcvt_nontrunc + (operands[0], ptrue, temp, strictness)); + DONE; } ) -;; In-order FP reductions predicated with PTRUE. -(define_insn "*fold_left_plus_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: [(match_operand: 1 "register_operand" "Upl") - (match_operand: 2 "register_operand" "0") - (match_operand:SVE_F 3 "register_operand" "w")] - UNSPEC_FADDA))] +;; Predicated float-to-float extension. 
+(define_insn "@aarch64_sve__nontrunc" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")] + SVE_COND_FCVT))] + "TARGET_SVE && > " + "@ + fcvt\t%0., %1/m, %2. + movprfx\t%0, %2\;fcvt\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated float-to-float extension with merging. +(define_expand "@cond__nontrunc" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_SDF + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_HSF 2 "register_operand")] + SVE_COND_FCVT) + (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE && > " +) + +(define_insn "*cond__nontrunc" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_SDF + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")] + SVE_COND_FCVT) + (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && > " + "@ + fcvt\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. + movprfx\t%0, %3\;fcvt\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED<-PRED] Packs +;; ------------------------------------------------------------------------- +;; Includes: +;; - UZP1 +;; ------------------------------------------------------------------------- + +;; Predicate pack. Use UZP1 on the narrower type, which discards +;; the high part of each wide element. +(define_insn "vec_pack_trunc_" + [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") + (unspec:PRED_BHS + [(match_operand: 1 "register_operand" "Upa") + (match_operand: 2 "register_operand" "Upa")] + UNSPEC_PACK))] "TARGET_SVE" - "fadda\t%0, %1, %0, %3." + "uzp1\t%0., %1., %2." ) -;; Predicated form of the above in-order reduction. -(define_insn "*pred_fold_left_plus_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: - [(match_operand: 1 "register_operand" "0") - (unspec:SVE_F - [(match_operand: 2 "register_operand" "Upl") - (match_operand:SVE_F 3 "register_operand" "w") - (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] - UNSPEC_SEL)] - UNSPEC_FADDA))] +;; ------------------------------------------------------------------------- +;; ---- [PRED<-PRED] Unpacks +;; ------------------------------------------------------------------------- +;; Includes: +;; - PUNPKHI +;; - PUNPKLO +;; ------------------------------------------------------------------------- + +;; Unpack the low or high half of a predicate, where "high" refers to +;; the low-numbered lanes for big-endian and the high-numbered lanes +;; for little-endian. +(define_expand "vec_unpack__" + [(match_operand: 0 "register_operand") + (unspec: [(match_operand:PRED_BHS 1 "register_operand")] + UNPACK)] "TARGET_SVE" - "fadda\t%0, %2, %0, %3." + { + emit_insn (( + ? gen_aarch64_sve_punpkhi_ + : gen_aarch64_sve_punpklo_) + (operands[0], operands[1])); + DONE; + } ) -;; Unpredicated floating-point addition. 
-(define_expand "add3" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 3) - (plus:SVE_F - (match_operand:SVE_F 1 "register_operand") - (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))] - UNSPEC_MERGE_PTRUE))] +(define_insn "@aarch64_sve_punpk_" + [(set (match_operand: 0 "register_operand" "=Upa") + (unspec: [(match_operand:PRED_BHS 1 "register_operand" "Upa")] + UNPACK_UNSIGNED))] "TARGET_SVE" - { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); - } + "punpk\t%0.h, %1.b" ) -;; Floating-point addition predicated with a PTRUE. -(define_insn "*add3" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl, Upl") - (plus:SVE_F - (match_operand:SVE_F 2 "register_operand" "%0, 0, w") - (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))] - UNSPEC_MERGE_PTRUE))] +;; ========================================================================= +;; == Vector partitioning +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Unary partitioning +;; ------------------------------------------------------------------------- +;; Includes: +;; - BRKA +;; - BRKAS +;; - BRKB +;; - BRKBS +;; ------------------------------------------------------------------------- + +;; Note that unlike most other instructions that have both merging and +;; zeroing forms, these instructions don't operate elementwise and so +;; don't fit the IFN_COND model. +(define_insn "@aarch64_brk" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") + (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") + (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] + SVE_BRK_UNARY))] "TARGET_SVE" "@ - fadd\t%0., %1/m, %0., #%3 - fsub\t%0., %1/m, %0., #%N3 - fadd\t%0., %2., %3." -) - -;; Unpredicated floating-point subtraction. -(define_expand "sub3" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 3) - (minus:SVE_F - (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") - (match_operand:SVE_F 2 "register_operand"))] - UNSPEC_MERGE_PTRUE))] + brk\t%0.b, %1/z, %2.b + brk\t%0.b, %1/m, %2.b" +) + +;; Same, but also producing a flags result. +(define_insn "*aarch64_brk_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") + (match_dup 1) + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:VNx16BI + [(match_dup 1) + (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") + (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] + SVE_BRK_UNARY)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") + (unspec:VNx16BI + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + SVE_BRK_UNARY))] + "TARGET_SVE" + "@ + brks\t%0.b, %1/z, %2.b + brks\t%0.b, %1/m, %2.b" +) + +;; Same, but with only the flags result being interesting. 
+(define_insn "*aarch64_brk_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") + (match_dup 1) + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:VNx16BI + [(match_dup 1) + (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") + (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] + SVE_BRK_UNARY)] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))] "TARGET_SVE" + "@ + brks\t%0.b, %1/z, %2.b + brks\t%0.b, %1/m, %2.b" +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Binary partitioning +;; ------------------------------------------------------------------------- +;; Includes: +;; - BRKN +;; - BRKNS +;; - BRKPA +;; - BRKPAS +;; - BRKPB +;; - BRKPBS +;; ------------------------------------------------------------------------- + +;; Binary BRKs (BRKN, BRKPA, BRKPB). +(define_insn "@aarch64_brk" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand:VNx16BI 2 "register_operand" "Upa") + (match_operand:VNx16BI 3 "register_operand" "")] + SVE_BRK_BINARY))] + "TARGET_SVE" + "brk\t%0.b, %1/z, %2.b, %.b" +) + +;; Same, but also producing a flags result. +(define_insn "*aarch64_brk_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_dup 1) + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:VNx16BI + [(match_dup 1) + (match_operand:VNx16BI 2 "register_operand" "Upa") + (match_operand:VNx16BI 3 "register_operand" "")] + SVE_BRK_BINARY)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + SVE_BRK_BINARY))] + "TARGET_SVE" + "brks\t%0.b, %1/z, %2.b, %.b" +) + +;; Same, but with only the flags result being interesting. +(define_insn "*aarch64_brk_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_dup 1) + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:VNx16BI + [(match_dup 1) + (match_operand:VNx16BI 2 "register_operand" "Upa") + (match_operand:VNx16BI 3 "register_operand" "")] + SVE_BRK_BINARY)] + UNSPEC_PTEST)) + (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" + "brks\t%0.b, %1/z, %2.b, %.b" +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Scalarization +;; ------------------------------------------------------------------------- +;; Includes: +;; - PFIRST +;; - PNEXT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (unspec:PRED_ALL + [(match_operand:PRED_ALL 1 "register_operand" "Upa") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_ALL 3 "register_operand" "0")] + SVE_PITER)) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE && >= " + "\t%0., %1, %0." +) + +;; Same, but also producing a flags result. 
+(define_insn_and_rewrite "*aarch64_sve__cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 2) + (match_operand:SI 3 "aarch64_sve_ptrue_flag") + (unspec:PRED_ALL + [(match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (match_operand:PRED_ALL 6 "register_operand" "0")] + SVE_PITER)] + UNSPEC_PTEST)) + (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") + (unspec:PRED_ALL + [(match_dup 4) + (match_dup 5) + (match_dup 6)] + SVE_PITER))] + "TARGET_SVE + && >= + && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" + "\t%0., %1, %0." + "&& !rtx_equal_p (operands[2], operands[4])" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + operands[4] = operands[2]; + operands[5] = operands[3]; } ) -;; Floating-point subtraction predicated with a PTRUE. -(define_insn "*sub3" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") - (minus:SVE_F - (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w") - (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))] - UNSPEC_MERGE_PTRUE))] +;; Same, but with only the flags result being interesting. +(define_insn_and_rewrite "*aarch64_sve__ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 2) + (match_operand:SI 3 "aarch64_sve_ptrue_flag") + (unspec:PRED_ALL + [(match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (match_operand:PRED_ALL 6 "register_operand" "0")] + SVE_PITER)] + UNSPEC_PTEST)) + (clobber (match_scratch:PRED_ALL 0 "=Upa"))] "TARGET_SVE - && (register_operand (operands[2], mode) - || register_operand (operands[3], mode))" - "@ - fsub\t%0., %1/m, %0., #%3 - fadd\t%0., %1/m, %0., #%N3 - fsubr\t%0., %1/m, %0., #%2 - fsub\t%0., %2., %3." -) - -;; Unpredicated floating-point multiplication. -(define_expand "mul3" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 3) - (mult:SVE_F - (match_operand:SVE_F 1 "register_operand") - (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))] - UNSPEC_MERGE_PTRUE))] - "TARGET_SVE" + && >= + && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" + "\t%0., %1, %0." + "&& !rtx_equal_p (operands[2], operands[4])" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + operands[4] = operands[2]; + operands[5] = operands[3]; } ) -;; Floating-point multiplication predicated with a PTRUE. -(define_insn "*mul3" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl") - (mult:SVE_F - (match_operand:SVE_F 2 "register_operand" "%0, w") - (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))] - UNSPEC_MERGE_PTRUE))] - "TARGET_SVE" - "@ - fmul\t%0., %1/m, %0., #%3 - fmul\t%0., %2., %3." -) +;; ========================================================================= +;; == Counting elements +;; ========================================================================= -;; Unpredicated fma (%0 = (%1 * %2) + %3). 
-(define_expand "fma4" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 4) - (fma:SVE_F (match_operand:SVE_F 1 "register_operand") - (match_operand:SVE_F 2 "register_operand") - (match_operand:SVE_F 3 "register_operand"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- [INT] Count elements in a pattern (scalar) +;; ------------------------------------------------------------------------- +;; Includes: +;; - CNTB +;; - CNTD +;; - CNTH +;; - CNTW +;; ------------------------------------------------------------------------- + +;; Count the number of elements in an svpattern. Operand 1 is the pattern, +;; operand 2 is the number of elements that fit in a 128-bit block, and +;; operand 3 is a multiplier in the range [1, 16]. +;; +;; Note that this pattern isn't used for SV_ALL (but would work for that too). +(define_insn "aarch64_sve_cnt_pat" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unspec:SI [(match_operand:DI 1 "const_int_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)))] "TARGET_SVE" { - operands[4] = force_reg (mode, CONSTM1_RTX (mode)); + return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1); } ) -;; fma predicated with a PTRUE. -(define_insn "*fma4" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl") - (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w") - (match_operand:SVE_F 4 "register_operand" "w, w") - (match_operand:SVE_F 2 "register_operand" "w, 0"))] - UNSPEC_MERGE_PTRUE))] - "TARGET_SVE" - "@ - fmad\t%0., %1/m, %4., %2. - fmla\t%0., %1/m, %3., %4." -) - -;; Unpredicated fnma (%0 = (-%1 * %2) + %3). -(define_expand "fnma4" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 4) - (fma:SVE_F (neg:SVE_F - (match_operand:SVE_F 1 "register_operand")) - (match_operand:SVE_F 2 "register_operand") - (match_operand:SVE_F 3 "register_operand"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- [INT] Increment by the number of elements in a pattern (scalar) +;; ------------------------------------------------------------------------- +;; Includes: +;; - INC +;; - SQINC +;; - UQINC +;; ------------------------------------------------------------------------- + +;; Increment a DImode register by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:DI 0 "register_operand" "=r") + (ANY_PLUS:DI (zero_extend:DI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)) + (match_operand:DI_ONLY 1 "register_operand" "0")))] "TARGET_SVE" { - operands[4] = force_reg (mode, CONSTM1_RTX (mode)); + return aarch64_output_sve_cnt_pat_immediate ("", "%x0", + operands + 2); } ) -;; fnma predicated with a PTRUE. 
-(define_insn "*fnma4" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl") - (fma:SVE_F (neg:SVE_F - (match_operand:SVE_F 3 "register_operand" "%0, w")) - (match_operand:SVE_F 4 "register_operand" "w, w") - (match_operand:SVE_F 2 "register_operand" "w, 0"))] - UNSPEC_MERGE_PTRUE))] +;; Increment an SImode register by the number of elements in an svpattern +;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting +;; behavior. +(define_insn "*aarch64_sve_incsi_pat" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT) + (match_operand:SI 1 "register_operand" "0")))] "TARGET_SVE" - "@ - fmsb\t%0., %1/m, %4., %2. - fmls\t%0., %1/m, %3., %4." + { + return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2); + } ) -;; Unpredicated fms (%0 = (%1 * %2) - %3). -(define_expand "fms4" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 4) - (fma:SVE_F (match_operand:SVE_F 1 "register_operand") - (match_operand:SVE_F 2 "register_operand") - (neg:SVE_F - (match_operand:SVE_F 3 "register_operand")))] - UNSPEC_MERGE_PTRUE))] +;; Increment an SImode register by the number of elements in an svpattern +;; using saturating arithmetic, extending the result to 64 bits. +;; +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:DI 0 "register_operand" "=r") + (:DI + (SAT_PLUS:SI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT) + (match_operand:SI_ONLY 1 "register_operand" "0"))))] "TARGET_SVE" { - operands[4] = force_reg (mode, CONSTM1_RTX (mode)); + const char *registers = ( == SS_PLUS ? "%x0, %w0" : "%w0"); + return aarch64_output_sve_cnt_pat_immediate ("", registers, + operands + 2); } ) -;; fms predicated with a PTRUE. -(define_insn "*fms4" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl") - (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w") - (match_operand:SVE_F 4 "register_operand" "w, w") - (neg:SVE_F - (match_operand:SVE_F 2 "register_operand" "w, 0")))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- [INT] Increment by the number of elements in a pattern (vector) +;; ------------------------------------------------------------------------- +;; Includes: +;; - INC +;; - SQINC +;; - UQINC +;; ------------------------------------------------------------------------- + +;; Increment a vector of DIs by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") + (ANY_PLUS:VNx2DI + (vec_duplicate:VNx2DI + (zero_extend:DI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT))) + (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] "TARGET_SVE" - "@ - fnmsb\t%0., %1/m, %4., %2. - fnmls\t%0., %1/m, %3., %4." 
+ { + if (which_alternative == 1) + output_asm_insn ("movprfx\t%0, %1", operands); + return aarch64_output_sve_cnt_pat_immediate ("", "%0.", + operands + 2); + } + [(set_attr "movprfx" "*,yes")] ) -;; Unpredicated fnms (%0 = (-%1 * %2) - %3). -(define_expand "fnms4" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 4) - (fma:SVE_F (neg:SVE_F - (match_operand:SVE_F 1 "register_operand")) - (match_operand:SVE_F 2 "register_operand") - (neg:SVE_F - (match_operand:SVE_F 3 "register_operand")))] - UNSPEC_MERGE_PTRUE))] +;; Increment a vector of SIs by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") + (ANY_PLUS:VNx4SI + (vec_duplicate:VNx4SI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)) + (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] "TARGET_SVE" { - operands[4] = force_reg (mode, CONSTM1_RTX (mode)); + if (which_alternative == 1) + output_asm_insn ("movprfx\t%0, %1", operands); + return aarch64_output_sve_cnt_pat_immediate ("", "%0.", + operands + 2); } -) - -;; fnms predicated with a PTRUE. -(define_insn "*fnms4" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl") - (fma:SVE_F (neg:SVE_F - (match_operand:SVE_F 3 "register_operand" "%0, w")) - (match_operand:SVE_F 4 "register_operand" "w, w") - (neg:SVE_F - (match_operand:SVE_F 2 "register_operand" "w, 0")))] - UNSPEC_MERGE_PTRUE))] + [(set_attr "movprfx" "*,yes")] +) + +;; Increment a vector of HIs by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_expand "@aarch64_sve__pat" + [(set (match_operand:VNx8HI 0 "register_operand") + (ANY_PLUS:VNx8HI + (vec_duplicate:VNx8HI + (truncate:HI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT))) + (match_operand:VNx8HI_ONLY 1 "register_operand")))] + "TARGET_SVE" +) + +(define_insn "*aarch64_sve__pat" + [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") + (ANY_PLUS:VNx8HI + (vec_duplicate:VNx8HI + (match_operator:HI 5 "subreg_lowpart_operator" + [(unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)])) + (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] "TARGET_SVE" - "@ - fnmad\t%0., %1/m, %4., %2. - fnmla\t%0., %1/m, %3., %4." -) - -;; Unpredicated floating-point division. 
-(define_expand "div3" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 3) - (div:SVE_F (match_operand:SVE_F 1 "register_operand") - (match_operand:SVE_F 2 "register_operand"))] - UNSPEC_MERGE_PTRUE))] + { + if (which_alternative == 1) + output_asm_insn ("movprfx\t%0, %1", operands); + return aarch64_output_sve_cnt_pat_immediate ("", "%0.", + operands + 2); + } + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Decrement by the number of elements in a pattern (scalar) +;; ------------------------------------------------------------------------- +;; Includes: +;; - DEC +;; - SQDEC +;; - UQDEC +;; ------------------------------------------------------------------------- + +;; Decrement a DImode register by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:DI 0 "register_operand" "=r") + (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0") + (zero_extend:DI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT))))] "TARGET_SVE" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + return aarch64_output_sve_cnt_pat_immediate ("", "%x0", + operands + 2); } ) -;; Floating-point division predicated with a PTRUE. -(define_insn "*div3" - [(set (match_operand:SVE_F 0 "register_operand" "=w, w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl, Upl") - (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w") - (match_operand:SVE_F 3 "register_operand" "w, 0"))] - UNSPEC_MERGE_PTRUE))] +;; Decrement an SImode register by the number of elements in an svpattern +;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting +;; behavior. +(define_insn "*aarch64_sve_decsi_pat" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)))] "TARGET_SVE" - "@ - fdiv\t%0., %1/m, %0., %3. - fdivr\t%0., %1/m, %0., %2." + { + return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2); + } ) -;; Unpredicated FNEG, FABS and FSQRT. -(define_expand "2" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 2) - (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))] - UNSPEC_MERGE_PTRUE))] +;; Decrement an SImode register by the number of elements in an svpattern +;; using saturating arithmetic, extending the result to 64 bits. +;; +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:DI 0 "register_operand" "=r") + (:DI + (SAT_MINUS:SI + (match_operand:SI_ONLY 1 "register_operand" "0") + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT))))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + const char *registers = ( == SS_MINUS ? "%x0, %w0" : "%w0"); + return aarch64_output_sve_cnt_pat_immediate ("", registers, + operands + 2); } ) -;; FNEG, FABS and FSQRT predicated with a PTRUE. 
-(define_insn "*2" - [(set (match_operand:SVE_F 0 "register_operand" "=w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl") - (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- [INT] Decrement by the number of elements in a pattern (vector) +;; ------------------------------------------------------------------------- +;; Includes: +;; - DEC +;; - SQDEC +;; - UQDEC +;; ------------------------------------------------------------------------- + +;; Decrement a vector of DIs by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") + (ANY_MINUS:VNx2DI + (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") + (vec_duplicate:VNx2DI + (zero_extend:DI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)))))] "TARGET_SVE" - "\t%0., %1/m, %2." + { + if (which_alternative == 1) + output_asm_insn ("movprfx\t%0, %1", operands); + return aarch64_output_sve_cnt_pat_immediate ("", "%0.", + operands + 2); + } + [(set_attr "movprfx" "*,yes")] ) -;; Unpredicated FRINTy. -(define_expand "2" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 2) - (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")] - FRINT)] - UNSPEC_MERGE_PTRUE))] +;; Decrement a vector of SIs by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. +(define_insn "@aarch64_sve__pat" + [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") + (ANY_MINUS:VNx4SI + (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") + (vec_duplicate:VNx4SI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT))))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + if (which_alternative == 1) + output_asm_insn ("movprfx\t%0, %1", operands); + return aarch64_output_sve_cnt_pat_immediate ("", "%0.", + operands + 2); } -) - -;; FRINTy predicated with a PTRUE. -(define_insn "*2" - [(set (match_operand:SVE_F 0 "register_operand" "=w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl") - (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")] - FRINT)] - UNSPEC_MERGE_PTRUE))] + [(set_attr "movprfx" "*,yes")] +) + +;; Decrement a vector of HIs by the number of elements in an svpattern. +;; See aarch64_sve_cnt_pat for the counting behavior. 
+(define_expand "@aarch64_sve__pat" + [(set (match_operand:VNx8HI 0 "register_operand") + (ANY_MINUS:VNx8HI + (match_operand:VNx8HI_ONLY 1 "register_operand") + (vec_duplicate:VNx8HI + (truncate:HI + (unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)))))] + "TARGET_SVE" +) + +(define_insn "*aarch64_sve__pat" + [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") + (ANY_MINUS:VNx8HI + (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") + (vec_duplicate:VNx8HI + (match_operator:HI 5 "subreg_lowpart_operator" + [(unspec:SI [(match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand") + (match_operand:DI 4 "const_int_operand")] + UNSPEC_SVE_CNT_PAT)]))))] "TARGET_SVE" - "frint\t%0., %1/m, %2." -) - -;; Unpredicated conversion of floats to integers of the same size (HF to HI, -;; SF to SI or DF to DI). -(define_expand "2" - [(set (match_operand: 0 "register_operand") - (unspec: - [(match_dup 2) - (FIXUORS: - (match_operand:SVE_F 1 "register_operand"))] - UNSPEC_MERGE_PTRUE))] + { + if (which_alternative == 1) + output_asm_insn ("movprfx\t%0, %1", operands); + return aarch64_output_sve_cnt_pat_immediate ("", "%0.", + operands + 2); + } + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Count elements in a predicate (scalar) +;; ------------------------------------------------------------------------- +;; Includes: +;; - CNTP +;; ------------------------------------------------------------------------- + +;; Count the number of set bits in a predicate. Operand 3 is true if +;; operand 1 is known to be all-true. +(define_insn "@aarch64_pred_cntp" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_ALL 3 "register_operand" "Upa")] + UNSPEC_CNTP)))] + "TARGET_SVE" + "cntp\t%x0, %1, %3.") + +;; ------------------------------------------------------------------------- +;; ---- [INT] Increment by the number of elements in a predicate (scalar) +;; ------------------------------------------------------------------------- +;; Includes: +;; - INCP +;; - SQINCP +;; - UQINCP +;; ------------------------------------------------------------------------- + +;; Increment a DImode register by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand") + (ANY_PLUS:DI + (zero_extend:DI + (unspec:SI [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand")] + UNSPEC_CNTP)) + (match_operand:DI_ONLY 1 "register_operand")))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + operands[3] = CONSTM1_RTX (mode); } ) -;; Conversion of SF to DI, SI or HI, predicated with a PTRUE. 
-(define_insn "*v16hsf2" - [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") - (unspec:SVE_HSDI - [(match_operand: 1 "register_operand" "Upl") - (FIXUORS:SVE_HSDI - (match_operand:VNx8HF 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand" "=r") + (ANY_PLUS:DI + (zero_extend:DI + (unspec:SI [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_CNTP)) + (match_operand:DI_ONLY 1 "register_operand" "0")))] "TARGET_SVE" - "fcvtz\t%0., %1/m, %2.h" + "p\t%x0, %2." + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Conversion of SF to DI or SI, predicated with a PTRUE. -(define_insn "*vnx4sf2" - [(set (match_operand:SVE_SDI 0 "register_operand" "=w") - (unspec:SVE_SDI - [(match_operand: 1 "register_operand" "Upl") - (FIXUORS:SVE_SDI - (match_operand:VNx4SF 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] - "TARGET_SVE" - "fcvtz\t%0., %1/m, %2.s" +;; Increment an SImode register by the number of set bits in a predicate +;; using modular arithmetic. See aarch64_sve_cntp for a description of +;; the operands. +(define_insn_and_rewrite "*aarch64_incsi_cntp" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (unspec:SI [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_CNTP) + (match_operand:SI 1 "register_operand" "0")))] + "TARGET_SVE" + "incp\t%x0, %2." + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Conversion of DF to DI or SI, predicated with a PTRUE. -(define_insn "*vnx2df2" - [(set (match_operand:SVE_SDI 0 "register_operand" "=w") - (unspec:SVE_SDI - [(match_operand:VNx2BI 1 "register_operand" "Upl") - (FIXUORS:SVE_SDI - (match_operand:VNx2DF 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +;; Increment an SImode register by the number of set bits in a predicate +;; using saturating arithmetic, extending the result to 64 bits. +;; +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand") + (:DI + (SAT_PLUS:SI + (unspec:SI [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand")] + UNSPEC_CNTP) + (match_operand:SI_ONLY 1 "register_operand"))))] "TARGET_SVE" - "fcvtz\t%0., %1/m, %2.d" + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Unpredicated conversion of integers to floats of the same size -;; (HI to HF, SI to SF or DI to DF). -(define_expand "2" - [(set (match_operand:SVE_F 0 "register_operand") - (unspec:SVE_F - [(match_dup 2) - (FLOATUORS:SVE_F - (match_operand: 1 "register_operand"))] - UNSPEC_MERGE_PTRUE))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand" "=r") + (:DI + (SAT_PLUS:SI + (unspec:SI [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_CNTP) + (match_operand:SI_ONLY 1 "register_operand" "0"))))] "TARGET_SVE" { - operands[2] = force_reg (mode, CONSTM1_RTX (mode)); + if ( == SS_PLUS) + return "p\t%x0, %2., %w0"; + else + return "p\t%w0, %2."; + } + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); } ) -;; Conversion of DI, SI or HI to the same number of HFs, predicated -;; with a PTRUE. 
-(define_insn "*vnx8hf2" - [(set (match_operand:VNx8HF 0 "register_operand" "=w") - (unspec:VNx8HF - [(match_operand: 1 "register_operand" "Upl") - (FLOATUORS:VNx8HF - (match_operand:SVE_HSDI 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +;; ------------------------------------------------------------------------- +;; ---- [INT] Increment by the number of elements in a predicate (vector) +;; ------------------------------------------------------------------------- +;; Includes: +;; - INCP +;; - SQINCP +;; - UQINCP +;; ------------------------------------------------------------------------- + +;; Increment a vector of DIs by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:VNx2DI 0 "register_operand") + (ANY_PLUS:VNx2DI + (vec_duplicate:VNx2DI + (zero_extend:DI + (unspec:SI + [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand")] + UNSPEC_CNTP))) + (match_operand:VNx2DI_ONLY 1 "register_operand")))] "TARGET_SVE" - "cvtf\t%0.h, %1/m, %2." + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE. -(define_insn "*vnx4sf2" - [(set (match_operand:VNx4SF 0 "register_operand" "=w") - (unspec:VNx4SF - [(match_operand: 1 "register_operand" "Upl") - (FLOATUORS:VNx4SF - (match_operand:SVE_SDI 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") + (ANY_PLUS:VNx2DI + (vec_duplicate:VNx2DI + (zero_extend:DI + (unspec:SI + [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand" "Upa, Upa")] + UNSPEC_CNTP))) + (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] "TARGET_SVE" - "cvtf\t%0.s, %1/m, %2." -) - -;; Conversion of DI or SI to DF, predicated with a PTRUE. -(define_insn "aarch64_sve_vnx2df2" - [(set (match_operand:VNx2DF 0 "register_operand" "=w") - (unspec:VNx2DF - [(match_operand:VNx2BI 1 "register_operand" "Upl") - (FLOATUORS:VNx2DF - (match_operand:SVE_SDI 2 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] + "@ + p\t%0.d, %2 + movprfx\t%0, %1\;p\t%0.d, %2" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Increment a vector of SIs by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:VNx4SI 0 "register_operand") + (ANY_PLUS:VNx4SI + (vec_duplicate:VNx4SI + (unspec:SI + [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand")] + UNSPEC_CNTP)) + (match_operand:VNx4SI_ONLY 1 "register_operand")))] "TARGET_SVE" - "cvtf\t%0.d, %1/m, %2." + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Conversion of DFs to the same number of SFs, or SFs to the same number -;; of HFs. 
-(define_insn "*trunc2" - [(set (match_operand:SVE_HSF 0 "register_operand" "=w") - (unspec:SVE_HSF - [(match_operand: 1 "register_operand" "Upl") - (unspec:SVE_HSF - [(match_operand: 2 "register_operand" "w")] - UNSPEC_FLOAT_CONVERT)] - UNSPEC_MERGE_PTRUE))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") + (ANY_PLUS:VNx4SI + (vec_duplicate:VNx4SI + (unspec:SI + [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand" "Upa, Upa")] + UNSPEC_CNTP)) + (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] "TARGET_SVE" - "fcvt\t%0., %1/m, %2." -) - -;; Conversion of SFs to the same number of DFs, or HFs to the same number -;; of SFs. -(define_insn "aarch64_sve_extend2" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: - [(match_operand: 1 "register_operand" "Upl") - (unspec: - [(match_operand:SVE_HSF 2 "register_operand" "w")] - UNSPEC_FLOAT_CONVERT)] - UNSPEC_MERGE_PTRUE))] + "@ + p\t%0.s, %2 + movprfx\t%0, %1\;p\t%0.s, %2" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Increment a vector of HIs by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:VNx8HI 0 "register_operand") + (ANY_PLUS:VNx8HI + (vec_duplicate:VNx8HI + (truncate:HI + (unspec:SI + [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand")] + UNSPEC_CNTP))) + (match_operand:VNx8HI_ONLY 1 "register_operand")))] "TARGET_SVE" - "fcvt\t%0., %1/m, %2." + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Unpack the low or high half of a predicate, where "high" refers to -;; the low-numbered lanes for big-endian and the high-numbered lanes -;; for little-endian. -(define_expand "vec_unpack__" - [(match_operand: 0 "register_operand") - (unspec: [(match_operand:PRED_BHS 1 "register_operand")] - UNPACK)] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") + (ANY_PLUS:VNx8HI + (vec_duplicate:VNx8HI + (match_operator:HI 3 "subreg_lowpart_operator" + [(unspec:SI + [(match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand" "Upa, Upa")] + UNSPEC_CNTP)])) + (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] "TARGET_SVE" + "@ + p\t%0.h, %2 + movprfx\t%0, %1\;p\t%0.h, %2" + "&& !CONSTANT_P (operands[4])" { - emit_insn (( - ? gen_aarch64_sve_punpkhi_ - : gen_aarch64_sve_punpklo_) - (operands[0], operands[1])); - DONE; + operands[4] = CONSTM1_RTX (mode); } -) - -;; PUNPKHI and PUNPKLO. -(define_insn "aarch64_sve_punpk_" - [(set (match_operand: 0 "register_operand" "=Upa") - (unspec: [(match_operand:PRED_BHS 1 "register_operand" "Upa")] - UNPACK_UNSIGNED))] + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Decrement by the number of elements in a predicate (scalar) +;; ------------------------------------------------------------------------- +;; Includes: +;; - DECP +;; - SQDECP +;; - UQDECP +;; ------------------------------------------------------------------------- + +;; Decrement a DImode register by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. 
+(define_expand "@aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand") + (ANY_MINUS:DI + (match_operand:DI_ONLY 1 "register_operand") + (zero_extend:DI + (unspec:SI [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand")] + UNSPEC_CNTP))))] "TARGET_SVE" - "punpk\t%0.h, %1.b" + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Unpack the low or high half of a vector, where "high" refers to -;; the low-numbered lanes for big-endian and the high-numbered lanes -;; for little-endian. -(define_expand "vec_unpack__" - [(match_operand: 0 "register_operand") - (unspec: [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand" "=r") + (ANY_MINUS:DI + (match_operand:DI_ONLY 1 "register_operand" "0") + (zero_extend:DI + (unspec:SI [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_CNTP))))] "TARGET_SVE" + "p\t%x0, %2." + "&& !CONSTANT_P (operands[3])" { - emit_insn (( - ? gen_aarch64_sve_unpkhi_ - : gen_aarch64_sve_unpklo_) - (operands[0], operands[1])); - DONE; + operands[3] = CONSTM1_RTX (mode); } ) -;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO. -(define_insn "aarch64_sve_unpk_" - [(set (match_operand: 0 "register_operand" "=w") - (unspec: [(match_operand:SVE_BHSI 1 "register_operand" "w")] - UNPACK))] - "TARGET_SVE" - "unpk\t%0., %1." +;; Decrement an SImode register by the number of set bits in a predicate +;; using modular arithmetic. See aarch64_sve_cntp for a description of the +;; operands. +(define_insn_and_rewrite "*aarch64_decsi_cntp" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (match_operand:SI 1 "register_operand" "0") + (unspec:SI [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_CNTP)))] + "TARGET_SVE" + "decp\t%x0, %2." + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. -;; First unpack the source without conversion, then float-convert the -;; unpacked source. -(define_expand "vec_unpacks__" - [(match_operand: 0 "register_operand") - (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")] - UNPACK_UNSIGNED)] +;; Decrement an SImode register by the number of set bits in a predicate +;; using saturating arithmetic, extending the result to 64 bits. +;; +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand") + (:DI + (SAT_MINUS:SI + (match_operand:SI_ONLY 1 "register_operand") + (unspec:SI [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand")] + UNSPEC_CNTP))))] "TARGET_SVE" { - /* Use ZIP to do the unpack, since we don't care about the upper halves - and since it has the nice property of not needing any subregs. - If using UUNPK* turns out to be preferable, we could model it as - a ZIP whose first operand is zero. */ - rtx temp = gen_reg_rtx (mode); - emit_insn (( - ? gen_aarch64_sve_zip2 - : gen_aarch64_sve_zip1) - (temp, operands[1], operands[1])); - rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); - emit_insn (gen_aarch64_sve_extend2 (operands[0], - ptrue, temp)); - DONE; + operands[3] = CONSTM1_RTX (mode); } ) -;; Unpack one half of a VNx4SI to VNx2DF. 
First unpack from VNx4SI -;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the -;; unpacked VNx4SI to VNx2DF. -(define_expand "vec_unpack_float__vnx4si" - [(match_operand:VNx2DF 0 "register_operand") - (FLOATUORS:VNx2DF - (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] - UNPACK_UNSIGNED))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:DI 0 "register_operand" "=r") + (:DI + (SAT_MINUS:SI + (match_operand:SI_ONLY 1 "register_operand" "0") + (unspec:SI [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_CNTP))))] "TARGET_SVE" { - /* Use ZIP to do the unpack, since we don't care about the upper halves - and since it has the nice property of not needing any subregs. - If using UUNPK* turns out to be preferable, we could model it as - a ZIP whose first operand is zero. */ - rtx temp = gen_reg_rtx (VNx4SImode); - emit_insn (( - ? gen_aarch64_sve_zip2vnx4si - : gen_aarch64_sve_zip1vnx4si) - (temp, operands[1], operands[1])); - rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); - emit_insn (gen_aarch64_sve_vnx4sivnx2df2 (operands[0], - ptrue, temp)); - DONE; + if ( == SS_MINUS) + return "p\t%x0, %2., %w0"; + else + return "p\t%w0, %2."; + } + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); } ) -;; Predicate pack. Use UZP1 on the narrower type, which discards -;; the high part of each wide element. -(define_insn "vec_pack_trunc_" - [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") - (unspec:PRED_BHS - [(match_operand: 1 "register_operand" "Upa") - (match_operand: 2 "register_operand" "Upa")] - UNSPEC_PACK))] +;; ------------------------------------------------------------------------- +;; ---- [INT] Decrement by the number of elements in a predicate (vector) +;; ------------------------------------------------------------------------- +;; Includes: +;; - DECP +;; - SQDECP +;; - UQDECP +;; ------------------------------------------------------------------------- + +;; Decrement a vector of DIs by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:VNx2DI 0 "register_operand") + (ANY_MINUS:VNx2DI + (match_operand:VNx2DI_ONLY 1 "register_operand") + (vec_duplicate:VNx2DI + (zero_extend:DI + (unspec:SI + [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand")] + UNSPEC_CNTP)))))] "TARGET_SVE" - "uzp1\t%0., %1., %2." + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Integer pack. Use UZP1 on the narrower type, which discards -;; the high part of each wide element. -(define_insn "vec_pack_trunc_" - [(set (match_operand:SVE_BHSI 0 "register_operand" "=w") - (unspec:SVE_BHSI - [(match_operand: 1 "register_operand" "w") - (match_operand: 2 "register_operand" "w")] - UNSPEC_PACK))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") + (ANY_MINUS:VNx2DI + (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") + (vec_duplicate:VNx2DI + (zero_extend:DI + (unspec:SI + [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand" "Upa, Upa")] + UNSPEC_CNTP)))))] "TARGET_SVE" - "uzp1\t%0., %1., %2." -) - -;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack -;; the results into a single vector. 
-(define_expand "vec_pack_trunc_" - [(set (match_dup 4) - (unspec:SVE_HSF - [(match_dup 3) - (unspec:SVE_HSF [(match_operand: 1 "register_operand")] - UNSPEC_FLOAT_CONVERT)] - UNSPEC_MERGE_PTRUE)) - (set (match_dup 5) - (unspec:SVE_HSF - [(match_dup 3) - (unspec:SVE_HSF [(match_operand: 2 "register_operand")] - UNSPEC_FLOAT_CONVERT)] - UNSPEC_MERGE_PTRUE)) - (set (match_operand:SVE_HSF 0 "register_operand") - (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] + "@ + p\t%0.d, %2 + movprfx\t%0, %1\;p\t%0.d, %2" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Decrement a vector of SIs by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:VNx4SI 0 "register_operand") + (ANY_MINUS:VNx4SI + (match_operand:VNx4SI_ONLY 1 "register_operand") + (vec_duplicate:VNx4SI + (unspec:SI + [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand")] + UNSPEC_CNTP))))] "TARGET_SVE" { - operands[3] = force_reg (mode, CONSTM1_RTX (mode)); - operands[4] = gen_reg_rtx (mode); - operands[5] = gen_reg_rtx (mode); + operands[3] = CONSTM1_RTX (mode); } ) -;; Convert two vectors of DF to SI and pack the results into a single vector. -(define_expand "vec_pack_fix_trunc_vnx2df" - [(set (match_dup 4) - (unspec:VNx4SI - [(match_dup 3) - (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))] - UNSPEC_MERGE_PTRUE)) - (set (match_dup 5) - (unspec:VNx4SI - [(match_dup 3) - (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))] - UNSPEC_MERGE_PTRUE)) - (set (match_operand:VNx4SI 0 "register_operand") - (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") + (ANY_MINUS:VNx4SI + (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") + (vec_duplicate:VNx4SI + (unspec:SI + [(match_operand 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand" "Upa, Upa")] + UNSPEC_CNTP))))] "TARGET_SVE" + "@ + p\t%0.s, %2 + movprfx\t%0, %1\;p\t%0.s, %2" + "&& !CONSTANT_P (operands[3])" { - operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); - operands[4] = gen_reg_rtx (VNx4SImode); - operands[5] = gen_reg_rtx (VNx4SImode); + operands[3] = CONSTM1_RTX (mode); } -) - -;; Predicated floating-point operations. -(define_insn "cond_" - [(set (match_operand:SVE_F 0 "register_operand" "=w") - (unspec:SVE_F - [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_F 2 "register_operand" "0") - (match_operand:SVE_F 3 "register_operand" "w")] - SVE_COND_FP_OP))] + [(set_attr "movprfx" "*,yes")] +) + +;; Decrement a vector of HIs by the number of set bits in a predicate. +;; See aarch64_sve_cntp for a description of the operands. +(define_expand "@aarch64_sve__cntp" + [(set (match_operand:VNx8HI 0 "register_operand") + (ANY_MINUS:VNx8HI + (match_operand:VNx8HI_ONLY 1 "register_operand") + (vec_duplicate:VNx8HI + (truncate:HI + (unspec:SI + [(match_dup 3) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand")] + UNSPEC_CNTP)))))] "TARGET_SVE" - "\t%0., %1/m, %0., %3." + { + operands[3] = CONSTM1_RTX (mode); + } ) -;; Shift an SVE vector left and insert a scalar into element 0. 
-(define_insn "vec_shl_insert_" - [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") - (unspec:SVE_ALL - [(match_operand:SVE_ALL 1 "register_operand" "0, 0") - (match_operand: 2 "register_operand" "rZ, w")] - UNSPEC_INSR))] +(define_insn_and_rewrite "*aarch64_sve__cntp" + [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") + (ANY_MINUS:VNx8HI + (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") + (vec_duplicate:VNx8HI + (match_operator:HI 3 "subreg_lowpart_operator" + [(unspec:SI + [(match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (match_operand: 2 "register_operand" "Upa, Upa")] + UNSPEC_CNTP)]))))] "TARGET_SVE" "@ - insr\t%0., %2 - insr\t%0., %2" + p\t%0.h, %2 + movprfx\t%0, %1\;p\t%0.h, %2" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] ) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md new file mode 100644 index 0000000000000..e7cd2b86d25f9 --- /dev/null +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -0,0 +1,2651 @@ +;; Machine description for AArch64 SVE2. +;; Copyright (C) 2019-2021 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +;; The file is organised into the following sections (search for the full +;; line): +;; +;; == Moves +;; ---- Non-temporal gather loads +;; ---- Non-temporal scatter stores +;; +;; == Uniform binary arithmnetic +;; ---- [INT] Multiplication +;; ---- [INT] Scaled high-part multiplication +;; ---- [INT] General binary arithmetic that maps to unspecs +;; ---- [INT] Saturating binary arithmetic +;; ---- [INT] Saturating left shifts +;; +;; == Uniform ternary arithmnetic +;; ---- [INT] General ternary arithmetic that maps to unspecs +;; ---- [INT] Multiply-and-accumulate operations +;; ---- [INT] Binary logic operations with rotation +;; ---- [INT] Ternary logic operations +;; ---- [INT] Shift-and-accumulate operations +;; ---- [INT] Shift-and-insert operations +;; ---- [INT] Sum of absolute differences +;; +;; == Extending arithmetic +;; ---- [INT] Wide binary arithmetic +;; ---- [INT] Long binary arithmetic +;; ---- [INT] Long left shifts +;; ---- [INT] Long binary arithmetic with accumulation +;; ---- [FP] Long multiplication with accumulation +;; +;; == Narrowing arithnetic +;; ---- [INT] Narrowing unary arithmetic +;; ---- [INT] Narrowing binary arithmetic +;; ---- [INT] Narrowing right shifts +;; +;; == Pairwise arithmetic +;; ---- [INT] Pairwise arithmetic +;; ---- [FP] Pairwise arithmetic +;; ---- [INT] Pairwise arithmetic with accumulation +;; +;; == Complex arithmetic +;; ---- [INT] Complex binary operations +;; ---- [INT] Complex ternary operations +;; ---- [INT] Complex dot product +;; +;; == Conversions +;; ---- [FP<-FP] Widening conversions +;; ---- [FP<-FP] Narrowing conversions +;; +;; == Other arithmetic +;; ---- [INT] Reciprocal approximation +;; ---- [INT<-FP] Base-2 logarithm +;; ---- [INT] Polynomial multiplication +;; +;; == Permutation +;; ---- [INT,FP] General permutes +;; ---- [INT] Optional bit-permute extensions +;; +;; == General +;; ---- Check for aliases between pointers +;; ---- Histogram processing +;; ---- String matching +;; +;; == Crypotographic extensions +;; ---- Optional AES extensions +;; ---- Optional SHA-3 extensions +;; ---- Optional SM4 extensions + +;; ========================================================================= +;; == Moves +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Non-temporal gather loads +;; ------------------------------------------------------------------------- +;; Includes gather forms of: +;; - LDNT1B +;; - LDNT1D +;; - LDNT1H +;; - LDNT1W +;; ------------------------------------------------------------------------- + +;; Non-extending loads. +(define_insn "@aarch64_gather_ldnt" + [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w") + (unspec:SVE_FULL_SD + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r") + (match_operand: 3 "register_operand" "w, w") + (mem:BLK (scratch))] + UNSPEC_LDNT1_GATHER))] + "TARGET_SVE2" + "@ + ldnt1\t%0., %1/z, [%3.] + ldnt1\t%0., %1/z, [%3., %2]" +) + +;; Extending loads. 
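For the non-temporal gather loads in this section, each active lane loads from a per-lane address plus an optional scalar offset, and inactive lanes are zeroed. A rough C model of the 64-bit element form (editorial, not part of the patch; names invented):

#include <stdint.h>

/* LDNT1D-style gather: dst[i] = *(addrs[i] + offset) for active lanes,
   0 for inactive lanes (zeroing predication).  */
static void
sketch_gather_ldnt1d (uint64_t *dst, const uint8_t *pg,
                      const uint64_t *addrs, uint64_t offset,
                      unsigned int nlanes)
{
  for (unsigned int i = 0; i < nlanes; i++)
    dst[i] = pg[i] ? *(const uint64_t *) (uintptr_t) (addrs[i] + offset) : 0;
}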
+(define_insn_and_rewrite "@aarch64_gather_ldnt_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w") + (unspec:SVE_FULL_SDI + [(match_operand: 4 "general_operand" "UplDnm, UplDnm") + (ANY_EXTEND:SVE_FULL_SDI + (unspec:SVE_PARTIAL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r") + (match_operand: 3 "register_operand" "w, w") + (mem:BLK (scratch))] + UNSPEC_LDNT1_GATHER))] + UNSPEC_PRED_X))] + "TARGET_SVE2 + && (~ & ) == 0" + "@ + ldnt1\t%0., %1/z, [%3.] + ldnt1\t%0., %1/z, [%3., %2]" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +;; ------------------------------------------------------------------------- +;; ---- Non-temporal scatter stores +;; ------------------------------------------------------------------------- +;; Includes scatter forms of: +;; - STNT1B +;; - STNT1D +;; - STNT1H +;; - STNT1W +;; ------------------------------------------------------------------------- + +;; Non-truncating stores. +(define_insn "@aarch64_scatter_stnt" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand: 0 "register_operand" "Upl, Upl") + (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r") + (match_operand: 2 "register_operand" "w, w") + (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")] + + UNSPEC_STNT1_SCATTER))] + "TARGET_SVE" + "@ + stnt1\t%3., %0, [%2.] + stnt1\t%3., %0, [%2., %1]" +) + +;; Truncating stores. +(define_insn "@aarch64_scatter_stnt_" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand: 0 "register_operand" "Upl, Upl") + (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r") + (match_operand: 2 "register_operand" "w, w") + (truncate:SVE_PARTIAL_I + (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))] + UNSPEC_STNT1_SCATTER))] + "TARGET_SVE2 + && (~ & ) == 0" + "@ + stnt1\t%3., %0, [%2.] + stnt1\t%3., %0, [%2., %1]" +) + +;; ========================================================================= +;; == Uniform binary arithmnetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Multiplication +;; ------------------------------------------------------------------------- +;; Includes the lane forms of: +;; - MUL +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_mul_lane_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") + (mult:SVE_FULL_HSDI + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))] + "TARGET_SVE2" + "mul\t%0., %1., %2.[%3]" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Scaled high-part multiplication +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic. +;; ------------------------------------------------------------------------- + +;; Unpredicated integer multiply-high-with-(round-and-)scale. 
+(define_expand "mulhs3" + [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") + (unspec:SVE_FULL_BHSI + [(match_dup 3) + (unspec:SVE_FULL_BHSI + [(match_operand:SVE_FULL_BHSI 1 "register_operand") + (match_operand:SVE_FULL_BHSI 2 "register_operand")] + MULHRS)] + UNSPEC_PRED_X))] + "TARGET_SVE2" + { + operands[3] = aarch64_ptrue_reg (mode); + + rtx prod_b = gen_reg_rtx (mode); + rtx prod_t = gen_reg_rtx (mode); + emit_insn (gen_aarch64_sve_mullb (prod_b, operands[1], + operands[2])); + emit_insn (gen_aarch64_sve_mullt (prod_t, operands[1], + operands[2])); + + rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1); + emit_insn (gen_aarch64_sve_shrnb (operands[0], prod_b, shift)); + emit_insn (gen_aarch64_sve_shrnt (operands[0], operands[0], + prod_t, shift)); + + DONE; + } +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] General binary arithmetic that maps to unspecs +;; ------------------------------------------------------------------------- +;; Includes: +;; - SHADD +;; - SHSUB +;; - SHSUBR +;; - SQRSHL +;; - SQRSHLR +;; - SRHADD +;; - SRSHL +;; - SRSHLR +;; - SUQADD +;; - UHADD +;; - UHSUB +;; - UHSUBR +;; - UQRSHL +;; - UQRSHLR +;; - URHADD +;; - URSHL +;; - URSHLR +;; - USQADD +;; ------------------------------------------------------------------------- + +;; Integer average (floor). +(define_expand "avg3_floor" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_dup 3) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")] + HADD)] + UNSPEC_PRED_X))] + "TARGET_SVE2" + { + operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + } +) + +;; Integer average (rounding). +(define_expand "avg3_ceil" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_dup 3) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")] + RHADD)] + UNSPEC_PRED_X))] + "TARGET_SVE2" + { + operands[3] = force_reg (mode, CONSTM1_RTX (mode)); + } +) + +;; The immediate form of SQADD acts as an immediate form of SUQADD +;; over its full range. In contrast to the ss_plus pattern, we do +;; not need to treat byte immediates specially. E.g.: +;; +;; SQADD Z0.B, Z0.B, #128 +;; +;; is equivalent to: +;; +;; MOV Z1.B, #128 +;; SUQADD Z0.B, P0/M, Z0.B, Z1.B +;; +;; even though it's not equivalent to: +;; +;; MOV Z1.B, #128 +;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128 +(define_insn "@aarch64_sve_suqadd_const" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") + (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")] + UNSPEC_SUQADD))] + "TARGET_SVE2" + "@ + sqadd\t%0., %0., #%D2 + movprfx\t%0, %1\;sqadd\t%0., %0., #%D2" + [(set_attr "movprfx" "*,yes")] +) + +;; General predicated binary arithmetic. All operations handled here +;; are commutative or have a reversed form. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")] + SVE2_COND_INT_BINARY_REV)] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %0., %3. + \t%0., %1/m, %0., %2. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." 
+ [(set_attr "movprfx" "*,*,yes")] +) + +;; Predicated binary arithmetic with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_I + [(match_dup 5) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "register_operand")] + SVE2_COND_INT_BINARY)] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE2" + { + operands[5] = CONSTM1_RTX (mode); + } +) + +;; Predicated binary arithmetic, merging with the first input. +(define_insn_and_rewrite "*cond__2" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand 4) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] + SVE2_COND_INT_BINARY)] + UNSPEC_PRED_X) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated binary arithmetic, merging with the second input. +(define_insn_and_rewrite "*cond__3" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand 4) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "0, w")] + SVE2_COND_INT_BINARY_REV)] + UNSPEC_PRED_X) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %0., %2. + movprfx\t%0, %3\;\t%0., %1/m, %0., %2." + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated binary operations, merging with an independent value. +(define_insn_and_rewrite "*cond__any" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand 5) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")] + SVE2_COND_INT_BINARY_REV)] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE2 + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!CONSTANT_P (operands[5])) + operands[5] = CONSTM1_RTX (mode); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +;; Predicated binary operations with no reverse form, merging with zero. +;; At present we don't generate these patterns via a cond_* optab, +;; so there's no correctness requirement to handle merging with an +;; independent value. 
+(define_insn_and_rewrite "*cond__z" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand 5) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] + SVE2_COND_INT_BINARY_NOREV)] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." + "&& !CONSTANT_P (operands[5])" + { + operands[5] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Saturating binary arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - SQDMULH +;; - SQRDMULH +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand" "w") + (match_operand:SVE_FULL_I 2 "register_operand" "w")] + SVE2_INT_BINARY))] + "TARGET_SVE2" + "\t%0., %1., %2." +) + +(define_insn "@aarch64_sve__lane_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_BINARY_LANE))] + "TARGET_SVE2" + "\t%0., %1., %2.[%3]" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Saturating left shifts +;; ------------------------------------------------------------------------- +;; Includes: +;; - SQSHL +;; - SQSHLR +;; - UQSHL +;; - UQSHLR +;; ------------------------------------------------------------------------- + +;; Predicated left shifts. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w") + (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, 0, D, w")] + SVE2_COND_INT_SHIFT)] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %0., #%3 + \t%0., %1/m, %0., %3. + r\t%0., %1/m, %0., %2. + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,*,*,yes,yes")] +) + +;; Predicated left shifts with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_I + [(match_dup 5) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand")] + SVE2_COND_INT_SHIFT)] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 4 "register_operand")] + UNSPEC_SEL))] + "TARGET_SVE2" + { + operands[5] = CONSTM1_RTX (mode); + } +) + +;; Predicated left shifts, merging with the first input. 
+(define_insn_and_rewrite "*cond__2" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand 4) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w") + (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, D, w")] + SVE2_COND_INT_SHIFT)] + UNSPEC_PRED_X) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %0., #%3 + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,*,yes,yes")] +) + +;; Predicated left shifts, merging with the second input. +(define_insn_and_rewrite "*cond__3" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand 4) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "0, w")] + SVE2_COND_INT_SHIFT)] + UNSPEC_PRED_X) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + r\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;r\t%0., %1/m, %0., %2." + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated left shifts, merging with an independent value. +(define_insn_and_rewrite "*cond__any" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_I + [(match_operand 5) + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w") + (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, 0, D, w, D, w, D, w")] + SVE2_COND_INT_SHIFT)] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")] + UNSPEC_SEL))] + "TARGET_SVE2 + && !rtx_equal_p (operands[2], operands[4]) + && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;r\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. 
+ # + #" + "&& 1" + { + if (reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + else if (!CONSTANT_P (operands[5])) + operands[5] = CONSTM1_RTX (mode); + else + FAIL; + } + [(set_attr "movprfx" "yes")] +) + +;; ========================================================================= +;; == Uniform ternary arithmnetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] General ternary arithmetic that maps to unspecs +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADCLB +;; - ADCLT +;; - EORBT +;; - EORTB +;; - SBCLB +;; - SBCLT +;; - SQRDMLAH +;; - SQRDMLSH +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w") + (match_operand:SVE_FULL_I 1 "register_operand" "0, w")] + SVE2_INT_TERNARY))] + "TARGET_SVE2" + "@ + \t%0., %2., %3. + movprfx\t%0, %1\;\t%0., %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "@aarch64_sve__lane_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w") + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")] + SVE2_INT_TERNARY_LANE))] + "TARGET_SVE2" + "@ + \t%0., %2., %3.[%4] + movprfx\t%0, %1\;\t%0., %2., %3.[%4]" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Multiply-and-accumulate operations +;; ------------------------------------------------------------------------- +;; Includes the lane forms of: +;; - MLA +;; - MLS +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_add_mul_lane_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_HSDI + (mult:SVE_FULL_HSDI + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")) + (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + mla\t%0., %2., %3.[%4] + movprfx\t%0, %1\;mla\t%0., %2., %3.[%4]" + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "@aarch64_sve_sub_mul_lane_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (minus:SVE_FULL_HSDI + (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w") + (mult:SVE_FULL_HSDI + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))] + "TARGET_SVE2" + "@ + mls\t%0., %2., %3.[%4] + movprfx\t%0, %1\;mls\t%0., %2., %3.[%4]" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Binary logic 
operations with rotation +;; ------------------------------------------------------------------------- +;; Includes: +;; - XAR +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve2_xar" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (rotatert:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" "%0, w") + (match_operand:SVE_FULL_I 2 "register_operand" "w, w")) + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))] + "TARGET_SVE2" + "@ + xar\t%0., %0., %2., #%3 + movprfx\t%0, %1\;xar\t%0., %0., %2., #%3" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Ternary logic operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - BCAX +;; - BSL +;; - BSL1N +;; - BSL2N +;; - EOR3 +;; - NBSL +;; ------------------------------------------------------------------------- + +;; Unpredicated exclusive OR of AND. +(define_expand "@aarch64_sve2_bcax" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (xor:SVE_FULL_I + (and:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_dup 4) + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 3 "register_operand"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 2 "register_operand")) + (match_operand:SVE_FULL_I 1 "register_operand")))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +(define_insn_and_rewrite "*aarch64_sve2_bcax" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (xor:SVE_FULL_I + (and:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_operand 4) + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 2 "register_operand" "w, w")) + (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + bcax\t%0.d, %0.d, %2.d, %3.d + movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated 3-way exclusive OR. +(define_insn "@aarch64_sve2_eor3" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w") + (xor:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w") + (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")) + (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))] + "TARGET_SVE2" + "@ + eor3\t%0.d, %0.d, %2.d, %3.d + eor3\t%0.d, %0.d, %1.d, %3.d + eor3\t%0.d, %0.d, %1.d, %2.d + movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d" + [(set_attr "movprfx" "*,*,*,yes")] +) + +;; Use NBSL for vector NOR. +(define_insn_and_rewrite "*aarch64_sve2_nor" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand 3) + (and:SVE_FULL_I + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")) + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + nbsl\t%0.d, %0.d, %2.d, %0.d + movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Use NBSL for vector NAND. 
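The ternary logic patterns in this area map onto simple bit-level identities; a hedged C restatement of XAR, EOR3, BCAX and the NBSL-based NOR above (editorial, not part of the patch; names invented, 64-bit lanes, and the NAND form that follows is analogous):

#include <stdint.h>

static uint64_t
sketch_xar (uint64_t a, uint64_t b, unsigned int rot)  /* rotate right of XOR, 0 < rot < 64 */
{
  uint64_t x = a ^ b;
  return (x >> rot) | (x << (64 - rot));
}

static uint64_t
sketch_eor3 (uint64_t a, uint64_t b, uint64_t c)       /* three-way exclusive OR */
{
  return a ^ b ^ c;
}

static uint64_t
sketch_bcax (uint64_t a, uint64_t b, uint64_t c)       /* XOR of an AND with complement */
{
  return a ^ (b & ~c);
}

static uint64_t
sketch_nor (uint64_t a, uint64_t b)                    /* what the NBSL-as-NOR pattern computes */
{
  return ~(a | b);
}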
+(define_insn_and_rewrite "*aarch64_sve2_nand" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand 3) + (ior:SVE_FULL_I + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")) + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + nbsl\t%0.d, %0.d, %2.d, %2.d + movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select. +;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup) +(define_expand "@aarch64_sve2_bsl" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (xor:SVE_FULL_I + (and:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")) + (match_operand:SVE_FULL_I 3 "register_operand")) + (match_dup 2)))] + "TARGET_SVE2" +) + +(define_insn "*aarch64_sve2_bsl" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (xor:SVE_FULL_I + (and:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" ", w") + (match_operand:SVE_FULL_I 2 "register_operand" ", w")) + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) + (match_dup BSL_DUP)))] + "TARGET_SVE2" + "@ + bsl\t%0.d, %0.d, %.d, %3.d + movprfx\t%0, %\;bsl\t%0.d, %0.d, %.d, %3.d" + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise inverted select. +;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)) +(define_expand "@aarch64_sve2_nbsl" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_dup 4) + (not:SVE_FULL_I + (xor:SVE_FULL_I + (and:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")) + (match_operand:SVE_FULL_I 3 "register_operand")) + (match_dup 2)))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +(define_insn_and_rewrite "*aarch64_sve2_nbsl" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand 4) + (not:SVE_FULL_I + (xor:SVE_FULL_I + (and:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" ", w") + (match_operand:SVE_FULL_I 2 "register_operand" ", w")) + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) + (match_dup BSL_DUP)))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + nbsl\t%0.d, %0.d, %.d, %3.d + movprfx\t%0, %\;nbsl\t%0.d, %0.d, %.d, %3.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select with inverted first operand. +;; (op3 ? 
~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup) +(define_expand "@aarch64_sve2_bsl1n" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (xor:SVE_FULL_I + (and:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_dup 4) + (not:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 3 "register_operand")) + (match_dup 2)))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +(define_insn_and_rewrite "*aarch64_sve2_bsl1n" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (xor:SVE_FULL_I + (and:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_operand 4) + (not:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" ", w") + (match_operand:SVE_FULL_I 2 "register_operand" ", w")))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) + (match_dup BSL_DUP)))] + "TARGET_SVE2" + "@ + bsl1n\t%0.d, %0.d, %.d, %3.d + movprfx\t%0, %\;bsl1n\t%0.d, %0.d, %.d, %3.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select with inverted second operand. +;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup)) +(define_expand "@aarch64_sve2_bsl2n" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (ior:SVE_FULL_I + (and:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 3 "register_operand")) + (unspec:SVE_FULL_I + [(match_dup 4) + (and:SVE_FULL_I + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand")) + (not:SVE_FULL_I + (match_dup 3)))] + UNSPEC_PRED_X)))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +(define_insn_and_rewrite "*aarch64_sve2_bsl2n" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (ior:SVE_FULL_I + (and:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" ", w") + (match_operand:SVE_FULL_I 2 "register_operand" ", w")) + (unspec:SVE_FULL_I + [(match_operand 4) + (and:SVE_FULL_I + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) + (not:SVE_FULL_I + (match_dup BSL_DUP)))] + UNSPEC_PRED_X)))] + "TARGET_SVE2" + "@ + bsl2n\t%0.d, %0.d, %3.d, %.d + movprfx\t%0, %\;bsl2n\t%0.d, %0.d, %3.d, %.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Unpredicated bitwise select with inverted second operand, alternative form. +;; (bsl_dup ? 
bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3)) +(define_insn_and_rewrite "*aarch64_sve2_bsl2n" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (ior:SVE_FULL_I + (and:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand" ", w") + (match_operand:SVE_FULL_I 2 "register_operand" ", w")) + (unspec:SVE_FULL_I + [(match_operand 4) + (and:SVE_FULL_I + (not:SVE_FULL_I + (match_dup BSL_DUP)) + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))] + UNSPEC_PRED_X)))] + "TARGET_SVE2" + "@ + bsl2n\t%0.d, %0.d, %3.d, %.d + movprfx\t%0, %\;bsl2n\t%0.d, %0.d, %3.d, %.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Shift-and-accumulate operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - SRSRA +;; - SSRA +;; - URSRA +;; - USRA +;; ------------------------------------------------------------------------- + +;; Provide the natural unpredicated interface for SSRA and USRA. +(define_expand "@aarch64_sve_add_" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (plus:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_dup 4) + (SHIFTRT:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 1 "register_operand")))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +;; Pattern-match SSRA and USRA as a predicated operation whose predicate +;; isn't needed. +(define_insn_and_rewrite "*aarch64_sve2_sra" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_operand 4) + (SHIFTRT:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + sra\t%0., %2., #%3 + movprfx\t%0, %1\;sra\t%0., %2., #%3" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; SRSRA and URSRA. +(define_insn "@aarch64_sve_add_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] + VRSHR_N) + (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + sra\t%0., %2., #%3 + movprfx\t%0, %1\;sra\t%0., %2., #%3" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Shift-and-insert operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - SLI +;; - SRI +;; ------------------------------------------------------------------------- + +;; These instructions do not take MOVPRFX. 
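Before the pattern itself, a scalar sketch of the element-wise behaviour of the shift-and-insert instructions may help; this is my reading of SRI/SLI on 64-bit elements and is illustrative only:

#include <assert.h>
#include <stdint.h>

/* SRI: shift SRC right by SHIFT and insert it into DST, keeping the top
   SHIFT bits of DST.  The sketch assumes 1 <= SHIFT <= 63 so that the C
   shifts stay well defined (the instruction also allows SHIFT == 64).  */
static uint64_t
sri64 (uint64_t dst, uint64_t src, unsigned int shift)
{
  uint64_t mask = UINT64_MAX >> shift;
  return (dst & ~mask) | ((src >> shift) & mask);
}

/* SLI: shift SRC left by SHIFT and insert it into DST, keeping the low
   SHIFT bits of DST.  Assumes 0 <= SHIFT <= 63.  */
static uint64_t
sli64 (uint64_t dst, uint64_t src, unsigned int shift)
{
  uint64_t mask = shift ? (UINT64_C (1) << shift) - 1 : 0;
  return (dst & mask) | (src << shift);
}

int
main (void)
{
  assert (sri64 (0xaa00000000000000ULL, UINT64_MAX, 8) == 0xaaffffffffffffffULL);
  assert (sli64 (0x00000000000000aaULL, UINT64_MAX, 8) == 0xffffffffffffffaaULL);
  return 0;
}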
+(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand" "0") + (match_operand:SVE_FULL_I 2 "register_operand" "w") + (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")] + SVE2_INT_SHIFT_INSERT))] + "TARGET_SVE2" + "\t%0., %2., #%3" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Sum of absolute differences +;; ------------------------------------------------------------------------- +;; Includes: +;; - SABA +;; - UABA +;; ------------------------------------------------------------------------- + +;; Provide the natural unpredicated interface for SABA and UABA. +(define_expand "@aarch64_sve2_aba" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_I + (minus:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_dup 4) + (USMAX:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (unspec:SVE_FULL_I + [(match_dup 4) + (:SVE_FULL_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)) + (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate +;; operation whose predicates aren't needed. +(define_insn "*aarch64_sve2_aba" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_I + (minus:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_operand 4) + (USMAX:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (unspec:SVE_FULL_I + [(match_operand 5) + (:SVE_FULL_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)) + (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + aba\t%0., %2., %3. + movprfx\t%0, %1\;aba\t%0., %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; ========================================================================= +;; == Extending arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Wide binary arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - SADDWB +;; - SADDWT +;; - SSUBWB +;; - SSUBWT +;; - UADDWB +;; - UADDWT +;; - USUBWB +;; - USUBWT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + SVE2_INT_BINARY_WIDE))] + "TARGET_SVE2" + "\t%0., %1., %2." 
+) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Long binary arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - SABDLB +;; - SABDLT +;; - SADDLB +;; - SADDLBT +;; - SADDLT +;; - SMULLB +;; - SMULLT +;; - SQDMULLB +;; - SQDMULLT +;; - SSUBLB +;; - SSUBLBT +;; - SSUBLT +;; - SSUBLTB +;; - UABDLB +;; - UABDLT +;; - UADDLB +;; - UADDLT +;; - UMULLB +;; - UMULLT +;; - USUBLB +;; - USUBLT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + SVE2_INT_BINARY_LONG))] + "TARGET_SVE2" + "\t%0., %1., %2." +) + +(define_insn "@aarch64_sve__lane_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "w") + (unspec: + [(match_operand: 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_BINARY_LONG_LANE))] + "TARGET_SVE2" + "\t%0., %1., %2.[%3]" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Long left shifts +;; ------------------------------------------------------------------------- +;; Includes: +;; - SSHLLB +;; - SSHLLT +;; - USHLLB +;; - USHLLT +;; ------------------------------------------------------------------------- + +;; The immediate range is enforced before generating the instruction. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "w") + (match_operand:DI 2 "const_int_operand")] + SVE2_INT_SHIFT_IMM_LONG))] + "TARGET_SVE2" + "\t%0., %1., #%2" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Long binary arithmetic with accumulation +;; ------------------------------------------------------------------------- +;; Includes: +;; - SABALB +;; - SABALT +;; - SMLALB +;; - SMLALT +;; - SMLSLB +;; - SMLSLT +;; - SQDMLALB +;; - SQDMLALBT +;; - SQDMLALT +;; - SQDMLSLB +;; - SQDMLSLBT +;; - SQDMLSLT +;; - UABALB +;; - UABALT +;; - UMLALB +;; - UMLALT +;; - UMLSLB +;; - UMLSLT +;; ------------------------------------------------------------------------- + +;; Non-saturating MLA operations. +(define_insn "@aarch64_sve_add_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_HSDI + (unspec:SVE_FULL_HSDI + [(match_operand: 2 "register_operand" "w, w") + (match_operand: 3 "register_operand" "w, w")] + SVE2_INT_ADD_BINARY_LONG) + (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + \t%0., %2., %3. + movprfx\t%0, %1\;\t%0., %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Non-saturating MLA operations with lane select. 
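To spell out the B/T naming used throughout these widening accumulate patterns before the lane-select form that follows: the B ("bottom") forms read the even-indexed elements of the narrow inputs and the T ("top") forms the odd-indexed ones, widening each product before accumulating. A scalar model of SMLALB on .h inputs and a .s accumulator (my summary; the instruction wraps on overflow, which the sketch ignores):

#include <stdint.h>

/* Scalar model of SMLALB Zda.S, Zn.H, Zm.H: accumulate the widened
   products of the even-indexed (bottom) halfword elements.  SMLALT
   would read index 2 * i + 1 instead.  N is the number of .s elements.  */
void
smlalb_model (int32_t *acc, const int16_t *a, const int16_t *b, int n)
{
  for (int i = 0; i < n; ++i)
    acc[i] += (int32_t) a[2 * i] * (int32_t) b[2 * i];
}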
+(define_insn "@aarch64_sve_add__lane_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (plus:SVE_FULL_SDI + (unspec:SVE_FULL_SDI + [(match_operand: 2 "register_operand" "w, w") + (unspec: + [(match_operand: 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_ADD_BINARY_LONG_LANE) + (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + \t%0., %2., %3.[%4] + movprfx\t%0, %1\;\t%0., %2., %3.[%4]" + [(set_attr "movprfx" "*,yes")] +) + +;; Saturating MLA operations. +(define_insn "@aarch64_sve_qadd_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (ss_plus:SVE_FULL_HSDI + (unspec:SVE_FULL_HSDI + [(match_operand: 2 "register_operand" "w, w") + (match_operand: 3 "register_operand" "w, w")] + SVE2_INT_QADD_BINARY_LONG) + (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + \t%0., %2., %3. + movprfx\t%0, %1\;\t%0., %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Saturating MLA operations with lane select. +(define_insn "@aarch64_sve_qadd__lane_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (ss_plus:SVE_FULL_SDI + (unspec:SVE_FULL_SDI + [(match_operand: 2 "register_operand" "w, w") + (unspec: + [(match_operand: 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_QADD_BINARY_LONG_LANE) + (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))] + "TARGET_SVE2" + "@ + \t%0., %2., %3.[%4] + movprfx\t%0, %1\;\t%0., %2., %3.[%4]" + [(set_attr "movprfx" "*,yes")] +) + +;; Non-saturating MLS operations. +(define_insn "@aarch64_sve_sub_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (minus:SVE_FULL_HSDI + (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w") + (unspec:SVE_FULL_HSDI + [(match_operand: 2 "register_operand" "w, w") + (match_operand: 3 "register_operand" "w, w")] + SVE2_INT_SUB_BINARY_LONG)))] + "TARGET_SVE2" + "@ + \t%0., %2., %3. + movprfx\t%0, %1\;\t%0., %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Non-saturating MLS operations with lane select. +(define_insn "@aarch64_sve_sub__lane_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (minus:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") + (unspec:SVE_FULL_SDI + [(match_operand: 2 "register_operand" "w, w") + (unspec: + [(match_operand: 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_SUB_BINARY_LONG_LANE)))] + "TARGET_SVE2" + "@ + \t%0., %2., %3.[%4] + movprfx\t%0, %1\;\t%0., %2., %3.[%4]" + [(set_attr "movprfx" "*,yes")] +) + +;; Saturating MLS operations. +(define_insn "@aarch64_sve_qsub_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (ss_minus:SVE_FULL_HSDI + (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w") + (unspec:SVE_FULL_HSDI + [(match_operand: 2 "register_operand" "w, w") + (match_operand: 3 "register_operand" "w, w")] + SVE2_INT_QSUB_BINARY_LONG)))] + "TARGET_SVE2" + "@ + \t%0., %2., %3. + movprfx\t%0, %1\;\t%0., %2., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; Saturating MLS operations with lane select. 
+(define_insn "@aarch64_sve_qsub__lane_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (ss_minus:SVE_FULL_SDI + (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") + (unspec:SVE_FULL_SDI + [(match_operand: 2 "register_operand" "w, w") + (unspec: + [(match_operand: 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_QSUB_BINARY_LONG_LANE)))] + "TARGET_SVE2" + "@ + \t%0., %2., %3.[%4] + movprfx\t%0, %1\;\t%0., %2., %3.[%4]" + [(set_attr "movprfx" "*,yes")] +) +;; ------------------------------------------------------------------------- +;; ---- [FP] Long multiplication with accumulation +;; ------------------------------------------------------------------------- +;; Includes: +;; - FMLALB +;; - FMLALT +;; - FMLSLB +;; - FMLSLT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") + (unspec:VNx4SF_ONLY + [(match_operand: 1 "register_operand" "w, w") + (match_operand: 2 "register_operand" "w, w") + (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")] + SVE2_FP_TERNARY_LONG))] + "TARGET_SVE2" + "@ + \t%0., %1., %2. + movprfx\t%0, %3\;\t%0., %1., %2." + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "@aarch64__lane_" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") + (unspec:VNx4SF_ONLY + [(match_operand: 1 "register_operand" "w, w") + (unspec: + [(match_operand: 2 "register_operand" ", ") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")] + SVE2_FP_TERNARY_LONG_LANE))] + "TARGET_SVE2" + "@ + \t%0., %1., %2.[%3] + movprfx\t%0, %4\;\t%0., %1., %2.[%3]" + [(set_attr "movprfx" "*,yes")] +) + +;; ========================================================================= +;; == Narrowing arithnetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Narrowing unary arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - SQXTNB +;; - SQXTNT +;; - SQXTUNB +;; - SQXTUNT +;; - UQXTNB +;; - UQXTNT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")] + SVE2_INT_UNARY_NARROWB))] + "TARGET_SVE2" + "\t%0., %1." +) + +;; These instructions do not take MOVPRFX. +(define_insn "@aarch64_sve_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand: 1 "register_operand" "0") + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")] + SVE2_INT_UNARY_NARROWT))] + "TARGET_SVE2" + "\t%0., %2." 
+) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Narrowing binary arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADDHNB +;; - ADDHNT +;; - RADDHNB +;; - RADDHNT +;; - RSUBHNB +;; - RSUBHNT +;; - SUBHNB +;; - SUBHNT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")] + SVE2_INT_BINARY_NARROWB))] + "TARGET_SVE2" + "\t%0., %1., %2." +) + +;; These instructions do not take MOVPRFX. +(define_insn "@aarch64_sve_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand: 1 "register_operand" "0") + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w") + (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")] + SVE2_INT_BINARY_NARROWT))] + "TARGET_SVE2" + "\t%0., %2., %3." +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Narrowing right shifts +;; ------------------------------------------------------------------------- +;; Includes: +;; - RSHRNB +;; - RSHRNT +;; - SHRNB +;; - SHRNT +;; - SQRSHRNB +;; - SQRSHRNT +;; - SQRSHRUNB +;; - SQRSHRUNT +;; - SQSHRNB +;; - SQSHRNT +;; - SQSHRUNB +;; - SQSHRUNT +;; - UQRSHRNB +;; - UQRSHRNT +;; - UQSHRNB +;; - UQSHRNT +;; ------------------------------------------------------------------------- + +;; The immediate range is enforced before generating the instruction. +(define_insn "@aarch64_sve_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") + (match_operand:DI 2 "const_int_operand")] + SVE2_INT_SHIFT_IMM_NARROWB))] + "TARGET_SVE2" + "\t%0., %1., #%2" +) + +;; The immediate range is enforced before generating the instruction. +;; These instructions do not take MOVPRFX. +(define_insn "@aarch64_sve_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand: 1 "register_operand" "0") + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w") + (match_operand:DI 3 "const_int_operand")] + SVE2_INT_SHIFT_IMM_NARROWT))] + "TARGET_SVE2" + "\t%0., %2., #%3" +) + +;; ========================================================================= +;; == Pairwise arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Pairwise arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADDP +;; - SMAXP +;; - SMINP +;; - UMAXP +;; - UMINP +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SVE_FULL_I 2 "register_operand" "0, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] + SVE2_INT_BINARY_PAIR))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." 
+ [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Pairwise arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - FADDP +;; - FMAXP +;; - FMAXNMP +;; - FMINP +;; - FMINNMP +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE2_FP_BINARY_PAIR))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Pairwise arithmetic with accumulation +;; ------------------------------------------------------------------------- +;; Includes: +;; - SADALP +;; - UADALP +;; ------------------------------------------------------------------------- + +;; Predicated pairwise absolute difference and accumulate with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_HSDI + [(match_dup 1) + (match_operand:SVE_FULL_HSDI 2 "register_operand") + (match_operand: 3 "register_operand")] + SVE2_INT_BINARY_PAIR_LONG) + (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE2" +{ + /* Only target code is aware of these operations, so we don't need + to handle the fully-general case. */ + gcc_assert (rtx_equal_p (operands[2], operands[4]) + || CONSTANT_P (operands[4])); +}) + +;; Predicated pairwise absolute difference and accumulate, merging with +;; the first input. +(define_insn_and_rewrite "*cond__2" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_HSDI + [(match_operand 4) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w") + (match_operand: 3 "register_operand" "w, w")] + SVE2_INT_BINARY_PAIR_LONG) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %3. + movprfx\t%0, %2\;\t%0., %1/m, %3." + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated pairwise absolute difference and accumulate, merging with zero. +(define_insn_and_rewrite "*cond__z" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_HSDI + [(match_operand 5) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w") + (match_operand: 3 "register_operand" "w, w")] + SVE2_INT_BINARY_PAIR_LONG) + (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %3." 
+ "&& !CONSTANT_P (operands[5])" + { + operands[5] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "yes")] +) + +;; ========================================================================= +;; == Complex arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Complex binary operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - CADD +;; - SQCADD +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") + (match_operand:SVE_FULL_I 2 "register_operand" "w, w")] + SVE2_INT_CADD))] + "TARGET_SVE2" + "@ + \t%0., %0., %2., # + movprfx\t%0, %1\;\t%0., %0., %2., #" + [(set_attr "movprfx" "*,yes")] +) + +;; unpredicated optab pattern for auto-vectorizer +(define_expand "cadd3" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")] + SVE2_INT_CADD_OP))] + "TARGET_SVE2" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Complex ternary operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - CMLA +;; - SQRDCMLA +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") + (match_operand:SVE_FULL_I 2 "register_operand" "w, w") + (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] + SVE2_INT_CMLA))] + "TARGET_SVE2" + "@ + \t%0., %2., %3., # + movprfx\t%0, %1\;\t%0., %2., %3., #" + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "@aarch64__lane_" + [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_HSI + [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w") + (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w") + (unspec:SVE_FULL_HSI + [(match_operand:SVE_FULL_HSI 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_CMLA))] + "TARGET_SVE2" + "@ + \t%0., %2., %3.[%4], # + movprfx\t%0, %1\;\t%0., %2., %3.[%4], #" + [(set_attr "movprfx" "*,yes")] +) + +;; unpredicated optab pattern for auto-vectorizer +;; The complex mla/mls operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. Because of this, expand early. +(define_expand "cml4" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "register_operand")] + SVE2_INT_CMLA_OP)))] + "TARGET_SVE2" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_aarch64_sve_cmla (tmp, operands[1], + operands[3], operands[2])); + emit_insn (gen_aarch64_sve_cmla (operands[0], tmp, + operands[3], operands[2])); + DONE; +}) + +;; unpredicated optab pattern for auto-vectorizer +;; The complex mul operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. 
Because of this, expand early. +(define_expand "cmul3" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")] + SVE2_INT_CMUL_OP))] + "TARGET_SVE2" +{ + rtx accum = force_reg (mode, CONST0_RTX (mode)); + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_aarch64_sve_cmla (tmp, accum, + operands[2], operands[1])); + emit_insn (gen_aarch64_sve_cmla (operands[0], tmp, + operands[2], operands[1])); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Complex dot product +;; ------------------------------------------------------------------------- +;; Includes: +;; - CDOT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_SDI + [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") + (match_operand: 2 "register_operand" "w, w") + (match_operand: 3 "register_operand" "w, w")] + SVE2_INT_CDOT))] + "TARGET_SVE2" + "@ + \t%0., %2., %3., # + movprfx\t%0, %1\;\t%0., %2., %3., #" + [(set_attr "movprfx" "*,yes")] +) + +(define_insn "@aarch64__lane_" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_SDI + [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") + (match_operand: 2 "register_operand" "w, w") + (unspec: + [(match_operand: 3 "register_operand" ", ") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SVE2_INT_CDOT))] + "TARGET_SVE2" + "@ + \t%0., %2., %3.[%4], # + movprfx\t%0, %1\;\t%0., %2., %3.[%4], #" + [(set_attr "movprfx" "*,yes")] +) + +;; ========================================================================= +;; == Conversions +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Widening conversions +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCVTLT +;; ------------------------------------------------------------------------- + +;; Predicated convert long top. +(define_insn "@aarch64_pred_" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand: 2 "register_operand" "0")] + SVE2_COND_FP_UNARY_LONG))] + "TARGET_SVE2" + "\t%0., %1/m, %0." +) + +;; Predicated convert long top with merging. +(define_expand "@cond_" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand") + (unspec:SVE_FULL_SDF + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand: 2 "register_operand")] + SVE2_COND_FP_UNARY_LONG) + (match_operand:SVE_FULL_SDF 3 "register_operand")] + UNSPEC_SEL))] + "TARGET_SVE2" +) + +;; These instructions do not take MOVPRFX. +(define_insn_and_rewrite "*cond__relaxed" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand" "Upl") + (unspec:SVE_FULL_SDF + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand: 2 "register_operand" "w")] + SVE2_COND_FP_UNARY_LONG) + (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] + UNSPEC_SEL))] + "TARGET_SVE2" + "\t%0., %1/m, %2." 
+ "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } +) + +(define_insn "*cond__strict" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand" "Upl") + (unspec:SVE_FULL_SDF + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand: 2 "register_operand" "w")] + SVE2_COND_FP_UNARY_LONG) + (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] + UNSPEC_SEL))] + "TARGET_SVE2" + "\t%0., %1/m, %2." +) + +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Narrowing conversions +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCVTNT +;; - FCVTX +;; - FCVTXNT +;; ------------------------------------------------------------------------- + +;; Predicated FCVTNT. This doesn't give a natural aarch64_pred_*/cond_* +;; pair because the even elements always have to be supplied for active +;; elements, even if the inactive elements don't matter. +;; +;; These instructions do not take MOVPRFX. +(define_insn "@aarch64_sve_cvtnt" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w") + (unspec:SVE_FULL_HSF + [(match_operand: 2 "register_operand" "Upl") + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_HSF 1 "register_operand" "0") + (match_operand: 3 "register_operand" "w")] + UNSPEC_COND_FCVTNT))] + "TARGET_SVE2" + "fcvtnt\t%0., %2/m, %3." +) + +;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that +;; it supports MOVPRFX). +(define_insn "@aarch64_pred_" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") + (unspec:VNx4SF_ONLY + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand: 2 "register_operand" "0, w")] + SVE2_COND_FP_UNARY_NARROWB))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %2. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated FCVTX with merging. +(define_expand "@cond_" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand") + (unspec:VNx4SF_ONLY + [(match_operand: 1 "register_operand") + (unspec:VNx4SF_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand: 2 "register_operand")] + SVE2_COND_FP_UNARY_NARROWB) + (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE2" +) + +(define_insn_and_rewrite "*cond__any_relaxed" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") + (unspec:VNx4SF_ONLY + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx4SF_ONLY + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand: 2 "register_operand" "w, w, w")] + SVE2_COND_FP_UNARY_NARROWB) + (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." 
+ "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes,yes")] +) + +(define_insn "*cond__any_strict" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") + (unspec:VNx4SF_ONLY + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx4SF_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand: 2 "register_operand" "w, w, w")] + SVE2_COND_FP_UNARY_NARROWB) + (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_* +;; pair because the even elements always have to be supplied for active +;; elements, even if the inactive elements don't matter. +;; +;; These instructions do not take MOVPRFX. +(define_insn "@aarch64_sve2_cvtxnt" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: + [(match_operand: 2 "register_operand" "Upl") + (const_int SVE_STRICT_GP) + (match_operand: 1 "register_operand" "0") + (match_operand:VNx2DF_ONLY 3 "register_operand" "w")] + UNSPEC_COND_FCVTXNT))] + "TARGET_SVE2" + "fcvtxnt\t%0., %2/m, %3." +) + +;; ========================================================================= +;; == Other arithmetic +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT] Reciprocal approximation +;; ------------------------------------------------------------------------- +;; Includes: +;; - URECPE +;; - URSQRTE +;; ------------------------------------------------------------------------- + +;; Predicated integer unary operations. +(define_insn "@aarch64_pred_" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") + (unspec:VNx4SI_ONLY + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:VNx4SI_ONLY + [(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")] + SVE2_U32_UNARY)] + UNSPEC_PRED_X))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %2. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer unary operations with merging. +(define_expand "@cond_" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand") + (unspec:VNx4SI_ONLY + [(match_operand: 1 "register_operand") + (unspec:VNx4SI_ONLY + [(match_dup 4) + (unspec:VNx4SI_ONLY + [(match_operand:VNx4SI_ONLY 2 "register_operand")] + SVE2_U32_UNARY)] + UNSPEC_PRED_X) + (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (mode); + } +) + +(define_insn_and_rewrite "*cond_" + [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w") + (unspec:VNx4SI_ONLY + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx4SI_ONLY + [(match_operand 4) + (unspec:VNx4SI_ONLY + [(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] + SVE2_U32_UNARY)] + UNSPEC_PRED_X) + (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." 
+ "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT<-FP] Base-2 logarithm +;; ------------------------------------------------------------------------- +;; Includes: +;; - FLOGB +;; ------------------------------------------------------------------------- + +;; Predicated FLOGB. +(define_insn "@aarch64_pred_" + [(set (match_operand: 0 "register_operand" "=w, ?&w") + (unspec: + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE2_COND_INT_UNARY_FP))] + "TARGET_SVE2" + "@ + \t%0., %1/m, %2. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated FLOGB with merging. +(define_expand "@cond_" + [(set (match_operand: 0 "register_operand") + (unspec: + [(match_operand: 1 "register_operand") + (unspec: + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand")] + SVE2_COND_INT_UNARY_FP) + (match_operand: 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE2" +) + +(define_insn_and_rewrite "*cond_" + [(set (match_operand: 0 "register_operand" "=&w, ?&w, ?&w") + (unspec: + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec: + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE2_COND_INT_UNARY_FP) + (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } + [(set_attr "movprfx" "*,yes,yes")] +) + +(define_insn "*cond__strict" + [(set (match_operand: 0 "register_operand" "=&w, ?&w, ?&w") + (unspec: + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec: + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE2_COND_INT_UNARY_FP) + (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Polynomial multiplication +;; ------------------------------------------------------------------------- +;; Includes: +;; - PMUL +;; - PMULLB +;; - PMULLT +;; ------------------------------------------------------------------------- + +;; Uniform PMUL. +(define_insn "@aarch64_sve2_pmul" + [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w") + (unspec:VNx16QI_ONLY + [(match_operand:VNx16QI_ONLY 1 "register_operand" "w") + (match_operand:VNx16QI_ONLY 2 "register_operand" "w")] + UNSPEC_PMUL))] + "TARGET_SVE2" + "pmul\t%0., %1., %2." +) + +;; Extending PMUL, with the results modeled as wider vectors. +;; This representation is only possible for .H and .D, not .Q. +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w") + (unspec:SVE_FULL_HDI + [(match_operand: 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + SVE2_PMULL))] + "TARGET_SVE2" + "\t%0., %1., %2." 
+) + +;; Extending PMUL, with the results modeled as pairs of values. +;; This representation works for .H, .D and .Q, with .Q requiring +;; the AES extension. (This is enforced by the mode iterator.) +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w") + (unspec:SVE2_PMULL_PAIR_I + [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w") + (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")] + SVE2_PMULL_PAIR))] + "TARGET_SVE2" + "\t%0., %1., %2." +) + +;; ========================================================================= +;; == Permutation +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] General permutes +;; ------------------------------------------------------------------------- +;; Includes: +;; - TBL (vector pair form) +;; - TBX +;; ------------------------------------------------------------------------- + +;; TBL on a pair of data vectors. +(define_insn "@aarch64_sve2_tbl2" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand: 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + UNSPEC_TBL2))] + "TARGET_SVE2" + "tbl\t%0., %1, %2." +) + +;; TBX. These instructions do not take MOVPRFX. +(define_insn "@aarch64_sve2_tbx" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 1 "register_operand" "0") + (match_operand:SVE_FULL 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + UNSPEC_TBX))] + "TARGET_SVE2" + "tbx\t%0., %2., %3." +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Optional bit-permute extensions +;; ------------------------------------------------------------------------- +;; Includes: +;; - BDEP +;; - BEXT +;; - BGRP +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_" + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand" "w") + (match_operand:SVE_FULL_I 2 "register_operand" "w")] + SVE2_INT_BITPERM))] + "TARGET_SVE2_BITPERM" + "\t%0., %1., %2." +) + +;; ========================================================================= +;; == General +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Check for aliases between pointers +;; ------------------------------------------------------------------------- +;; The patterns in this section are synthetic: WHILERW and WHILEWR are +;; defined in aarch64-sve.md instead. +;; ------------------------------------------------------------------------- + +;; Use WHILERW and WHILEWR to accelerate alias checks. This is only +;; possible if the accesses we're checking are exactly the same size +;; as an SVE vector. +(define_expand "check__ptrs" + [(match_operand:GPI 0 "register_operand") + (unspec:VNx16BI + [(match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand") + (match_operand:GPI 4 "const_int_operand")] + SVE2_WHILE_PTR)] + "TARGET_SVE2" +{ + /* Use the widest predicate mode we can. 
*/ + unsigned int align = INTVAL (operands[4]); + if (align > 8) + align = 8; + machine_mode pred_mode = aarch64_sve_pred_mode (align).require (); + + /* Emit a WHILERW or WHILEWR, setting the condition codes based on + the result. */ + emit_insn (gen_while_ptest + (, mode, pred_mode, + gen_rtx_SCRATCH (pred_mode), operands[1], operands[2], + CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode))); + + /* Set operand 0 to true if the last bit of the predicate result is set, + i.e. if all elements are free of dependencies. */ + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp = gen_rtx_LTU (mode, cc_reg, const0_rtx); + emit_insn (gen_aarch64_cstore (operands[0], cmp, cc_reg)); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; ---- Histogram processing +;; ------------------------------------------------------------------------- +;; Includes: +;; - HISTCNT +;; - HISTSEG +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve2_histcnt" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") + (unspec:SVE_FULL_SDI + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SVE_FULL_SDI 2 "register_operand" "w") + (match_operand:SVE_FULL_SDI 3 "register_operand" "w")] + UNSPEC_HISTCNT))] + "TARGET_SVE2" + "histcnt\t%0., %1/z, %2., %3." +) + +(define_insn "@aarch64_sve2_histseg" + [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w") + (unspec:VNx16QI_ONLY + [(match_operand:VNx16QI_ONLY 1 "register_operand" "w") + (match_operand:VNx16QI_ONLY 2 "register_operand" "w")] + UNSPEC_HISTSEG))] + "TARGET_SVE2" + "histseg\t%0., %1., %2." +) + +;; ------------------------------------------------------------------------- +;; ---- String matching +;; ------------------------------------------------------------------------- +;; Includes: +;; - MATCH +;; - NMATCH +;; ------------------------------------------------------------------------- + +;; Predicated string matching. +(define_insn "@aarch64_pred_" + [(set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:SVE_FULL_BHI 3 "register_operand" "w") + (match_operand:SVE_FULL_BHI 4 "register_operand" "w")] + SVE2_MATCH)] + UNSPEC_PRED_Z)) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE2" + "\t%0., %1/z, %3., %4." +) + +;; Predicated string matching in which both the flag and predicate results +;; are interesting. +(define_insn_and_rewrite "*aarch64_pred__cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:SVE_FULL_BHI 2 "register_operand" "w") + (match_operand:SVE_FULL_BHI 3 "register_operand" "w")] + SVE2_MATCH)] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_dup 6) + (match_dup 7) + (unspec: + [(match_dup 2) + (match_dup 3)] + SVE2_MATCH)] + UNSPEC_PRED_Z))] + "TARGET_SVE2 + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + "\t%0., %1/z, %2., %3." + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + +;; Predicated string matching in which only the flags result is interesting. 
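For context on what MATCH/NMATCH compute, in both the pattern above and the flags-only variant that follows: each active element of the first vector is compared against every element in the corresponding 128-bit segment of the second vector, and the predicate result records whether any comparison matched (NMATCH negates that per-element result). This summary and the sketch below are my paraphrase of the architecture, not something the patch defines:

#include <stdbool.h>
#include <stdint.h>

/* Scalar model of MATCH on .b elements; N is the vector length in bytes
   and every 16 bytes form one 128-bit segment.  */
void
match_model (bool *pd, const bool *pg, const uint8_t *a, const uint8_t *b,
	     int n)
{
  for (int i = 0; i < n; ++i)
    {
      bool found = false;
      for (int j = 0; j < 16; ++j)
	found |= (a[i] == b[(i / 16) * 16 + j]);
      pd[i] = pg[i] && found;	/* NMATCH: pg[i] && !found.  */
    }
}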
+(define_insn_and_rewrite "*aarch64_pred__ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (unspec: + [(match_operand:SVE_FULL_BHI 2 "register_operand" "w") + (match_operand:SVE_FULL_BHI 3 "register_operand" "w")] + SVE2_MATCH)] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (clobber (match_scratch: 0 "=Upa"))] + "TARGET_SVE2 + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + "\t%0., %1/z, %2., %3." + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + +;; ========================================================================= +;; == Crypotographic extensions +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Optional AES extensions +;; ------------------------------------------------------------------------- +;; Includes: +;; - AESD +;; - AESE +;; - AESIMC +;; - AESMC +;; ------------------------------------------------------------------------- + +;; AESD and AESE. +(define_insn "aarch64_sve2_aes" + [(set (match_operand:VNx16QI 0 "register_operand" "=w") + (unspec:VNx16QI + [(xor:VNx16QI + (match_operand:VNx16QI 1 "register_operand" "%0") + (match_operand:VNx16QI 2 "register_operand" "w"))] + CRYPTO_AES))] + "TARGET_SVE2_AES" + "aes\t%0.b, %0.b, %2.b" + [(set_attr "type" "crypto_aese")] +) + +;; AESMC and AESIMC. These instructions do not take MOVPRFX. +(define_insn "aarch64_sve2_aes" + [(set (match_operand:VNx16QI 0 "register_operand" "=w") + (unspec:VNx16QI + [(match_operand:VNx16QI 1 "register_operand" "0")] + CRYPTO_AESMC))] + "TARGET_SVE2_AES" + "aes\t%0.b, %0.b" + [(set_attr "type" "crypto_aesmc")] +) + +;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want +;; to keep the two together and enforce the register dependency without +;; scheduling or register allocation messing up the order or introducing +;; moves inbetween. Mash the two together during combine. 
+ +(define_insn "*aarch64_sve2_aese_fused" + [(set (match_operand:VNx16QI 0 "register_operand" "=w") + (unspec:VNx16QI + [(unspec:VNx16QI + [(xor:VNx16QI + (match_operand:VNx16QI 1 "register_operand" "%0") + (match_operand:VNx16QI 2 "register_operand" "w"))] + UNSPEC_AESE)] + UNSPEC_AESMC))] + "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b" + [(set_attr "type" "crypto_aese") + (set_attr "length" "8")] +) + +(define_insn "*aarch64_sve2_aesd_fused" + [(set (match_operand:VNx16QI 0 "register_operand" "=w") + (unspec:VNx16QI + [(unspec:VNx16QI + [(xor:VNx16QI + (match_operand:VNx16QI 1 "register_operand" "%0") + (match_operand:VNx16QI 2 "register_operand" "w"))] + UNSPEC_AESD)] + UNSPEC_AESIMC))] + "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b" + [(set_attr "type" "crypto_aese") + (set_attr "length" "8")] +) + +;; ------------------------------------------------------------------------- +;; ---- Optional SHA-3 extensions +;; ------------------------------------------------------------------------- +;; Includes: +;; - RAX1 +;; ------------------------------------------------------------------------- + +(define_insn "aarch64_sve2_rax1" + [(set (match_operand:VNx2DI 0 "register_operand" "=w") + (xor:VNx2DI + (rotate:VNx2DI + (match_operand:VNx2DI 2 "register_operand" "w") + (const_int 1)) + (match_operand:VNx2DI 1 "register_operand" "w")))] + "TARGET_SVE2_SHA3" + "rax1\t%0.d, %1.d, %2.d" + [(set_attr "type" "crypto_sha3")] +) + +;; ------------------------------------------------------------------------- +;; ---- Optional SM4 extensions +;; ------------------------------------------------------------------------- +;; Includes: +;; - SM4E +;; - SM4EKEY +;; ------------------------------------------------------------------------- + +;; These instructions do not take MOVPRFX. 
+(define_insn "aarch64_sve2_sm4e" + [(set (match_operand:VNx4SI 0 "register_operand" "=w") + (unspec:VNx4SI + [(match_operand:VNx4SI 1 "register_operand" "0") + (match_operand:VNx4SI 2 "register_operand" "w")] + UNSPEC_SM4E))] + "TARGET_SVE2_SM4" + "sm4e\t%0.s, %0.s, %2.s" + [(set_attr "type" "crypto_sm4")] +) + +(define_insn "aarch64_sve2_sm4ekey" + [(set (match_operand:VNx4SI 0 "register_operand" "=w") + (unspec:VNx4SI + [(match_operand:VNx4SI 1 "register_operand" "w") + (match_operand:VNx4SI 2 "register_operand" "w")] + UNSPEC_SM4EKEY))] + "TARGET_SVE2_SM4" + "sm4ekey\t%0.s, %1.s, %2.s" + [(set_attr "type" "crypto_sm4")] +) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index f08b7e44b27be..af66c111da29f 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index ea9ead234cbc9..65b4c37d65226 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2018 Free Software Foundation, Inc. +/* Copyright (C) 2015-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -41,4 +41,15 @@ AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW) are not considered cheap. */ AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND) +/* Disallow load/store pair instructions on Q-registers. */ +AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS) + +AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS) + +AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS) + +AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", USE_NEW_VECTOR_COSTS) + +AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGHPUT) + #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/aarch64-vxworks.h b/gcc/config/aarch64/aarch64-vxworks.h new file mode 100644 index 0000000000000..d5bdb4efb6115 --- /dev/null +++ b/gcc/config/aarch64/aarch64-vxworks.h @@ -0,0 +1,73 @@ +/* Definitions of target machine for GNU compiler. Vxworks Aarch 64bit + version. + Copyright (C) 2018-2021 Free Software Foundation, Inc. + Contributed by Douglas B Rupp + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS + +#undef LINK_SPEC +#define LINK_SPEC VXWORKS_LINK_SPEC + +#undef LIB_SPEC +#define LIB_SPEC VXWORKS_LIB_SPEC + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC + +#undef CPP_SPEC +#define CPP_SPEC VXWORKS_ADDITIONAL_CPP_SPEC + +#undef CC1_SPEC +#define CC1_SPEC VXWORKS_CC1_SPEC + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + if (TARGET_BIG_END) \ + builtin_define ("ARMEB"); \ + else \ + builtin_define ("ARMEL"); \ + builtin_define ("_VX_CPU=ARMARCH8A"); \ + VXWORKS_OS_CPP_BUILTINS (); \ + } while (0) + +/* Static stack checking is supported. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +#undef STACK_CHECK_PROTECT +#define STACK_CHECK_PROTECT 16384 + +/* The VxWorks environment on aarch64 is llvm-based. */ +#undef VXWORKS_PERSONALITY +#define VXWORKS_PERSONALITY "llvm" + +/* VxWorks uses R18 as a TCB pointer. We must pick something else as + the static chain and R18 needs to be claimed "fixed". Until we + arrange to override the common parts of the port family to + acknowledge the latter, configure --with-specs="-ffixed-r18". */ +#undef STATIC_CHAIN_REGNUM +#define STATIC_CHAIN_REGNUM 9 + diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 20761578fb605..c2f4b27f6e4bc 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -40,6 +40,7 @@ #include "regs.h" #include "emit-rtl.h" #include "recog.h" +#include "cgraph.h" #include "diagnostic.h" #include "insn-attr.h" #include "alias.h" @@ -54,7 +55,6 @@ #include "reload.h" #include "langhooks.h" #include "opts.h" -#include "params.h" #include "gimplify.h" #include "dwarf2.h" #include "gimple-iterator.h" @@ -71,6 +71,11 @@ #include "selftest.h" #include "selftest-rtl.h" #include "rtx-vector-builder.h" +#include "intl.h" +#include "expmed.h" +#include "function-abi.h" +#include "gimple-pretty-print.h" +#include "tree-ssa-loop-niter.h" /* This file should be included last. */ #include "target-def.h" @@ -78,52 +83,10 @@ /* Defined for convenience. */ #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) -/* Classifies an address. - - ADDRESS_REG_IMM - A simple base register plus immediate offset. - - ADDRESS_REG_WB - A base register indexed by immediate offset with writeback. - - ADDRESS_REG_REG - A base register indexed by (optionally scaled) register. - - ADDRESS_REG_UXTW - A base register indexed by (optionally scaled) zero-extended register. 
- - ADDRESS_REG_SXTW - A base register indexed by (optionally scaled) sign-extended register. - - ADDRESS_LO_SUM - A LO_SUM rtx with a base register and "LO12" symbol relocation. - - ADDRESS_SYMBOLIC: - A constant symbolic address, in pc-relative literal pool. */ - -enum aarch64_address_type { - ADDRESS_REG_IMM, - ADDRESS_REG_WB, - ADDRESS_REG_REG, - ADDRESS_REG_UXTW, - ADDRESS_REG_SXTW, - ADDRESS_LO_SUM, - ADDRESS_SYMBOLIC -}; - -struct aarch64_address_info { - enum aarch64_address_type type; - rtx base; - rtx offset; - poly_int64 const_offset; - int shift; - enum aarch64_symbol_type symbol_type; -}; - /* Information about a legitimate vector immediate operand. */ struct simd_immediate_info { - enum insn_type { MOV, MVN }; + enum insn_type { MOV, MVN, INDEX, PTRUE }; enum modifier_type { LSL, MSL }; simd_immediate_info () {} @@ -132,33 +95,51 @@ struct simd_immediate_info insn_type = MOV, modifier_type = LSL, unsigned int = 0); simd_immediate_info (scalar_mode, rtx, rtx); + simd_immediate_info (scalar_int_mode, aarch64_svpattern); /* The mode of the elements. */ scalar_mode elt_mode; - /* The value of each element if all elements are the same, or the - first value if the constant is a series. */ - rtx value; - - /* The value of the step if the constant is a series, null otherwise. */ - rtx step; - /* The instruction to use to move the immediate into a vector. */ insn_type insn; - /* The kind of shift modifier to use, and the number of bits to shift. - This is (LSL, 0) if no shift is needed. */ - modifier_type modifier; - unsigned int shift; + union + { + /* For MOV and MVN. */ + struct + { + /* The value of each element. */ + rtx value; + + /* The kind of shift modifier to use, and the number of bits to shift. + This is (LSL, 0) if no shift is needed. */ + modifier_type modifier; + unsigned int shift; + } mov; + + /* For INDEX. */ + struct + { + /* The value of the first element and the step to be added for each + subsequent element. */ + rtx base, step; + } index; + + /* For PTRUE. */ + aarch64_svpattern pattern; + } u; }; /* Construct a floating-point immediate in which each element has mode ELT_MODE_IN and value VALUE_IN. */ inline simd_immediate_info ::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in) - : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV), - modifier (LSL), shift (0) -{} + : elt_mode (elt_mode_in), insn (MOV) +{ + u.mov.value = value_in; + u.mov.modifier = LSL; + u.mov.shift = 0; +} /* Construct an integer immediate in which each element has mode ELT_MODE_IN and value VALUE_IN. The other parameters are as for the structure @@ -168,17 +149,128 @@ ::simd_immediate_info (scalar_int_mode elt_mode_in, unsigned HOST_WIDE_INT value_in, insn_type insn_in, modifier_type modifier_in, unsigned int shift_in) - : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)), - step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in) -{} + : elt_mode (elt_mode_in), insn (insn_in) +{ + u.mov.value = gen_int_mode (value_in, elt_mode_in); + u.mov.modifier = modifier_in; + u.mov.shift = shift_in; +} /* Construct an integer immediate in which each element has mode ELT_MODE_IN - and where element I is equal to VALUE_IN + I * STEP_IN. */ + and where element I is equal to BASE_IN + I * STEP_IN. 
*/ +inline simd_immediate_info +::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in) + : elt_mode (elt_mode_in), insn (INDEX) +{ + u.index.base = base_in; + u.index.step = step_in; +} + +/* Construct a predicate that controls elements of mode ELT_MODE_IN + and has PTRUE pattern PATTERN_IN. */ inline simd_immediate_info -::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in) - : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV), - modifier (LSL), shift (0) -{} +::simd_immediate_info (scalar_int_mode elt_mode_in, + aarch64_svpattern pattern_in) + : elt_mode (elt_mode_in), insn (PTRUE) +{ + u.pattern = pattern_in; +} + +namespace { + +/* Describes types that map to Pure Scalable Types (PSTs) in the AAPCS64. */ +class pure_scalable_type_info +{ +public: + /* Represents the result of analyzing a type. All values are nonzero, + in the possibly forlorn hope that accidental conversions to bool + trigger a warning. */ + enum analysis_result + { + /* The type does not have an ABI identity; i.e. it doesn't contain + at least one object whose type is a Fundamental Data Type. */ + NO_ABI_IDENTITY = 1, + + /* The type is definitely a Pure Scalable Type. */ + IS_PST, + + /* The type is definitely not a Pure Scalable Type. */ + ISNT_PST, + + /* It doesn't matter for PCS purposes whether the type is a Pure + Scalable Type or not, since the type will be handled the same + way regardless. + + Specifically, this means that if the type is a Pure Scalable Type, + there aren't enough argument registers to hold it, and so it will + need to be passed or returned in memory. If the type isn't a + Pure Scalable Type, it's too big to be passed or returned in core + or SIMD&FP registers, and so again will need to go in memory. */ + DOESNT_MATTER + }; + + /* Aggregates of 17 bytes or more are normally passed and returned + in memory, so aggregates of that size can safely be analyzed as + DOESNT_MATTER. We need to be able to collect enough pieces to + represent a PST that is smaller than that. Since predicates are + 2 bytes in size for -msve-vector-bits=128, that means we need to be + able to store at least 8 pieces. + + We also need to be able to store enough pieces to represent + a single vector in each vector argument register and a single + predicate in each predicate argument register. This means that + we need at least 12 pieces. */ + static const unsigned int MAX_PIECES = NUM_FP_ARG_REGS + NUM_PR_ARG_REGS; +#if __cplusplus >= 201103L + static_assert (MAX_PIECES >= 8, "Need to store at least 8 predicates"); +#endif + + /* Describes one piece of a PST. Each piece is one of: + + - a single Scalable Vector Type (SVT) + - a single Scalable Predicate Type (SPT) + - a PST containing 2, 3 or 4 SVTs, with no padding + + It either represents a single built-in type or a PST formed from + multiple homogeneous built-in types. */ + struct piece + { + rtx get_rtx (unsigned int, unsigned int) const; + + /* The number of vector and predicate registers that the piece + occupies. One of the two is always zero. */ + unsigned int num_zr; + unsigned int num_pr; + + /* The mode of the registers described above. */ + machine_mode mode; + + /* If this piece is formed from multiple homogeneous built-in types, + this is the mode of the built-in types, otherwise it is MODE. */ + machine_mode orig_mode; + + /* The offset in bytes of the piece from the start of the type. */ + poly_uint64_pod offset; + }; + + /* Divides types analyzed as IS_PST into individual pieces. 
The pieces + are in memory order. */ + auto_vec pieces; + + unsigned int num_zr () const; + unsigned int num_pr () const; + + rtx get_rtx (machine_mode mode, unsigned int, unsigned int) const; + + analysis_result analyze (const_tree); + bool analyze_registers (const_tree); + +private: + analysis_result analyze_array (const_tree); + analysis_result analyze_record (const_tree); + void add_piece (const piece &); +}; +} /* The current code model. */ enum aarch64_code_model aarch64_cmodel; @@ -192,10 +284,11 @@ poly_uint16 aarch64_sve_vg; #endif static bool aarch64_composite_type_p (const_tree, machine_mode); +static bool aarch64_return_in_memory_1 (const_tree); static bool aarch64_vfp_is_call_or_return_candidate (machine_mode, const_tree, machine_mode *, int *, - bool *); + bool *, bool); static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); @@ -206,7 +299,9 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, int misalignment, bool is_packed); static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); -static bool aarch64_print_ldpstp_address (FILE *, machine_mode, rtx); +static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, + aarch64_addr_query_type); +static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); /* Major revision number of the ARM Architecture implemented by the target. */ unsigned aarch64_architecture_version; @@ -215,11 +310,20 @@ unsigned aarch64_architecture_version; enum aarch64_processor aarch64_tune = cortexa53; /* Mask to specify which instruction scheduling options should be used. */ -unsigned long aarch64_tune_flags = 0; +uint64_t aarch64_tune_flags = 0; /* Global flag for PC relative loads. */ bool aarch64_pcrelative_literal_loads; +/* Global flag for whether frame pointer is enabled. */ +bool aarch64_use_frame_pointer; + +#define BRANCH_PROTECT_STR_MAX 255 +char *accepted_branch_protection_string = NULL; + +static enum aarch64_parse_opt_result +aarch64_parse_branch_protection (const char*, char**); + /* Support for command line parsing of boolean flags in the tuning structures. 
*/ struct aarch64_flag_desc @@ -260,6 +364,8 @@ static const struct cpu_addrcost_table generic_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 0, /* register_offset */ 0, /* register_sextend */ 0, /* register_zextend */ @@ -276,6 +382,8 @@ static const struct cpu_addrcost_table exynosm1_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 1, /* register_offset */ 1, /* register_sextend */ 2, /* register_zextend */ @@ -291,7 +399,9 @@ static const struct cpu_addrcost_table xgene1_addrcost_table = 1, /* ti */ }, 1, /* pre_modify */ - 0, /* post_modify */ + 1, /* post_modify */ + 1, /* post_modify_ld3_st3 */ + 1, /* post_modify_ld4_st4 */ 0, /* register_offset */ 1, /* register_sextend */ 1, /* register_zextend */ @@ -308,12 +418,50 @@ static const struct cpu_addrcost_table thunderx2t99_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 2, /* register_offset */ 3, /* register_sextend */ 3, /* register_zextend */ 0, /* imm_offset */ }; +static const struct cpu_addrcost_table thunderx3t110_addrcost_table = +{ + { + 1, /* hi */ + 1, /* si */ + 1, /* di */ + 2, /* ti */ + }, + 0, /* pre_modify */ + 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ + 2, /* register_offset */ + 3, /* register_sextend */ + 3, /* register_zextend */ + 0, /* imm_offset */ +}; + +static const struct cpu_addrcost_table tsv110_addrcost_table = +{ + { + 1, /* hi */ + 0, /* si */ + 0, /* di */ + 1, /* ti */ + }, + 0, /* pre_modify */ + 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ + 0, /* register_offset */ + 1, /* register_sextend */ + 1, /* register_zextend */ + 0, /* imm_offset */ +}; + static const struct cpu_addrcost_table qdf24xx_addrcost_table = { { @@ -324,12 +472,50 @@ static const struct cpu_addrcost_table qdf24xx_addrcost_table = }, 1, /* pre_modify */ 1, /* post_modify */ + 1, /* post_modify_ld3_st3 */ + 1, /* post_modify_ld4_st4 */ 3, /* register_offset */ 3, /* register_sextend */ 3, /* register_zextend */ 2, /* imm_offset */ }; +static const struct cpu_addrcost_table a64fx_addrcost_table = +{ + { + 1, /* hi */ + 1, /* si */ + 1, /* di */ + 2, /* ti */ + }, + 0, /* pre_modify */ + 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ + 2, /* register_offset */ + 3, /* register_sextend */ + 3, /* register_zextend */ + 0, /* imm_offset */ +}; + +static const struct cpu_addrcost_table neoversev1_addrcost_table = +{ + { + 1, /* hi */ + 0, /* si */ + 0, /* di */ + 1, /* ti */ + }, + 0, /* pre_modify */ + 0, /* post_modify */ + 3, /* post_modify_ld3_st3 */ + 3, /* post_modify_ld4_st4 */ + 0, /* register_offset */ + 0, /* register_sextend */ + 0, /* register_zextend */ + 0 /* imm_offset */ +}; + static const struct cpu_regmove_cost generic_regmove_cost = { 1, /* GP2GP */ @@ -401,11 +587,97 @@ static const struct cpu_regmove_cost thunderx2t99_regmove_cost = { 1, /* GP2GP */ /* Avoid the use of int<->fp moves for spilling. */ - 8, /* GP2FP */ - 8, /* FP2GP */ + 5, /* GP2FP */ + 6, /* FP2GP */ + 3, /* FP2FP */ +}; + +static const struct cpu_regmove_cost thunderx3t110_regmove_cost = +{ + 1, /* GP2GP */ + /* Avoid the use of int<->fp moves for spilling. 
*/ + 4, /* GP2FP */ + 5, /* FP2GP */ 4 /* FP2FP */ }; +static const struct cpu_regmove_cost tsv110_regmove_cost = +{ + 1, /* GP2GP */ + /* Avoid the use of slow int<->fp moves for spilling by setting + their cost higher than memmov_cost. */ + 2, /* GP2FP */ + 3, /* FP2GP */ + 2 /* FP2FP */ +}; + +static const struct cpu_regmove_cost a64fx_regmove_cost = +{ + 1, /* GP2GP */ + /* Avoid the use of slow int<->fp moves for spilling by setting + their cost higher than memmov_cost. */ + 5, /* GP2FP */ + 7, /* FP2GP */ + 2 /* FP2FP */ +}; + +/* Generic costs for Advanced SIMD vector operations. */ +static const advsimd_vec_cost generic_advsimd_vector_cost = +{ + 1, /* int_stmt_cost */ + 1, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 2, /* reduc_i8_cost */ + 2, /* reduc_i16_cost */ + 2, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 2, /* reduc_f16_cost */ + 2, /* reduc_f32_cost */ + 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + 2, /* vec_to_scalar_cost */ + 1, /* scalar_to_vec_cost */ + 1, /* align_load_cost */ + 1, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ +}; + +/* Generic costs for SVE vector operations. */ +static const sve_vec_cost generic_sve_vector_cost = +{ + { + 1, /* int_stmt_cost */ + 1, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 2, /* reduc_i8_cost */ + 2, /* reduc_i16_cost */ + 2, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 2, /* reduc_f16_cost */ + 2, /* reduc_f32_cost */ + 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + 2, /* vec_to_scalar_cost */ + 1, /* scalar_to_vec_cost */ + 1, /* align_load_cost */ + 1, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ + }, + 2, /* clast_cost */ + 2, /* fadda_f16_cost */ + 2, /* fadda_f32_cost */ + 2, /* fadda_f64_cost */ + 1 /* scatter_store_elt_cost */ +}; + /* Generic costs for vector insn classes. 
*/ static const struct cpu_vector_cost generic_vector_cost = { @@ -413,17 +685,103 @@ static const struct cpu_vector_cost generic_vector_cost = 1, /* scalar_fp_stmt_cost */ 1, /* scalar_load_cost */ 1, /* scalar_store_cost */ - 1, /* vec_int_stmt_cost */ - 1, /* vec_fp_stmt_cost */ - 2, /* vec_permute_cost */ + 3, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &generic_advsimd_vector_cost, /* advsimd */ + &generic_sve_vector_cost, /* sve */ + nullptr /* issue_info */ +}; + +static const advsimd_vec_cost a64fx_advsimd_vector_cost = +{ + 2, /* int_stmt_cost */ + 5, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 3, /* permute_cost */ + 13, /* reduc_i8_cost */ + 13, /* reduc_i16_cost */ + 13, /* reduc_i32_cost */ + 13, /* reduc_i64_cost */ + 13, /* reduc_f16_cost */ + 13, /* reduc_f32_cost */ + 13, /* reduc_f64_cost */ + 13, /* store_elt_extra_cost */ + 13, /* vec_to_scalar_cost */ + 4, /* scalar_to_vec_cost */ + 6, /* align_load_cost */ + 6, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ +}; + +static const sve_vec_cost a64fx_sve_vector_cost = +{ + { + 2, /* int_stmt_cost */ + 5, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 3, /* permute_cost */ + 13, /* reduc_i8_cost */ + 13, /* reduc_i16_cost */ + 13, /* reduc_i32_cost */ + 13, /* reduc_i64_cost */ + 13, /* reduc_f16_cost */ + 13, /* reduc_f32_cost */ + 13, /* reduc_f64_cost */ + 13, /* store_elt_extra_cost */ + 13, /* vec_to_scalar_cost */ + 4, /* scalar_to_vec_cost */ + 6, /* align_load_cost */ + 6, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ + }, + 13, /* clast_cost */ + 13, /* fadda_f16_cost */ + 13, /* fadda_f32_cost */ + 13, /* fadda_f64_cost */ + 1 /* scatter_store_elt_cost */ +}; + +static const struct cpu_vector_cost a64fx_vector_cost = +{ + 1, /* scalar_int_stmt_cost */ + 5, /* scalar_fp_stmt_cost */ + 4, /* scalar_load_cost */ + 1, /* scalar_store_cost */ + 3, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &a64fx_advsimd_vector_cost, /* advsimd */ + &a64fx_sve_vector_cost, /* sve */ + nullptr /* issue_info */ +}; + +static const advsimd_vec_cost qdf24xx_advsimd_vector_cost = +{ + 1, /* int_stmt_cost */ + 3, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 1, /* reduc_i8_cost */ + 1, /* reduc_i16_cost */ + 1, /* reduc_i32_cost */ + 1, /* reduc_i64_cost */ + 1, /* reduc_f16_cost */ + 1, /* reduc_f32_cost */ + 1, /* reduc_f64_cost */ + 1, /* store_elt_extra_cost */ 1, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ - 1, /* vec_align_load_cost */ - 1, /* vec_unalign_load_cost */ - 1, /* vec_unalign_store_cost */ - 1, /* vec_store_cost */ - 3, /* cond_taken_branch_cost */ - 1 /* cond_not_taken_branch_cost */ + 1, /* align_load_cost */ + 1, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ }; /* QDF24XX costs for vector insn classes. 
*/ @@ -433,17 +791,36 @@ static const struct cpu_vector_cost qdf24xx_vector_cost = 1, /* scalar_fp_stmt_cost */ 1, /* scalar_load_cost */ 1, /* scalar_store_cost */ - 1, /* vec_int_stmt_cost */ - 3, /* vec_fp_stmt_cost */ - 2, /* vec_permute_cost */ - 1, /* vec_to_scalar_cost */ - 1, /* scalar_to_vec_cost */ - 1, /* vec_align_load_cost */ - 1, /* vec_unalign_load_cost */ - 1, /* vec_unalign_store_cost */ - 1, /* vec_store_cost */ 3, /* cond_taken_branch_cost */ - 1 /* cond_not_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &qdf24xx_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ +}; + + +static const advsimd_vec_cost thunderx_advsimd_vector_cost = +{ + 4, /* int_stmt_cost */ + 1, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 4, /* permute_cost */ + 2, /* reduc_i8_cost */ + 2, /* reduc_i16_cost */ + 2, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 2, /* reduc_f16_cost */ + 2, /* reduc_f32_cost */ + 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + 2, /* vec_to_scalar_cost */ + 2, /* scalar_to_vec_cost */ + 3, /* align_load_cost */ + 5, /* unalign_load_cost */ + 5, /* unalign_store_cost */ + 1 /* store_cost */ }; /* ThunderX costs for vector insn classes. */ @@ -453,37 +830,110 @@ static const struct cpu_vector_cost thunderx_vector_cost = 1, /* scalar_fp_stmt_cost */ 3, /* scalar_load_cost */ 1, /* scalar_store_cost */ - 4, /* vec_int_stmt_cost */ - 1, /* vec_fp_stmt_cost */ - 4, /* vec_permute_cost */ - 2, /* vec_to_scalar_cost */ - 2, /* scalar_to_vec_cost */ - 3, /* vec_align_load_cost */ - 5, /* vec_unalign_load_cost */ - 5, /* vec_unalign_store_cost */ - 1, /* vec_store_cost */ 3, /* cond_taken_branch_cost */ - 3 /* cond_not_taken_branch_cost */ + 3, /* cond_not_taken_branch_cost */ + &thunderx_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ }; -/* Generic costs for vector insn classes. 
*/ -static const struct cpu_vector_cost cortexa57_vector_cost = +static const advsimd_vec_cost tsv110_advsimd_vector_cost = +{ + 2, /* int_stmt_cost */ + 2, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 3, /* reduc_i8_cost */ + 3, /* reduc_i16_cost */ + 3, /* reduc_i32_cost */ + 3, /* reduc_i64_cost */ + 3, /* reduc_f16_cost */ + 3, /* reduc_f32_cost */ + 3, /* reduc_f64_cost */ + 3, /* store_elt_extra_cost */ + 3, /* vec_to_scalar_cost */ + 2, /* scalar_to_vec_cost */ + 5, /* align_load_cost */ + 5, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ +}; + +static const struct cpu_vector_cost tsv110_vector_cost = { 1, /* scalar_int_stmt_cost */ 1, /* scalar_fp_stmt_cost */ - 4, /* scalar_load_cost */ + 5, /* scalar_load_cost */ 1, /* scalar_store_cost */ - 2, /* vec_int_stmt_cost */ - 2, /* vec_fp_stmt_cost */ - 3, /* vec_permute_cost */ + 1, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &tsv110_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ +}; + +static const advsimd_vec_cost cortexa57_advsimd_vector_cost = +{ + 2, /* int_stmt_cost */ + 2, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 3, /* permute_cost */ + 8, /* reduc_i8_cost */ + 8, /* reduc_i16_cost */ + 8, /* reduc_i32_cost */ + 8, /* reduc_i64_cost */ + 8, /* reduc_f16_cost */ + 8, /* reduc_f32_cost */ + 8, /* reduc_f64_cost */ + 8, /* store_elt_extra_cost */ 8, /* vec_to_scalar_cost */ 8, /* scalar_to_vec_cost */ - 4, /* vec_align_load_cost */ - 4, /* vec_unalign_load_cost */ - 1, /* vec_unalign_store_cost */ - 1, /* vec_store_cost */ + 4, /* align_load_cost */ + 4, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ +}; + +/* Cortex-A57 costs for vector insn classes. 
*/ +static const struct cpu_vector_cost cortexa57_vector_cost = +{ + 1, /* scalar_int_stmt_cost */ + 1, /* scalar_fp_stmt_cost */ + 4, /* scalar_load_cost */ + 1, /* scalar_store_cost */ 1, /* cond_taken_branch_cost */ - 1 /* cond_not_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &cortexa57_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ +}; + +static const advsimd_vec_cost exynosm1_advsimd_vector_cost = +{ + 3, /* int_stmt_cost */ + 3, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 3, /* permute_cost */ + 3, /* reduc_i8_cost */ + 3, /* reduc_i16_cost */ + 3, /* reduc_i32_cost */ + 3, /* reduc_i64_cost */ + 3, /* reduc_f16_cost */ + 3, /* reduc_f32_cost */ + 3, /* reduc_f64_cost */ + 3, /* store_elt_extra_cost */ + 3, /* vec_to_scalar_cost */ + 3, /* scalar_to_vec_cost */ + 5, /* align_load_cost */ + 5, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ }; static const struct cpu_vector_cost exynosm1_vector_cost = @@ -492,17 +942,35 @@ static const struct cpu_vector_cost exynosm1_vector_cost = 1, /* scalar_fp_stmt_cost */ 5, /* scalar_load_cost */ 1, /* scalar_store_cost */ - 3, /* vec_int_stmt_cost */ - 3, /* vec_fp_stmt_cost */ - 3, /* vec_permute_cost */ - 3, /* vec_to_scalar_cost */ - 3, /* scalar_to_vec_cost */ - 5, /* vec_align_load_cost */ - 5, /* vec_unalign_load_cost */ - 1, /* vec_unalign_store_cost */ - 1, /* vec_store_cost */ 1, /* cond_taken_branch_cost */ - 1 /* cond_not_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &exynosm1_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ +}; + +static const advsimd_vec_cost xgene1_advsimd_vector_cost = +{ + 2, /* int_stmt_cost */ + 2, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 4, /* reduc_i8_cost */ + 4, /* reduc_i16_cost */ + 4, /* reduc_i32_cost */ + 4, /* reduc_i64_cost */ + 4, /* reduc_f16_cost */ + 4, /* reduc_f32_cost */ + 4, /* reduc_f64_cost */ + 4, /* store_elt_extra_cost */ + 4, /* vec_to_scalar_cost */ + 4, /* scalar_to_vec_cost */ + 10, /* align_load_cost */ + 10, /* unalign_load_cost */ + 2, /* unalign_store_cost */ + 2 /* store_cost */ }; /* Generic costs for vector insn classes. 
*/ @@ -512,17 +980,35 @@ static const struct cpu_vector_cost xgene1_vector_cost = 1, /* scalar_fp_stmt_cost */ 5, /* scalar_load_cost */ 1, /* scalar_store_cost */ - 2, /* vec_int_stmt_cost */ - 2, /* vec_fp_stmt_cost */ - 2, /* vec_permute_cost */ - 4, /* vec_to_scalar_cost */ - 4, /* scalar_to_vec_cost */ - 10, /* vec_align_load_cost */ - 10, /* vec_unalign_load_cost */ - 2, /* vec_unalign_store_cost */ - 2, /* vec_store_cost */ 2, /* cond_taken_branch_cost */ - 1 /* cond_not_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &xgene1_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ +}; + +static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost = +{ + 4, /* int_stmt_cost */ + 5, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 10, /* permute_cost */ + 6, /* reduc_i8_cost */ + 6, /* reduc_i16_cost */ + 6, /* reduc_i32_cost */ + 6, /* reduc_i64_cost */ + 6, /* reduc_f16_cost */ + 6, /* reduc_f32_cost */ + 6, /* reduc_f64_cost */ + 6, /* store_elt_extra_cost */ + 6, /* vec_to_scalar_cost */ + 5, /* scalar_to_vec_cost */ + 4, /* align_load_cost */ + 4, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ }; /* Costs for vector insn classes for Vulcan. */ @@ -532,19 +1018,51 @@ static const struct cpu_vector_cost thunderx2t99_vector_cost = 6, /* scalar_fp_stmt_cost */ 4, /* scalar_load_cost */ 1, /* scalar_store_cost */ - 5, /* vec_int_stmt_cost */ - 6, /* vec_fp_stmt_cost */ - 3, /* vec_permute_cost */ - 6, /* vec_to_scalar_cost */ + 2, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &thunderx2t99_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ +}; + +static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost = +{ + 5, /* int_stmt_cost */ + 5, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 10, /* permute_cost */ + 5, /* reduc_i8_cost */ + 5, /* reduc_i16_cost */ + 5, /* reduc_i32_cost */ + 5, /* reduc_i64_cost */ + 5, /* reduc_f16_cost */ + 5, /* reduc_f32_cost */ + 5, /* reduc_f64_cost */ + 5, /* store_elt_extra_cost */ + 5, /* vec_to_scalar_cost */ 5, /* scalar_to_vec_cost */ - 8, /* vec_align_load_cost */ - 8, /* vec_unalign_load_cost */ - 4, /* vec_unalign_store_cost */ - 4, /* vec_store_cost */ + 4, /* align_load_cost */ + 4, /* unalign_load_cost */ + 4, /* unalign_store_cost */ + 4 /* store_cost */ +}; + +static const struct cpu_vector_cost thunderx3t110_vector_cost = +{ + 1, /* scalar_int_stmt_cost */ + 5, /* scalar_fp_stmt_cost */ + 4, /* scalar_load_cost */ + 1, /* scalar_store_cost */ 2, /* cond_taken_branch_cost */ - 1 /* cond_not_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &thunderx3t110_advsimd_vector_cost, /* advsimd */ + nullptr, /* sve */ + nullptr /* issue_info */ }; + /* Generic costs for branch instructions. 
*/ static const struct cpu_branch_cost generic_branch_cost = { @@ -583,6 +1101,8 @@ static const cpu_prefetch_tune generic_prefetch_tune = -1, /* l1_cache_size */ -1, /* l1_cache_line_size */ -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -592,6 +1112,8 @@ static const cpu_prefetch_tune exynosm1_prefetch_tune = -1, /* l1_cache_size */ 64, /* l1_cache_line_size */ -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -600,8 +1122,10 @@ static const cpu_prefetch_tune qdf24xx_prefetch_tune = 4, /* num_slots */ 32, /* l1_cache_size */ 64, /* l1_cache_line_size */ - 1024, /* l2_cache_size */ - -1 /* default_opt_level */ + 512, /* l2_cache_size */ + false, /* prefetch_dynamic_strides */ + 2048, /* minimum_stride */ + 3 /* default_opt_level */ }; static const cpu_prefetch_tune thunderxt88_prefetch_tune = @@ -610,6 +1134,8 @@ static const cpu_prefetch_tune thunderxt88_prefetch_tune = 32, /* l1_cache_size */ 128, /* l1_cache_line_size */ 16*1024, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ 3 /* default_opt_level */ }; @@ -619,6 +1145,8 @@ static const cpu_prefetch_tune thunderx_prefetch_tune = 32, /* l1_cache_size */ 128, /* l1_cache_line_size */ -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -628,6 +1156,52 @@ static const cpu_prefetch_tune thunderx2t99_prefetch_tune = 32, /* l1_cache_size */ 64, /* l1_cache_line_size */ 256, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ + -1 /* default_opt_level */ +}; + +static const cpu_prefetch_tune thunderx3t110_prefetch_tune = +{ + 8, /* num_slots */ + 32, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + 256, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ + -1 /* default_opt_level */ +}; + +static const cpu_prefetch_tune tsv110_prefetch_tune = +{ + 0, /* num_slots */ + 64, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + 512, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ + -1 /* default_opt_level */ +}; + +static const cpu_prefetch_tune xgene1_prefetch_tune = +{ + 8, /* num_slots */ + 32, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + 256, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ + -1 /* default_opt_level */ +}; + +static const cpu_prefetch_tune a64fx_prefetch_tune = +{ + 8, /* num_slots */ + 64, /* l1_cache_size */ + 256, /* l1_cache_line_size */ + 32768, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -639,12 +1213,13 @@ static const struct tune_params generic_tunings = &generic_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 2, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ - 8, /* function_align. */ - 4, /* jump_align. */ - 8, /* loop_align. */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "16:12", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -652,7 +1227,10 @@ static const struct tune_params generic_tunings = 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. 
*/ - (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + /* Enabling AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS significantly benefits + Neoverse V1. It does not have a noticeable effect on A64FX and should + have at most a very minor effect on SVE2 cores. */ + (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS), /* tune_flags. */ &generic_prefetch_tune }; @@ -664,13 +1242,14 @@ static const struct tune_params cortexa35_tunings = &generic_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 1, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ - 16, /* function_align. */ - 4, /* jump_align. */ - 8, /* loop_align. */ + "16", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -690,13 +1269,14 @@ static const struct tune_params cortexa53_tunings = &generic_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 2, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ - 16, /* function_align. */ - 4, /* jump_align. */ - 8, /* loop_align. */ + "16", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -716,13 +1296,14 @@ static const struct tune_params cortexa57_tunings = &cortexa57_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ - 16, /* function_align. */ - 4, /* jump_align. */ - 8, /* loop_align. */ + "16", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -742,13 +1323,14 @@ static const struct tune_params cortexa72_tunings = &cortexa57_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ - 16, /* function_align. */ - 4, /* jump_align. */ - 8, /* loop_align. */ + "16", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -768,13 +1350,14 @@ static const struct tune_params cortexa73_tunings = &cortexa57_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost. */ 2, /* issue_rate. */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ - 16, /* function_align. */ - 4, /* jump_align. */ - 8, /* loop_align. */ + "16", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. 
*/ @@ -796,12 +1379,13 @@ static const struct tune_params exynosm1_tunings = &exynosm1_vector_cost, &generic_branch_cost, &exynosm1_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ - 4, /* function_align. */ - 4, /* jump_align. */ - 4, /* loop_align. */ + "4", /* function_align. */ + "4", /* jump_align. */ + "4", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -821,12 +1405,13 @@ static const struct tune_params thunderxt88_tunings = &thunderx_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 6, /* memmov_cost */ 2, /* issue_rate */ - AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ - 8, /* function_align. */ - 8, /* jump_align. */ - 8, /* loop_align. */ + AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */ + "8", /* function_align. */ + "8", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -846,12 +1431,13 @@ static const struct tune_params thunderx_tunings = &thunderx_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 6, /* memmov_cost */ 2, /* issue_rate */ - AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ - 8, /* function_align. */ - 8, /* jump_align. */ - 8, /* loop_align. */ + AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */ + "8", /* function_align. */ + "8", /* jump_align. */ + "8", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -864,6 +1450,33 @@ static const struct tune_params thunderx_tunings = &thunderx_prefetch_tune }; +static const struct tune_params tsv110_tunings = +{ + &tsv110_extra_costs, + &tsv110_addrcost_table, + &tsv110_regmove_cost, + &tsv110_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ + 4, /* memmov_cost */ + 4, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH + | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ + "16", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &tsv110_prefetch_tune +}; + static const struct tune_params xgene1_tunings = { &xgene1_extra_costs, @@ -872,21 +1485,48 @@ static const struct tune_params xgene1_tunings = &xgene1_vector_cost, &generic_branch_cost, &xgene1_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 6, /* memmov_cost */ 4, /* issue_rate */ AARCH64_FUSE_NOTHING, /* fusible_ops */ - 16, /* function_align. */ - 8, /* jump_align. */ - 16, /* loop_align. */ + "16", /* function_align. */ + "16", /* jump_align. */ + "16", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ - 0, /* max_case_values. */ + 17, /* max_case_values. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ - &generic_prefetch_tune + (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. 
*/ + &xgene1_prefetch_tune +}; + +static const struct tune_params emag_tunings = +{ + &xgene1_extra_costs, + &xgene1_addrcost_table, + &xgene1_regmove_cost, + &xgene1_vector_cost, + &generic_branch_cost, + &xgene1_approx_modes, + SVE_NOT_IMPLEMENTED, + 6, /* memmov_cost */ + 4, /* issue_rate */ + AARCH64_FUSE_NOTHING, /* fusible_ops */ + "16", /* function_align. */ + "16", /* jump_align. */ + "16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 17, /* max_case_values. */ + tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */ + &xgene1_prefetch_tune }; static const struct tune_params qdf24xx_tunings = @@ -897,13 +1537,14 @@ static const struct tune_params qdf24xx_tunings = &qdf24xx_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 4, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */ - 16, /* function_align. */ - 8, /* jump_align. */ - 16, /* loop_align. */ + "16", /* function_align. */ + "8", /* jump_align. */ + "16", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -911,7 +1552,7 @@ static const struct tune_params qdf24xx_tunings = 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags. */ &qdf24xx_prefetch_tune }; @@ -925,13 +1566,14 @@ static const struct tune_params saphira_tunings = &generic_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 4, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */ - 16, /* function_align. */ - 8, /* jump_align. */ - 16, /* loop_align. */ + "16", /* function_align. */ + "8", /* jump_align. */ + "16", /* loop_align. */ 2, /* int_reassoc_width. */ 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ @@ -951,13 +1593,14 @@ static const struct tune_params thunderx2t99_tunings = &thunderx2t99_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost. */ 4, /* issue_rate. */ - (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC - | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */ - 16, /* function_align. */ - 8, /* jump_align. */ - 16, /* loop_align. */ + (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC + | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ + "16", /* function_align. */ + "8", /* jump_align. */ + "16", /* loop_align. */ 3, /* int_reassoc_width. */ 2, /* fp_reassoc_width. */ 2, /* vec_reassoc_width. */ @@ -969,6 +1612,282 @@ static const struct tune_params thunderx2t99_tunings = &thunderx2t99_prefetch_tune }; +static const struct tune_params thunderx3t110_tunings = +{ + &thunderx3t110_extra_costs, + &thunderx3t110_addrcost_table, + &thunderx3t110_regmove_cost, + &thunderx3t110_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ + 4, /* memmov_cost. */ + 6, /* issue_rate. */ + (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC + | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ + "16", /* function_align. */ + "8", /* jump_align. */ + "16", /* loop_align. */ + 3, /* int_reassoc_width. 
*/ + 2, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &thunderx3t110_prefetch_tune +}; + +static const struct tune_params neoversen1_tunings = +{ + &cortexa76_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &cortexa57_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ + 4, /* memmov_cost */ + 3, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune +}; + +static const advsimd_vec_cost neoversev1_advsimd_vector_cost = +{ + 2, /* int_stmt_cost */ + 2, /* fp_stmt_cost */ + 4, /* ld2_st2_permute_cost */ + 4, /* ld3_st3_permute_cost */ + 5, /* ld4_st4_permute_cost */ + 3, /* permute_cost */ + 4, /* reduc_i8_cost */ + 4, /* reduc_i16_cost */ + 2, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 6, /* reduc_f16_cost */ + 3, /* reduc_f32_cost */ + 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + /* This value is just inherited from the Cortex-A57 table. */ + 8, /* vec_to_scalar_cost */ + /* This depends very much on what the scalar value is and + where it comes from. E.g. some constants take two dependent + instructions or a load, while others might be moved from a GPR. + 4 seems to be a reasonable compromise in practice. */ + 4, /* scalar_to_vec_cost */ + 4, /* align_load_cost */ + 4, /* unalign_load_cost */ + /* Although stores have a latency of 2 and compete for the + vector pipes, in practice it's better not to model that. */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ +}; + +static const sve_vec_cost neoversev1_sve_vector_cost = +{ + { + 2, /* int_stmt_cost */ + 2, /* fp_stmt_cost */ + 4, /* ld2_st2_permute_cost */ + 7, /* ld3_st3_permute_cost */ + 8, /* ld4_st4_permute_cost */ + 3, /* permute_cost */ + /* Theoretically, a reduction involving 31 scalar ADDs could + complete in ~9 cycles and would have a cost of 31. [SU]ADDV + completes in 14 cycles, so give it a cost of 31 + 5. */ + 36, /* reduc_i8_cost */ + /* Likewise for 15 scalar ADDs (~5 cycles) vs. 12: 15 + 7. */ + 22, /* reduc_i16_cost */ + /* Likewise for 7 scalar ADDs (~3 cycles) vs. 10: 7 + 7. */ + 14, /* reduc_i32_cost */ + /* Likewise for 3 scalar ADDs (~2 cycles) vs. 10: 3 + 8. */ + 11, /* reduc_i64_cost */ + /* Theoretically, a reduction involving 15 scalar FADDs could + complete in ~9 cycles and would have a cost of 30. FADDV + completes in 13 cycles, so give it a cost of 30 + 4. */ + 34, /* reduc_f16_cost */ + /* Likewise for 7 scalar FADDs (~6 cycles) vs. 11: 14 + 5. */ + 19, /* reduc_f32_cost */ + /* Likewise for 3 scalar FADDs (~4 cycles) vs. 9: 6 + 5. */ + 11, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + /* This value is just inherited from the Cortex-A57 table. */ + 8, /* vec_to_scalar_cost */ + /* See the comment above the Advanced SIMD versions. 
*/ + 4, /* scalar_to_vec_cost */ + 4, /* align_load_cost */ + 4, /* unalign_load_cost */ + /* Although stores have a latency of 2 and compete for the + vector pipes, in practice it's better not to model that. */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ + }, + 3, /* clast_cost */ + 19, /* fadda_f16_cost */ + 11, /* fadda_f32_cost */ + 8, /* fadda_f64_cost */ + 3 /* scatter_store_elt_cost */ +}; + +static const aarch64_scalar_vec_issue_info neoversev1_scalar_issue_info = +{ + 3, /* loads_stores_per_cycle */ + 2, /* stores_per_cycle */ + 4, /* general_ops_per_cycle */ + 0, /* fp_simd_load_general_ops */ + 1 /* fp_simd_store_general_ops */ +}; + +static const aarch64_advsimd_vec_issue_info neoversev1_advsimd_issue_info = +{ + { + 3, /* loads_stores_per_cycle */ + 2, /* stores_per_cycle */ + 4, /* general_ops_per_cycle */ + 0, /* fp_simd_load_general_ops */ + 1 /* fp_simd_store_general_ops */ + }, + 2, /* ld2_st2_general_ops */ + 2, /* ld3_st3_general_ops */ + 3 /* ld4_st4_general_ops */ +}; + +static const aarch64_sve_vec_issue_info neoversev1_sve_issue_info = +{ + { + { + 2, /* loads_per_cycle */ + 2, /* stores_per_cycle */ + 2, /* general_ops_per_cycle */ + 0, /* fp_simd_load_general_ops */ + 1 /* fp_simd_store_general_ops */ + }, + 2, /* ld2_st2_general_ops */ + 2, /* ld3_st3_general_ops */ + 3 /* ld4_st4_general_ops */ + }, + 1, /* pred_ops_per_cycle */ + 2, /* while_pred_ops */ + 2, /* int_cmp_pred_ops */ + 1, /* fp_cmp_pred_ops */ + 1, /* gather_scatter_pair_general_ops */ + 1 /* gather_scatter_pair_pred_ops */ +}; + +static const aarch64_vec_issue_info neoversev1_vec_issue_info = +{ + &neoversev1_scalar_issue_info, + &neoversev1_advsimd_issue_info, + &neoversev1_sve_issue_info +}; + +/* Neoverse V1 costs for vector insn classes. */ +static const struct cpu_vector_cost neoversev1_vector_cost = +{ + 1, /* scalar_int_stmt_cost */ + 2, /* scalar_fp_stmt_cost */ + 4, /* scalar_load_cost */ + 1, /* scalar_store_cost */ + 1, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &neoversev1_advsimd_vector_cost, /* advsimd */ + &neoversev1_sve_vector_cost, /* sve */ + &neoversev1_vec_issue_info /* issue_info */ +}; + +static const struct tune_params neoversev1_tunings = +{ + &cortexa76_extra_costs, + &neoversev1_addrcost_table, + &generic_regmove_cost, + &neoversev1_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_256, /* sve_width */ + 4, /* memmov_cost */ + 3, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS + | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ + &generic_prefetch_tune +}; + +static const struct tune_params neoversen2_tunings = +{ + &cortexa76_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &cortexa57_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_128, /* sve_width */ + 4, /* memmov_cost */ + 3, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. 
*/ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune +}; + +static const struct tune_params a64fx_tunings = +{ + &a64fx_extra_costs, + &a64fx_addrcost_table, + &a64fx_regmove_cost, + &a64fx_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_512, /* sve_width */ + 4, /* memmov_cost */ + 7, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32", /* function_align. */ + "16", /* jump_align. */ + "32", /* loop_align. */ + 4, /* int_reassoc_width. */ + 2, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &a64fx_prefetch_tune +}; + /* Support for fine-grained override of the tuning structures. */ struct aarch64_tuning_override_function { @@ -978,12 +1897,14 @@ struct aarch64_tuning_override_function static void aarch64_parse_fuse_string (const char*, struct tune_params*); static void aarch64_parse_tune_string (const char*, struct tune_params*); +static void aarch64_parse_sve_width_string (const char*, struct tune_params*); static const struct aarch64_tuning_override_function aarch64_tuning_override_functions[] = { { "fuse", aarch64_parse_fuse_string }, { "tune", aarch64_parse_tune_string }, + { "sve_width", aarch64_parse_sve_width_string }, { NULL, NULL } }; @@ -995,7 +1916,7 @@ struct processor enum aarch64_processor sched_core; enum aarch64_arch arch; unsigned architecture_version; - const unsigned long flags; + const uint64_t flags; const struct tune_params *const tune; }; @@ -1028,9 +1949,55 @@ static const struct processor *selected_arch; static const struct processor *selected_cpu; static const struct processor *selected_tune; +enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A; + /* The current tuning set. */ struct tune_params aarch64_tune_params = generic_tunings; +/* Check whether an 'aarch64_vector_pcs' attribute is valid. */ + +static tree +handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, + int, bool *no_add_attrs) +{ + /* Since we set fn_type_req to true, the caller should have checked + this for us. */ + gcc_assert (FUNC_OR_METHOD_TYPE_P (*node)); + switch ((arm_pcs) fntype_abi (*node).id ()) + { + case ARM_PCS_AAPCS64: + case ARM_PCS_SIMD: + return NULL_TREE; + + case ARM_PCS_SVE: + error ("the %qE attribute cannot be applied to an SVE function type", + name); + *no_add_attrs = true; + return NULL_TREE; + + case ARM_PCS_TLSDESC: + case ARM_PCS_UNKNOWN: + break; + } + gcc_unreachable (); +} + +/* Table of machine attributes. 
*/ +static const struct attribute_spec aarch64_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ + { "aarch64_vector_pcs", 0, 0, false, true, true, true, + handle_aarch64_vector_pcs_attribute, NULL }, + { "arm_sve_vector_bits", 1, 1, false, true, false, true, + aarch64_sve::handle_arm_sve_vector_bits_attribute, + NULL }, + { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }, + { "SVE type", 3, 3, false, true, false, true, NULL, NULL }, + { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL }, + { NULL, 0, 0, false, false, false, false, NULL, NULL } +}; + #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) /* An ISA extension in the co-processor and main instruction set space. */ @@ -1051,6 +2018,101 @@ aarch64_cc; #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1)) +struct aarch64_branch_protect_type +{ + /* The type's name that the user passes to the branch-protection option + string. */ + const char* name; + /* Function to handle the protection type and set global variables. + First argument is the string token corresponding with this type and the + second argument is the next token in the option string. + Return values: + * AARCH64_PARSE_OK: Handling was sucessful. + * AARCH64_INVALID_ARG: The type is invalid in this context and the caller + should print an error. + * AARCH64_INVALID_FEATURE: The type is invalid and the handler prints its + own error. */ + enum aarch64_parse_opt_result (*handler)(char*, char*); + /* A list of types that can follow this type in the option string. */ + const aarch64_branch_protect_type* subtypes; + unsigned int num_subtypes; +}; + +static enum aarch64_parse_opt_result +aarch64_handle_no_branch_protection (char* str, char* rest) +{ + aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE; + aarch64_enable_bti = 0; + if (rest) + { + error ("unexpected %<%s%> after %<%s%>", rest, str); + return AARCH64_PARSE_INVALID_FEATURE; + } + return AARCH64_PARSE_OK; +} + +static enum aarch64_parse_opt_result +aarch64_handle_standard_branch_protection (char* str, char* rest) +{ + aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF; + aarch64_ra_sign_key = AARCH64_KEY_A; + aarch64_enable_bti = 1; + if (rest) + { + error ("unexpected %<%s%> after %<%s%>", rest, str); + return AARCH64_PARSE_INVALID_FEATURE; + } + return AARCH64_PARSE_OK; +} + +static enum aarch64_parse_opt_result +aarch64_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED, + char* rest ATTRIBUTE_UNUSED) +{ + aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF; + aarch64_ra_sign_key = AARCH64_KEY_A; + return AARCH64_PARSE_OK; +} + +static enum aarch64_parse_opt_result +aarch64_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED, + char* rest ATTRIBUTE_UNUSED) +{ + aarch64_ra_sign_scope = AARCH64_FUNCTION_ALL; + return AARCH64_PARSE_OK; +} + +static enum aarch64_parse_opt_result +aarch64_handle_pac_ret_b_key (char* str ATTRIBUTE_UNUSED, + char* rest ATTRIBUTE_UNUSED) +{ + aarch64_ra_sign_key = AARCH64_KEY_B; + return AARCH64_PARSE_OK; +} + +static enum aarch64_parse_opt_result +aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED, + char* rest ATTRIBUTE_UNUSED) +{ + aarch64_enable_bti = 1; + return AARCH64_PARSE_OK; +} + +static const struct aarch64_branch_protect_type aarch64_pac_ret_subtypes[] = { + { "leaf", aarch64_handle_pac_ret_leaf, NULL, 0 }, + { "b-key", aarch64_handle_pac_ret_b_key, NULL, 0 }, + { NULL, NULL, NULL, 0 } +}; + +static const struct 
aarch64_branch_protect_type aarch64_branch_protect_types[] = { + { "none", aarch64_handle_no_branch_protection, NULL, 0 }, + { "standard", aarch64_handle_standard_branch_protection, NULL, 0 }, + { "pac-ret", aarch64_handle_pac_ret_protection, aarch64_pac_ret_subtypes, + ARRAY_SIZE (aarch64_pac_ret_subtypes) }, + { "bti", aarch64_handle_bti_protection, NULL, 0 }, + { NULL, NULL, NULL, 0 } +}; + /* The condition codes of the processor, and the inverse function. */ static const char * const aarch64_condition_codes[] = { @@ -1058,96 +2120,499 @@ static const char * const aarch64_condition_codes[] = "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" }; -/* Generate code to enable conditional branches in functions over 1 MiB. */ -const char * -aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, - const char * branch_format) +/* The preferred condition codes for SVE conditions. */ +static const char *const aarch64_sve_condition_codes[] = { - rtx_code_label * tmp_label = gen_label_rtx (); - char label_buf[256]; - char buffer[128]; - ASM_GENERATE_INTERNAL_LABEL (label_buf, dest, - CODE_LABEL_NUMBER (tmp_label)); - const char *label_ptr = targetm.strip_name_encoding (label_buf); - rtx dest_label = operands[pos_label]; - operands[pos_label] = tmp_label; - - snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr); - output_asm_insn (buffer, operands); + "none", "any", "nlast", "last", "first", "nfrst", "vs", "vc", + "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv" +}; - snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr); - operands[pos_label] = dest_label; - output_asm_insn (buffer, operands); - return ""; -} +/* Return the assembly token for svpattern value VALUE. */ -void -aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg) +static const char * +svpattern_token (enum aarch64_svpattern pattern) { - const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector"; - if (TARGET_GENERAL_REGS_ONLY) - error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg); - else - error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg); + switch (pattern) + { +#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER; + AARCH64_FOR_SVPATTERN (CASE) +#undef CASE + case AARCH64_NUM_SVPATTERNS: + break; + } + gcc_unreachable (); } -/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. - The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have - the same cost even if ALL_REGS has a much larger cost. ALL_REGS is also - used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory - cost (in this case the best class is the lowest cost one). Using ALL_REGS - irrespectively of its cost results in bad allocations with many redundant - int<->FP moves which are expensive on various cores. - To avoid this we don't allow ALL_REGS as the allocno class, but force a - decision between FP_REGS and GENERAL_REGS. We use the allocno class if it - isn't ALL_REGS. Similarly, use the best class if it isn't ALL_REGS. - Otherwise set the allocno class depending on the mode. - The result of this is that it is no longer inefficient to have a higher - memory move cost than the register move cost. -*/ +/* Return the location of a piece that is known to be passed or returned + in registers. FIRST_ZR is the first unused vector argument register + and FIRST_PR is the first unused predicate argument register. 
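
Editor's note: the following is an illustrative sketch only and is not part of the patch. It models the observable effect of the branch-protection handlers added above (aarch64_handle_no_branch_protection, aarch64_handle_standard_branch_protection, aarch64_handle_pac_ret_protection and its "leaf"/"b-key" subtypes, and aarch64_handle_bti_protection) on the three pieces of state they set: return-address signing scope, signing key, and BTI. The enums, struct state, apply_token and main below are stand-ins invented for the example; the real option parser that walks aarch64_branch_protect_types and its subtype tables is not reproduced here.

/* Illustrative sketch -- not GCC code.  Mirrors the state changes made by
   the branch-protection handlers in the patch, under toy names.  */

#include <stdio.h>
#include <string.h>

enum scope { SCOPE_NONE, SCOPE_NON_LEAF, SCOPE_ALL };
enum key { KEY_A, KEY_B };

struct state { enum scope scope; enum key key; int bti; };

/* Apply one token of a -mbranch-protection= string, following the handlers
   in the patch: "none", "standard", "pac-ret", its "leaf" and "b-key"
   subtypes, and "bti".  Returns 0 for unknown tokens.  */
static int
apply_token (struct state *s, const char *tok)
{
  if (!strcmp (tok, "none"))
    { s->scope = SCOPE_NONE; s->bti = 0; }
  else if (!strcmp (tok, "standard"))
    { s->scope = SCOPE_NON_LEAF; s->key = KEY_A; s->bti = 1; }
  else if (!strcmp (tok, "pac-ret"))
    { s->scope = SCOPE_NON_LEAF; s->key = KEY_A; }
  else if (!strcmp (tok, "leaf"))
    s->scope = SCOPE_ALL;
  else if (!strcmp (tok, "b-key"))
    s->key = KEY_B;
  else if (!strcmp (tok, "bti"))
    s->bti = 1;
  else
    return 0;
  return 1;
}

int
main (void)
{
  /* Example: -mbranch-protection=pac-ret+leaf+b-key  */
  struct state s = { SCOPE_NONE, KEY_A, 0 };
  const char *tokens[] = { "pac-ret", "leaf", "b-key" };
  for (unsigned int i = 0; i < 3; i++)
    apply_token (&s, tokens[i]);

  /* Expected: scope=2 (all functions), key=1 (B key), bti=0.  */
  printf ("scope=%d key=%d bti=%d\n", s.scope, s.key, s.bti);
  return 0;
}

Note how "standard" is simply the union of "pac-ret" and "bti" with the default A key, which matches the handler bodies in the patch.
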
*/ -static reg_class_t -aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, - reg_class_t best_class) +rtx +pure_scalable_type_info::piece::get_rtx (unsigned int first_zr, + unsigned int first_pr) const { - machine_mode mode; + gcc_assert (VECTOR_MODE_P (mode) + && first_zr + num_zr <= V0_REGNUM + NUM_FP_ARG_REGS + && first_pr + num_pr <= P0_REGNUM + NUM_PR_ARG_REGS); - if (allocno_class != ALL_REGS) - return allocno_class; + if (num_zr > 0 && num_pr == 0) + return gen_rtx_REG (mode, first_zr); - if (best_class != ALL_REGS) - return best_class; + if (num_zr == 0 && num_pr == 1) + return gen_rtx_REG (mode, first_pr); - mode = PSEUDO_REGNO_MODE (regno); - return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS; + gcc_unreachable (); } -static unsigned int -aarch64_min_divisions_for_recip_mul (machine_mode mode) +/* Return the total number of vector registers required by the PST. */ + +unsigned int +pure_scalable_type_info::num_zr () const { - if (GET_MODE_UNIT_SIZE (mode) == 4) - return aarch64_tune_params.min_div_recip_mul_sf; - return aarch64_tune_params.min_div_recip_mul_df; + unsigned int res = 0; + for (unsigned int i = 0; i < pieces.length (); ++i) + res += pieces[i].num_zr; + return res; } -/* Return the reassociation width of treeop OPC with mode MODE. */ -static int -aarch64_reassociation_width (unsigned opc, machine_mode mode) +/* Return the total number of predicate registers required by the PST. */ + +unsigned int +pure_scalable_type_info::num_pr () const { - if (VECTOR_MODE_P (mode)) - return aarch64_tune_params.vec_reassoc_width; - if (INTEGRAL_MODE_P (mode)) - return aarch64_tune_params.int_reassoc_width; - /* Avoid reassociating floating point addition so we emit more FMAs. */ - if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR) - return aarch64_tune_params.fp_reassoc_width; - return 1; + unsigned int res = 0; + for (unsigned int i = 0; i < pieces.length (); ++i) + res += pieces[i].num_pr; + return res; } -/* Provide a mapping from gcc register numbers to dwarf register numbers. */ -unsigned -aarch64_dbx_register_number (unsigned regno) -{ +/* Return the location of a PST that is known to be passed or returned + in registers. FIRST_ZR is the first unused vector argument register + and FIRST_PR is the first unused predicate argument register. */ + +rtx +pure_scalable_type_info::get_rtx (machine_mode mode, + unsigned int first_zr, + unsigned int first_pr) const +{ + /* Try to return a single REG if possible. This leads to better + code generation; it isn't required for correctness. */ + if (mode == pieces[0].mode) + { + gcc_assert (pieces.length () == 1); + return pieces[0].get_rtx (first_zr, first_pr); + } + + /* Build up a PARALLEL that contains the individual pieces. */ + rtvec rtxes = rtvec_alloc (pieces.length ()); + for (unsigned int i = 0; i < pieces.length (); ++i) + { + rtx reg = pieces[i].get_rtx (first_zr, first_pr); + rtx offset = gen_int_mode (pieces[i].offset, Pmode); + RTVEC_ELT (rtxes, i) = gen_rtx_EXPR_LIST (VOIDmode, reg, offset); + first_zr += pieces[i].num_zr; + first_pr += pieces[i].num_pr; + } + return gen_rtx_PARALLEL (mode, rtxes); +} + +/* Analyze whether TYPE is a Pure Scalable Type according to the rules + in the AAPCS64. */ + +pure_scalable_type_info::analysis_result +pure_scalable_type_info::analyze (const_tree type) +{ + /* Prevent accidental reuse. */ + gcc_assert (pieces.is_empty ()); + + /* No code will be generated for erroneous types, so we won't establish + an ABI mapping. 
*/ + if (type == error_mark_node) + return NO_ABI_IDENTITY; + + /* Zero-sized types disappear in the language->ABI mapping. */ + if (TYPE_SIZE (type) && integer_zerop (TYPE_SIZE (type))) + return NO_ABI_IDENTITY; + + /* Check for SVTs, SPTs, and built-in tuple types that map to PSTs. */ + piece p = {}; + if (aarch64_sve::builtin_type_p (type, &p.num_zr, &p.num_pr)) + { + machine_mode mode = TYPE_MODE_RAW (type); + gcc_assert (VECTOR_MODE_P (mode) + && (!TARGET_SVE || aarch64_sve_mode_p (mode))); + + p.mode = p.orig_mode = mode; + add_piece (p); + return IS_PST; + } + + /* Check for user-defined PSTs. */ + if (TREE_CODE (type) == ARRAY_TYPE) + return analyze_array (type); + if (TREE_CODE (type) == RECORD_TYPE) + return analyze_record (type); + + return ISNT_PST; +} + +/* Analyze a type that is known not to be passed or returned in memory. + Return true if it has an ABI identity and is a Pure Scalable Type. */ + +bool +pure_scalable_type_info::analyze_registers (const_tree type) +{ + analysis_result result = analyze (type); + gcc_assert (result != DOESNT_MATTER); + return result == IS_PST; +} + +/* Subroutine of analyze for handling ARRAY_TYPEs. */ + +pure_scalable_type_info::analysis_result +pure_scalable_type_info::analyze_array (const_tree type) +{ + /* Analyze the element type. */ + pure_scalable_type_info element_info; + analysis_result result = element_info.analyze (TREE_TYPE (type)); + if (result != IS_PST) + return result; + + /* An array of unknown, flexible or variable length will be passed and + returned by reference whatever we do. */ + tree nelts_minus_one = array_type_nelts (type); + if (!tree_fits_uhwi_p (nelts_minus_one)) + return DOESNT_MATTER; + + /* Likewise if the array is constant-sized but too big to be interesting. + The double checks against MAX_PIECES are to protect against overflow. */ + unsigned HOST_WIDE_INT count = tree_to_uhwi (nelts_minus_one); + if (count > MAX_PIECES) + return DOESNT_MATTER; + count += 1; + if (count * element_info.pieces.length () > MAX_PIECES) + return DOESNT_MATTER; + + /* The above checks should have weeded out elements of unknown size. */ + poly_uint64 element_bytes; + if (!poly_int_tree_p (TYPE_SIZE_UNIT (TREE_TYPE (type)), &element_bytes)) + gcc_unreachable (); + + /* Build up the list of individual vectors and predicates. */ + gcc_assert (!element_info.pieces.is_empty ()); + for (unsigned int i = 0; i < count; ++i) + for (unsigned int j = 0; j < element_info.pieces.length (); ++j) + { + piece p = element_info.pieces[j]; + p.offset += i * element_bytes; + add_piece (p); + } + return IS_PST; +} + +/* Subroutine of analyze for handling RECORD_TYPEs. */ + +pure_scalable_type_info::analysis_result +pure_scalable_type_info::analyze_record (const_tree type) +{ + for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + /* Zero-sized fields disappear in the language->ABI mapping. */ + if (DECL_SIZE (field) && integer_zerop (DECL_SIZE (field))) + continue; + + /* All fields with an ABI identity must be PSTs for the record as + a whole to be a PST. If any individual field is too big to be + interesting then the record is too. */ + pure_scalable_type_info field_info; + analysis_result subresult = field_info.analyze (TREE_TYPE (field)); + if (subresult == NO_ABI_IDENTITY) + continue; + if (subresult != IS_PST) + return subresult; + + /* Since all previous fields are PSTs, we ought to be able to track + the field offset using poly_ints. 
*/ + tree bitpos = bit_position (field); + gcc_assert (poly_int_tree_p (bitpos)); + + /* For the same reason, it shouldn't be possible to create a PST field + whose offset isn't byte-aligned. */ + poly_widest_int wide_bytepos = exact_div (wi::to_poly_widest (bitpos), + BITS_PER_UNIT); + + /* Punt if the record is too big to be interesting. */ + poly_uint64 bytepos; + if (!wide_bytepos.to_uhwi (&bytepos) + || pieces.length () + field_info.pieces.length () > MAX_PIECES) + return DOESNT_MATTER; + + /* Add the individual vectors and predicates in the field to the + record's list. */ + gcc_assert (!field_info.pieces.is_empty ()); + for (unsigned int i = 0; i < field_info.pieces.length (); ++i) + { + piece p = field_info.pieces[i]; + p.offset += bytepos; + add_piece (p); + } + } + /* Empty structures disappear in the language->ABI mapping. */ + return pieces.is_empty () ? NO_ABI_IDENTITY : IS_PST; +} + +/* Add P to the list of pieces in the type. */ + +void +pure_scalable_type_info::add_piece (const piece &p) +{ + /* Try to fold the new piece into the previous one to form a + single-mode PST. For example, if we see three consecutive vectors + of the same mode, we can represent them using the corresponding + 3-tuple mode. + + This is purely an optimization. */ + if (!pieces.is_empty ()) + { + piece &prev = pieces.last (); + gcc_assert (VECTOR_MODE_P (p.mode) && VECTOR_MODE_P (prev.mode)); + unsigned int nelems1, nelems2; + if (prev.orig_mode == p.orig_mode + && known_eq (prev.offset + GET_MODE_SIZE (prev.mode), p.offset) + && constant_multiple_p (GET_MODE_NUNITS (prev.mode), + GET_MODE_NUNITS (p.orig_mode), &nelems1) + && constant_multiple_p (GET_MODE_NUNITS (p.mode), + GET_MODE_NUNITS (p.orig_mode), &nelems2) + && targetm.array_mode (p.orig_mode, + nelems1 + nelems2).exists (&prev.mode)) + { + prev.num_zr += p.num_zr; + prev.num_pr += p.num_pr; + return; + } + } + pieces.quick_push (p); +} + +/* Return true if at least one possible value of type TYPE includes at + least one object of Pure Scalable Type, in the sense of the AAPCS64. + + This is a relatively expensive test for some types, so it should + generally be made as late as possible. */ + +static bool +aarch64_some_values_include_pst_objects_p (const_tree type) +{ + if (TYPE_SIZE (type) && integer_zerop (TYPE_SIZE (type))) + return false; + + if (aarch64_sve::builtin_type_p (type)) + return true; + + if (TREE_CODE (type) == ARRAY_TYPE || TREE_CODE (type) == COMPLEX_TYPE) + return aarch64_some_values_include_pst_objects_p (TREE_TYPE (type)); + + if (RECORD_OR_UNION_TYPE_P (type)) + for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + if (TREE_CODE (field) == FIELD_DECL + && aarch64_some_values_include_pst_objects_p (TREE_TYPE (field))) + return true; + + return false; +} + +/* Return the descriptor of the SIMD ABI. */ + +static const predefined_function_abi & +aarch64_simd_abi (void) +{ + predefined_function_abi &simd_abi = function_abis[ARM_PCS_SIMD]; + if (!simd_abi.initialized_p ()) + { + HARD_REG_SET full_reg_clobbers + = default_function_abi.full_reg_clobbers (); + for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (FP_SIMD_SAVED_REGNUM_P (regno)) + CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); + simd_abi.initialize (ARM_PCS_SIMD, full_reg_clobbers); + } + return simd_abi; +} + +/* Return the descriptor of the SVE PCS. 
*/ + +static const predefined_function_abi & +aarch64_sve_abi (void) +{ + predefined_function_abi &sve_abi = function_abis[ARM_PCS_SVE]; + if (!sve_abi.initialized_p ()) + { + HARD_REG_SET full_reg_clobbers + = default_function_abi.full_reg_clobbers (); + for (int regno = V8_REGNUM; regno <= V23_REGNUM; ++regno) + CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); + for (int regno = P4_REGNUM; regno <= P15_REGNUM; ++regno) + CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); + sve_abi.initialize (ARM_PCS_SVE, full_reg_clobbers); + } + return sve_abi; +} + +/* If X is an UNSPEC_SALT_ADDR expression, return the address that it + wraps, otherwise return X itself. */ + +static rtx +strip_salt (rtx x) +{ + rtx search = x; + if (GET_CODE (search) == CONST) + search = XEXP (search, 0); + if (GET_CODE (search) == UNSPEC && XINT (search, 1) == UNSPEC_SALT_ADDR) + x = XVECEXP (search, 0, 0); + return x; +} + +/* Like strip_offset, but also strip any UNSPEC_SALT_ADDR from the + expression. */ + +static rtx +strip_offset_and_salt (rtx addr, poly_int64 *offset) +{ + return strip_salt (strip_offset (addr, offset)); +} + +/* Generate code to enable conditional branches in functions over 1 MiB. */ +const char * +aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, + const char * branch_format) +{ + rtx_code_label * tmp_label = gen_label_rtx (); + char label_buf[256]; + char buffer[128]; + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest, + CODE_LABEL_NUMBER (tmp_label)); + const char *label_ptr = targetm.strip_name_encoding (label_buf); + rtx dest_label = operands[pos_label]; + operands[pos_label] = tmp_label; + + snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr); + output_asm_insn (buffer, operands); + + snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr); + operands[pos_label] = dest_label; + output_asm_insn (buffer, operands); + return ""; +} + +void +aarch64_err_no_fpadvsimd (machine_mode mode) +{ + if (TARGET_GENERAL_REGS_ONLY) + if (FLOAT_MODE_P (mode)) + error ("%qs is incompatible with the use of floating-point types", + "-mgeneral-regs-only"); + else + error ("%qs is incompatible with the use of vector types", + "-mgeneral-regs-only"); + else + if (FLOAT_MODE_P (mode)) + error ("%qs feature modifier is incompatible with the use of" + " floating-point types", "+nofp"); + else + error ("%qs feature modifier is incompatible with the use of" + " vector types", "+nofp"); +} + +/* Report when we try to do something that requires SVE when SVE is disabled. + This is an error of last resort and isn't very high-quality. It usually + involves attempts to measure the vector length in some way. */ +static void +aarch64_report_sve_required (void) +{ + static bool reported_p = false; + + /* Avoid reporting a slew of messages for a single oversight. */ + if (reported_p) + return; + + error ("this operation requires the SVE ISA extension"); + inform (input_location, "you can enable SVE using the command-line" + " option %<-march%>, or by using the %" + " attribute or pragma"); + reported_p = true; +} + +/* Return true if REGNO is P0-P15 or one of the special FFR-related + registers. */ +inline bool +pr_or_ffr_regnum_p (unsigned int regno) +{ + return PR_REGNUM_P (regno) || regno == FFR_REGNUM || regno == FFRT_REGNUM; +} + +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. + The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and + GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much + higher cost. 
POINTER_AND_FP_REGS is also used if the cost of both FP_REGS + and GENERAL_REGS is lower than the memory cost (in this case the best class + is the lowest cost one). Using POINTER_AND_FP_REGS irrespectively of its + cost results in bad allocations with many redundant int<->FP moves which + are expensive on various cores. + To avoid this we don't allow POINTER_AND_FP_REGS as the allocno class, but + force a decision between FP_REGS and GENERAL_REGS. We use the allocno class + if it isn't POINTER_AND_FP_REGS. Similarly, use the best class if it isn't + POINTER_AND_FP_REGS. Otherwise set the allocno class depending on the mode. + The result of this is that it is no longer inefficient to have a higher + memory move cost than the register move cost. +*/ + +static reg_class_t +aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, + reg_class_t best_class) +{ + machine_mode mode; + + if (!reg_class_subset_p (GENERAL_REGS, allocno_class) + || !reg_class_subset_p (FP_REGS, allocno_class)) + return allocno_class; + + if (!reg_class_subset_p (GENERAL_REGS, best_class) + || !reg_class_subset_p (FP_REGS, best_class)) + return best_class; + + mode = PSEUDO_REGNO_MODE (regno); + return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS; +} + +static unsigned int +aarch64_min_divisions_for_recip_mul (machine_mode mode) +{ + if (GET_MODE_UNIT_SIZE (mode) == 4) + return aarch64_tune_params.min_div_recip_mul_sf; + return aarch64_tune_params.min_div_recip_mul_df; +} + +/* Return the reassociation width of treeop OPC with mode MODE. */ +static int +aarch64_reassociation_width (unsigned opc, machine_mode mode) +{ + if (VECTOR_MODE_P (mode)) + return aarch64_tune_params.vec_reassoc_width; + if (INTEGRAL_MODE_P (mode)) + return aarch64_tune_params.int_reassoc_width; + /* Avoid reassociating floating point addition so we emit more FMAs. */ + if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR) + return aarch64_tune_params.fp_reassoc_width; + return 1; +} + +/* Provide a mapping from gcc register numbers to dwarf register numbers. */ +unsigned +aarch64_dbx_register_number (unsigned regno) +{ if (GP_REGNUM_P (regno)) return AARCH64_DWARF_R0 + regno - R0_REGNUM; else if (regno == SP_REGNUM) @@ -1164,6 +2629,24 @@ aarch64_dbx_register_number (unsigned regno) return DWARF_FRAME_REGISTERS; } +/* If X is a CONST_DOUBLE, return its bit representation as a constant + integer, otherwise return X unmodified. */ +static rtx +aarch64_bit_representation (rtx x) +{ + if (CONST_DOUBLE_P (x)) + x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x); + return x; +} + +/* Return an estimate for the number of quadwords in an SVE vector. This is + equivalent to the number of Advanced SIMD vectors in an SVE vector. */ +static unsigned int +aarch64_estimated_sve_vq () +{ + return estimated_poly_value (BITS_PER_SVE_VECTOR) / 128; +} + /* Return true if MODE is any of the Advanced SIMD structure modes. */ static bool aarch64_advsimd_struct_mode_p (machine_mode mode) @@ -1190,6 +2673,9 @@ const unsigned int VEC_SVE_PRED = 4; /* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate a structure of 2, 3 or 4 vectors. */ const unsigned int VEC_STRUCT = 8; +/* Can be used in combination with VEC_SVE_DATA to indicate that the + vector has fewer significant bytes than a full SVE vector. */ +const unsigned int VEC_PARTIAL = 16; /* Useful combinations of the above. 
*/ const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED; const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA; @@ -1205,34 +2691,92 @@ aarch64_classify_vector_mode (machine_mode mode) if (aarch64_sve_pred_mode_p (mode)) return VEC_SVE_PRED; - scalar_mode inner = GET_MODE_INNER (mode); - if (VECTOR_MODE_P (mode) - && (inner == QImode - || inner == HImode - || inner == HFmode - || inner == SImode - || inner == SFmode - || inner == DImode - || inner == DFmode)) + /* Make the decision based on the mode's enum value rather than its + properties, so that we keep the correct classification regardless + of -msve-vector-bits. */ + switch (mode) { - if (TARGET_SVE) - { - if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR)) - return VEC_SVE_DATA; - if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2) - || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3) - || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4)) - return VEC_SVE_DATA | VEC_STRUCT; - } + /* Partial SVE QI vectors. */ + case E_VNx2QImode: + case E_VNx4QImode: + case E_VNx8QImode: + /* Partial SVE HI vectors. */ + case E_VNx2HImode: + case E_VNx4HImode: + /* Partial SVE SI vector. */ + case E_VNx2SImode: + /* Partial SVE HF vectors. */ + case E_VNx2HFmode: + case E_VNx4HFmode: + /* Partial SVE BF vectors. */ + case E_VNx2BFmode: + case E_VNx4BFmode: + /* Partial SVE SF vector. */ + case E_VNx2SFmode: + return TARGET_SVE ? VEC_SVE_DATA | VEC_PARTIAL : 0; + + case E_VNx16QImode: + case E_VNx8HImode: + case E_VNx4SImode: + case E_VNx2DImode: + case E_VNx8BFmode: + case E_VNx8HFmode: + case E_VNx4SFmode: + case E_VNx2DFmode: + return TARGET_SVE ? VEC_SVE_DATA : 0; + + /* x2 SVE vectors. */ + case E_VNx32QImode: + case E_VNx16HImode: + case E_VNx8SImode: + case E_VNx4DImode: + case E_VNx16BFmode: + case E_VNx16HFmode: + case E_VNx8SFmode: + case E_VNx4DFmode: + /* x3 SVE vectors. */ + case E_VNx48QImode: + case E_VNx24HImode: + case E_VNx12SImode: + case E_VNx6DImode: + case E_VNx24BFmode: + case E_VNx24HFmode: + case E_VNx12SFmode: + case E_VNx6DFmode: + /* x4 SVE vectors. */ + case E_VNx64QImode: + case E_VNx32HImode: + case E_VNx16SImode: + case E_VNx8DImode: + case E_VNx32BFmode: + case E_VNx32HFmode: + case E_VNx16SFmode: + case E_VNx8DFmode: + return TARGET_SVE ? VEC_SVE_DATA | VEC_STRUCT : 0; + + /* 64-bit Advanced SIMD vectors. */ + case E_V8QImode: + case E_V4HImode: + case E_V2SImode: + /* ...E_V1DImode doesn't exist. */ + case E_V4HFmode: + case E_V4BFmode: + case E_V2SFmode: + case E_V1DFmode: + /* 128-bit Advanced SIMD vectors. */ + case E_V16QImode: + case E_V8HImode: + case E_V4SImode: + case E_V2DImode: + case E_V8HFmode: + case E_V8BFmode: + case E_V4SFmode: + case E_V2DFmode: + return TARGET_SIMD ? VEC_ADVSIMD : 0; - /* This includes V1DF but not V1DI (which doesn't exist). */ - if (TARGET_SIMD - && (known_eq (GET_MODE_BITSIZE (mode), 64) - || known_eq (GET_MODE_BITSIZE (mode), 128))) - return VEC_ADVSIMD; + default: + return 0; } - - return 0; } /* Return true if MODE is any of the data vector modes, including @@ -1243,6 +2787,14 @@ aarch64_vector_data_mode_p (machine_mode mode) return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA; } +/* Return true if MODE is any form of SVE mode, including predicates, + vectors and structures. */ +bool +aarch64_sve_mode_p (machine_mode mode) +{ + return aarch64_classify_vector_mode (mode) & VEC_ANY_SVE; +} + /* Return true if MODE is an SVE data vector mode; either a single vector or a structure of vectors. 
*/ static bool @@ -1251,6 +2803,24 @@ aarch64_sve_data_mode_p (machine_mode mode) return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; } +/* Return the number of defined bytes in one constituent vector of + SVE mode MODE, which has vector flags VEC_FLAGS. */ +static poly_int64 +aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags) +{ + if (vec_flags & VEC_PARTIAL) + /* A single partial vector. */ + return GET_MODE_SIZE (mode); + + if (vec_flags & VEC_SVE_DATA) + /* A single vector or a tuple. */ + return BYTES_PER_SVE_VECTOR; + + /* A single predicate. */ + gcc_assert (vec_flags & VEC_SVE_PRED); + return BYTES_PER_SVE_PRED; +} + /* Implement target hook TARGET_ARRAY_MODE. */ static opt_machine_mode aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) @@ -1277,6 +2847,22 @@ aarch64_array_mode_supported_p (machine_mode mode, return false; } +/* MODE is some form of SVE vector mode. For data modes, return the number + of vector register bits that each element of MODE occupies, such as 64 + for both VNx2DImode and VNx2SImode (where each 32-bit value is stored + in a 64-bit container). For predicate modes, return the number of + data bits controlled by each significant predicate bit. */ + +static unsigned int +aarch64_sve_container_bits (machine_mode mode) +{ + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + poly_uint64 vector_bits = (vec_flags & (VEC_PARTIAL | VEC_SVE_PRED) + ? BITS_PER_SVE_VECTOR + : GET_MODE_BITSIZE (mode)); + return vector_element_size (vector_bits, GET_MODE_NUNITS (mode)); +} + /* Return the SVE predicate mode to use for elements that have ELEM_NBYTES bytes, if such a mode exists. */ @@ -1297,57 +2883,189 @@ aarch64_sve_pred_mode (unsigned int elem_nbytes) return opt_machine_mode (); } +/* Return the SVE predicate mode that should be used to control + SVE mode MODE. */ + +machine_mode +aarch64_sve_pred_mode (machine_mode mode) +{ + unsigned int bits = aarch64_sve_container_bits (mode); + return aarch64_sve_pred_mode (bits / BITS_PER_UNIT).require (); +} + /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */ static opt_machine_mode -aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes) +aarch64_get_mask_mode (machine_mode mode) { - if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR)) - { - unsigned int elem_nbytes = vector_element_size (nbytes, nunits); - machine_mode pred_mode; - if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode)) - return pred_mode; - } + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + if (vec_flags & VEC_SVE_DATA) + return aarch64_sve_pred_mode (mode); - return default_get_mask_mode (nunits, nbytes); + return default_get_mask_mode (mode); } -/* Implement TARGET_HARD_REGNO_NREGS. */ +/* Return the SVE vector mode that has NUNITS elements of mode INNER_MODE. */ -static unsigned int -aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) +opt_machine_mode +aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits) { - /* ??? Logically we should only need to provide a value when - HARD_REGNO_MODE_OK says that the combination is valid, - but at the moment we need to handle all modes. Just ignore - any runtime parts for registers that can't store them. 
*/ - HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); - switch (aarch64_regno_regclass (regno)) - { - case FP_REGS: - case FP_LO_REGS: - if (aarch64_sve_data_mode_p (mode)) - return exact_div (GET_MODE_SIZE (mode), - BYTES_PER_SVE_VECTOR).to_constant (); - return CEIL (lowest_size, UNITS_PER_VREG); - case PR_REGS: - case PR_LO_REGS: - case PR_HI_REGS: - return 1; - default: - return CEIL (lowest_size, UNITS_PER_WORD); - } - gcc_unreachable (); + enum mode_class mclass = (is_a (inner_mode) + ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT); + machine_mode mode; + FOR_EACH_MODE_IN_CLASS (mode, mclass) + if (inner_mode == GET_MODE_INNER (mode) + && known_eq (nunits, GET_MODE_NUNITS (mode)) + && aarch64_sve_data_mode_p (mode)) + return mode; + return opt_machine_mode (); } -/* Implement TARGET_HARD_REGNO_MODE_OK. */ +/* Return the integer element mode associated with SVE mode MODE. */ -static bool -aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) +static scalar_int_mode +aarch64_sve_element_int_mode (machine_mode mode) { - if (GET_MODE_CLASS (mode) == MODE_CC) - return regno == CC_REGNUM; + poly_uint64 vector_bits = (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + ? BITS_PER_SVE_VECTOR + : GET_MODE_BITSIZE (mode)); + unsigned int elt_bits = vector_element_size (vector_bits, + GET_MODE_NUNITS (mode)); + return int_mode_for_size (elt_bits, 0).require (); +} + +/* Return an integer element mode that contains exactly + aarch64_sve_container_bits (MODE) bits. This is wider than + aarch64_sve_element_int_mode if MODE is a partial vector, + otherwise it's the same. */ + +static scalar_int_mode +aarch64_sve_container_int_mode (machine_mode mode) +{ + return int_mode_for_size (aarch64_sve_container_bits (mode), 0).require (); +} + +/* Return the integer vector mode associated with SVE mode MODE. + Unlike related_int_vector_mode, this can handle the case in which + MODE is a predicate (and thus has a different total size). */ + +machine_mode +aarch64_sve_int_mode (machine_mode mode) +{ + scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode); + return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require (); +} + +/* Implement TARGET_VECTORIZE_RELATED_MODE. */ + +static opt_machine_mode +aarch64_vectorize_related_mode (machine_mode vector_mode, + scalar_mode element_mode, + poly_uint64 nunits) +{ + unsigned int vec_flags = aarch64_classify_vector_mode (vector_mode); + + /* If we're operating on SVE vectors, try to return an SVE mode. */ + poly_uint64 sve_nunits; + if ((vec_flags & VEC_SVE_DATA) + && multiple_p (BYTES_PER_SVE_VECTOR, + GET_MODE_SIZE (element_mode), &sve_nunits)) + { + machine_mode sve_mode; + if (maybe_ne (nunits, 0U)) + { + /* Try to find a full or partial SVE mode with exactly + NUNITS units. */ + if (multiple_p (sve_nunits, nunits) + && aarch64_sve_data_mode (element_mode, + nunits).exists (&sve_mode)) + return sve_mode; + } + else + { + /* Take the preferred number of units from the number of bytes + that fit in VECTOR_MODE. We always start by "autodetecting" + a full vector mode with preferred_simd_mode, so vectors + chosen here will also be full vector modes. Then + autovectorize_vector_modes tries smaller starting modes + and thus smaller preferred numbers of units. */ + sve_nunits = ordered_min (sve_nunits, GET_MODE_SIZE (vector_mode)); + if (aarch64_sve_data_mode (element_mode, + sve_nunits).exists (&sve_mode)) + return sve_mode; + } + } + + /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. 
*/ + if ((vec_flags & VEC_ADVSIMD) + && known_eq (nunits, 0U) + && known_eq (GET_MODE_BITSIZE (vector_mode), 64U) + && maybe_ge (GET_MODE_BITSIZE (element_mode) + * GET_MODE_NUNITS (vector_mode), 128U)) + { + machine_mode res = aarch64_simd_container_mode (element_mode, 128); + if (VECTOR_MODE_P (res)) + return res; + } + + return default_vectorize_related_mode (vector_mode, element_mode, nunits); +} + +/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations, + prefer to use the first arithmetic operand as the else value if + the else value doesn't matter, since that exactly matches the SVE + destructive merging form. For ternary operations we could either + pick the first operand and use FMAD-like instructions or the last + operand and use FMLA-like instructions; the latter seems more + natural. */ + +static tree +aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops) +{ + return nops == 3 ? ops[2] : ops[0]; +} + +/* Implement TARGET_HARD_REGNO_NREGS. */ + +static unsigned int +aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) +{ + /* ??? Logically we should only need to provide a value when + HARD_REGNO_MODE_OK says that the combination is valid, + but at the moment we need to handle all modes. Just ignore + any runtime parts for registers that can't store them. */ + HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); + switch (aarch64_regno_regclass (regno)) + { + case FP_REGS: + case FP_LO_REGS: + case FP_LO8_REGS: + { + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + if (vec_flags & VEC_SVE_DATA) + return exact_div (GET_MODE_SIZE (mode), + aarch64_vl_bytes (mode, vec_flags)).to_constant (); + return CEIL (lowest_size, UNITS_PER_VREG); + } + case PR_REGS: + case PR_LO_REGS: + case PR_HI_REGS: + case FFR_REGS: + case PR_AND_FFR_REGS: + return 1; + default: + return CEIL (lowest_size, UNITS_PER_WORD); + } + gcc_unreachable (); +} + +/* Implement TARGET_HARD_REGNO_MODE_OK. */ + +static bool +aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return regno == CC_REGNUM; if (regno == VG_REGNUM) /* This must have the same size as _Unwind_Word. */ @@ -1355,10 +3073,10 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) unsigned int vec_flags = aarch64_classify_vector_mode (mode); if (vec_flags & VEC_SVE_PRED) - return PR_REGNUM_P (regno); + return pr_or_ffr_regnum_p (regno); - if (PR_REGNUM_P (regno)) - return 0; + if (pr_or_ffr_regnum_p (regno)) + return false; if (regno == SP_REGNUM) /* The purpose of comparing with ptr_mode is to support the @@ -1369,10 +3087,16 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM) return mode == Pmode; - if (GP_REGNUM_P (regno) && known_le (GET_MODE_SIZE (mode), 16)) - return true; - - if (FP_REGNUM_P (regno)) + if (GP_REGNUM_P (regno)) + { + if (vec_flags & VEC_ANY_SVE) + return false; + if (known_le (GET_MODE_SIZE (mode), 8)) + return true; + if (known_le (GET_MODE_SIZE (mode), 16)) + return (regno & 1) == 0; + } + else if (FP_REGNUM_P (regno)) { if (vec_flags & VEC_STRUCT) return end_hard_regno (mode, regno) - 1 <= V31_REGNUM; @@ -1383,14 +3107,177 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) return false; } +/* Return true if a function with type FNTYPE returns its value in + SVE vector or predicate registers. 
*/ + +static bool +aarch64_returns_value_in_sve_regs_p (const_tree fntype) +{ + tree return_type = TREE_TYPE (fntype); + + pure_scalable_type_info pst_info; + switch (pst_info.analyze (return_type)) + { + case pure_scalable_type_info::IS_PST: + return (pst_info.num_zr () <= NUM_FP_ARG_REGS + && pst_info.num_pr () <= NUM_PR_ARG_REGS); + + case pure_scalable_type_info::DOESNT_MATTER: + gcc_assert (aarch64_return_in_memory_1 (return_type)); + return false; + + case pure_scalable_type_info::NO_ABI_IDENTITY: + case pure_scalable_type_info::ISNT_PST: + return false; + } + gcc_unreachable (); +} + +/* Return true if a function with type FNTYPE takes arguments in + SVE vector or predicate registers. */ + +static bool +aarch64_takes_arguments_in_sve_regs_p (const_tree fntype) +{ + CUMULATIVE_ARGS args_so_far_v; + aarch64_init_cumulative_args (&args_so_far_v, NULL_TREE, NULL_RTX, + NULL_TREE, 0, true); + cumulative_args_t args_so_far = pack_cumulative_args (&args_so_far_v); + + for (tree chain = TYPE_ARG_TYPES (fntype); + chain && chain != void_list_node; + chain = TREE_CHAIN (chain)) + { + tree arg_type = TREE_VALUE (chain); + if (arg_type == error_mark_node) + return false; + + function_arg_info arg (arg_type, /*named=*/true); + apply_pass_by_reference_rules (&args_so_far_v, arg); + pure_scalable_type_info pst_info; + if (pst_info.analyze_registers (arg.type)) + { + unsigned int end_zr = args_so_far_v.aapcs_nvrn + pst_info.num_zr (); + unsigned int end_pr = args_so_far_v.aapcs_nprn + pst_info.num_pr (); + gcc_assert (end_zr <= NUM_FP_ARG_REGS && end_pr <= NUM_PR_ARG_REGS); + return true; + } + + targetm.calls.function_arg_advance (args_so_far, arg); + } + return false; +} + +/* Implement TARGET_FNTYPE_ABI. */ + +static const predefined_function_abi & +aarch64_fntype_abi (const_tree fntype) +{ + if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) + return aarch64_simd_abi (); + + if (aarch64_returns_value_in_sve_regs_p (fntype) + || aarch64_takes_arguments_in_sve_regs_p (fntype)) + return aarch64_sve_abi (); + + return default_function_abi; +} + +/* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */ + +static bool +aarch64_compatible_vector_types_p (const_tree type1, const_tree type2) +{ + return (aarch64_sve::builtin_type_p (type1) + == aarch64_sve::builtin_type_p (type2)); +} + +/* Return true if we should emit CFI for register REGNO. */ + +static bool +aarch64_emit_cfi_for_reg_p (unsigned int regno) +{ + return (GP_REGNUM_P (regno) + || !default_function_abi.clobbers_full_reg_p (regno)); +} + +/* Return the mode we should use to save and restore register REGNO. */ + +static machine_mode +aarch64_reg_save_mode (unsigned int regno) +{ + if (GP_REGNUM_P (regno)) + return DImode; + + if (FP_REGNUM_P (regno)) + switch (crtl->abi->id ()) + { + case ARM_PCS_AAPCS64: + /* Only the low 64 bits are saved by the base PCS. */ + return DFmode; + + case ARM_PCS_SIMD: + /* The vector PCS saves the low 128 bits (which is the full + register on non-SVE targets). */ + return TFmode; + + case ARM_PCS_SVE: + /* Use vectors of DImode for registers that need frame + information, so that the first 64 bytes of the save slot + are always the equivalent of what storing D would give. */ + if (aarch64_emit_cfi_for_reg_p (regno)) + return VNx2DImode; + + /* Use vectors of bytes otherwise, so that the layout is + endian-agnostic, and so that we can use LDR and STR for + big-endian targets. 
*/ + return VNx16QImode; + + case ARM_PCS_TLSDESC: + case ARM_PCS_UNKNOWN: + break; + } + + if (PR_REGNUM_P (regno)) + /* Save the full predicate register. */ + return VNx16BImode; + + gcc_unreachable (); +} + +/* Implement TARGET_INSN_CALLEE_ABI. */ + +const predefined_function_abi & +aarch64_insn_callee_abi (const rtx_insn *insn) +{ + rtx pat = PATTERN (insn); + gcc_assert (GET_CODE (pat) == PARALLEL); + rtx unspec = XVECEXP (pat, 0, 1); + gcc_assert (GET_CODE (unspec) == UNSPEC + && XINT (unspec, 1) == UNSPEC_CALLEE_ABI); + return function_abis[INTVAL (XVECEXP (unspec, 0, 0))]; +} + /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves the lower 64 bits of a 128-bit register. Tell the compiler the callee clobbers the top 64 bits when restoring the bottom 64 bits. */ static bool -aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) +aarch64_hard_regno_call_part_clobbered (unsigned int abi_id, + unsigned int regno, + machine_mode mode) { - return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8); + if (FP_REGNUM_P (regno) && abi_id != ARM_PCS_SVE) + { + poly_int64 per_register_size = GET_MODE_SIZE (mode); + unsigned int nregs = hard_regno_nregs (regno, mode); + if (nregs > 1) + per_register_size = exact_div (per_register_size, nregs); + if (abi_id == ARM_PCS_SIMD || abi_id == ARM_PCS_TLSDESC) + return maybe_gt (per_register_size, 16); + return maybe_gt (per_register_size, 8); + } + return false; } /* Implement REGMODE_NATURAL_SIZE. */ @@ -1432,6 +3319,13 @@ aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned, return SImode; } +/* Return true if I's bits are consecutive ones from the MSB. */ +bool +aarch64_high_bits_all_ones_p (HOST_WIDE_INT i) +{ + return exact_log2 (-i) != HOST_WIDE_INT_M1; +} + /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so that strcpy from constants will be faster. */ @@ -1477,33 +3371,6 @@ aarch64_is_noplt_call_p (rtx sym) return false; } -/* Return true if the offsets to a zero/sign-extract operation - represent an expression that matches an extend operation. The - operands represent the paramters from - - (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ -bool -aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm, - rtx extract_imm) -{ - HOST_WIDE_INT mult_val, extract_val; - - if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) - return false; - - mult_val = INTVAL (mult_imm); - extract_val = INTVAL (extract_imm); - - if (extract_val > 8 - && extract_val < GET_MODE_BITSIZE (mode) - && exact_log2 (extract_val & ~7) > 0 - && (extract_val & 7) <= 4 - && mult_val == (1 << (extract_val & 7))) - return true; - - return false; -} - /* Emit an insn that's a simple single-set. Both the operands must be known to be valid. 
*/ inline static rtx_insn * @@ -1517,13 +3384,69 @@ emit_set_insn (rtx x, rtx y) rtx aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) { - machine_mode mode = SELECT_CC_MODE (code, x, y); - rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + machine_mode cmp_mode = GET_MODE (x); + machine_mode cc_mode; + rtx cc_reg; + + if (cmp_mode == TImode) + { + gcc_assert (code == NE); + + cc_mode = CCmode; + cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + + rtx x_lo = operand_subword (x, 0, 0, TImode); + rtx y_lo = operand_subword (y, 0, 0, TImode); + emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo)); - emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + rtx x_hi = operand_subword (x, 1, 0, TImode); + rtx y_hi = operand_subword (y, 1, 0, TImode); + emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi, + gen_rtx_EQ (cc_mode, cc_reg, const0_rtx), + GEN_INT (AARCH64_EQ))); + } + else + { + cc_mode = SELECT_CC_MODE (code, x, y); + cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y)); + } return cc_reg; } +/* Similarly, but maybe zero-extend Y if Y_MODE < SImode. */ + +static rtx +aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y, + machine_mode y_mode) +{ + if (y_mode == E_QImode || y_mode == E_HImode) + { + if (CONST_INT_P (y)) + { + y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode)); + y_mode = SImode; + } + else + { + rtx t, cc_reg; + machine_mode cc_mode; + + t = gen_rtx_ZERO_EXTEND (SImode, y); + t = gen_rtx_COMPARE (CC_SWPmode, t, x); + cc_mode = CC_SWPmode; + cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + emit_set_insn (cc_reg, t); + return cc_reg; + } + } + + if (!aarch64_plus_operand (y, y_mode)) + y = force_reg (y_mode, y); + + return aarch64_gen_compare_reg (code, x, y); +} + /* Build the SYMBOL_REF for __tls_get_addr. */ static GTY(()) rtx tls_get_addr_libfunc; @@ -1542,14 +3465,9 @@ static enum tls_model tls_symbolic_operand_type (rtx addr) { enum tls_model tls_kind = TLS_MODEL_NONE; - if (GET_CODE (addr) == CONST) - { - poly_int64 addend; - rtx sym = strip_offset (addr, &addend); - if (GET_CODE (sym) == SYMBOL_REF) - tls_kind = SYMBOL_REF_TLS_MODEL (sym); - } - else if (GET_CODE (addr) == SYMBOL_REF) + poly_int64 offset; + addr = strip_offset_and_salt (addr, &offset); + if (SYMBOL_REF_P (addr)) tls_kind = SYMBOL_REF_TLS_MODEL (addr); return tls_kind; @@ -1690,7 +3608,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, /* The operand is expected to be MEM. Whenever the related insn pattern changed, above code which calculate mem should be updated. */ - gcc_assert (GET_CODE (mem) == MEM); + gcc_assert (MEM_P (mem)); MEM_READONLY_P (mem) = 1; MEM_NOTRAP_P (mem) = 1; emit_insn (insn); @@ -1733,7 +3651,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); } - gcc_assert (GET_CODE (mem) == MEM); + gcc_assert (MEM_P (mem)); MEM_READONLY_P (mem) = 1; MEM_NOTRAP_P (mem) = 1; emit_insn (insn); @@ -1743,11 +3661,16 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, case SYMBOL_SMALL_TLSGD: { rtx_insn *insns; - machine_mode mode = GET_MODE (dest); - rtx result = gen_rtx_REG (mode, R0_REGNUM); + /* The return type of __tls_get_addr is the C pointer type + so use ptr_mode. */ + rtx result = gen_rtx_REG (ptr_mode, R0_REGNUM); + rtx tmp_reg = dest; + + if (GET_MODE (dest) != ptr_mode) + tmp_reg = can_create_pseudo_p () ? 
gen_reg_rtx (ptr_mode) : result; start_sequence (); - if (TARGET_ILP32) + if (ptr_mode == SImode) aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm)); else aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm)); @@ -1755,7 +3678,11 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, end_sequence (); RTL_CONST_CALL_P (insns) = 1; - emit_libcall_block (insns, dest, result, imm); + emit_libcall_block (insns, tmp_reg, result, imm); + /* Convert back to the mode of the dest adding a zero_extend + from SImode (ptr_mode) to DImode (Pmode). */ + if (dest != tmp_reg) + convert_move (dest, tmp_reg, true); return; } @@ -1862,8 +3789,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, } case SYMBOL_TINY_GOT: - emit_insn (gen_ldr_got_tiny (dest, imm)); - return; + { + rtx insn; + machine_mode mode = GET_MODE (dest); + + if (mode == ptr_mode) + insn = gen_ldr_got_tiny (mode, dest, imm); + else + { + gcc_assert (mode == Pmode); + insn = gen_ldr_got_tiny_sidi (dest, imm); + } + + emit_insn (insn); + return; + } case SYMBOL_TINY_TLSIE: { @@ -1909,6 +3849,27 @@ aarch64_emit_move (rtx dest, rtx src) : emit_move_insn_1 (dest, src)); } +/* Apply UNOPTAB to OP and store the result in DEST. */ + +static void +aarch64_emit_unop (rtx dest, optab unoptab, rtx op) +{ + rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0); + if (dest != tmp) + emit_move_insn (dest, tmp); +} + +/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */ + +static void +aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1) +{ + rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0, + OPTAB_DIRECT); + if (dest != tmp) + emit_move_insn (dest, tmp); +} + /* Split a 128-bit move operation into two 64-bit move operations, taking care to handle partial overlap of register to register copies. Special cases are needed when moving between GP regs and @@ -1938,16 +3899,8 @@ aarch64_split_128bit_move (rtx dst, rtx src) src_lo = gen_lowpart (word_mode, src); src_hi = gen_highpart (word_mode, src); - if (mode == TImode) - { - emit_insn (gen_aarch64_movtilow_di (dst, src_lo)); - emit_insn (gen_aarch64_movtihigh_di (dst, src_hi)); - } - else - { - emit_insn (gen_aarch64_movtflow_di (dst, src_lo)); - emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi)); - } + emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo)); + emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi)); return; } else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno)) @@ -1955,16 +3908,8 @@ aarch64_split_128bit_move (rtx dst, rtx src) dst_lo = gen_lowpart (word_mode, dst); dst_hi = gen_highpart (word_mode, dst); - if (mode == TImode) - { - emit_insn (gen_aarch64_movdi_tilow (dst_lo, src)); - emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src)); - } - else - { - emit_insn (gen_aarch64_movdi_tflow (dst_lo, src)); - emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src)); - } + emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src)); + emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src)); return; } } @@ -1987,11 +3932,16 @@ aarch64_split_128bit_move (rtx dst, rtx src) } } +/* Return true if we should split a move from 128-bit value SRC + to 128-bit register DEST. */ + bool aarch64_split_128bit_move_p (rtx dst, rtx src) { - return (! REG_P (src) - || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); + if (FP_REGNUM_P (REGNO (dst))) + return REG_P (src) && !FP_REGNUM_P (REGNO (src)); + /* All moves to GPRs need to be split. */ + return true; } /* Split a complex SIMD combine. 
*/ @@ -2007,36 +3957,7 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) && register_operand (src1, src_mode) && register_operand (src2, src_mode)); - rtx (*gen) (rtx, rtx, rtx); - - switch (src_mode) - { - case E_V8QImode: - gen = gen_aarch64_simd_combinev8qi; - break; - case E_V4HImode: - gen = gen_aarch64_simd_combinev4hi; - break; - case E_V2SImode: - gen = gen_aarch64_simd_combinev2si; - break; - case E_V4HFmode: - gen = gen_aarch64_simd_combinev4hf; - break; - case E_V2SFmode: - gen = gen_aarch64_simd_combinev2sf; - break; - case E_DImode: - gen = gen_aarch64_simd_combinedi; - break; - case E_DFmode: - gen = gen_aarch64_simd_combinedf; - break; - default: - gcc_unreachable (); - } - - emit_insn (gen (dst, src1, src2)); + emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2)); return; } @@ -2052,41 +3973,10 @@ aarch64_split_simd_move (rtx dst, rtx src) if (REG_P (dst) && REG_P (src)) { - rtx (*gen) (rtx, rtx); - gcc_assert (VECTOR_MODE_P (src_mode)); - - switch (src_mode) - { - case E_V16QImode: - gen = gen_aarch64_split_simd_movv16qi; - break; - case E_V8HImode: - gen = gen_aarch64_split_simd_movv8hi; - break; - case E_V4SImode: - gen = gen_aarch64_split_simd_movv4si; - break; - case E_V2DImode: - gen = gen_aarch64_split_simd_movv2di; - break; - case E_V8HFmode: - gen = gen_aarch64_split_simd_movv8hf; - break; - case E_V4SFmode: - gen = gen_aarch64_split_simd_movv4sf; - break; - case E_V2DFmode: - gen = gen_aarch64_split_simd_movv2df; - break; - default: - gcc_unreachable (); - } - - emit_insn (gen (dst, src)); - return; - } -} + emit_insn (gen_aarch64_split_simd_mov (src_mode, dst, src)); + } +} bool aarch64_zero_extend_const_eq (machine_mode xmode, rtx x, @@ -2096,7 +3986,36 @@ aarch64_zero_extend_const_eq (machine_mode xmode, rtx x, gcc_assert (r != NULL); return rtx_equal_p (x, r); } - + +/* Return TARGET if it is nonnull and a register of mode MODE. + Otherwise, return a fresh register of mode MODE if we can, + or TARGET reinterpreted as MODE if we can't. */ + +static rtx +aarch64_target_reg (rtx target, machine_mode mode) +{ + if (target && REG_P (target) && GET_MODE (target) == mode) + return target; + if (!can_create_pseudo_p ()) + { + gcc_assert (target); + return gen_lowpart (mode, target); + } + return gen_reg_rtx (mode); +} + +/* Return a register that contains the constant in BUILDER, given that + the constant is a legitimate move operand. Use TARGET as the register + if it is nonnull and convenient. */ + +static rtx +aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder) +{ + rtx src = builder.build (); + target = aarch64_target_reg (target, GET_MODE (src)); + emit_insn (gen_rtx_SET (target, src)); + return target; +} static rtx aarch64_force_temporary (machine_mode mode, rtx x, rtx value) @@ -2111,6 +4030,329 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx value) } } +/* Return true if predicate value X is a constant in which every element + is a CONST_INT. When returning true, describe X in BUILDER as a VNx16BI + value, i.e. as a predicate in which all bits are significant. 
*/ + +static bool +aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x) +{ + if (GET_CODE (x) != CONST_VECTOR) + return false; + + unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode), + GET_MODE_NUNITS (GET_MODE (x))); + unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor; + unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); + builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern); + + unsigned int nelts = const_vector_encoded_nelts (x); + for (unsigned int i = 0; i < nelts; ++i) + { + rtx elt = CONST_VECTOR_ENCODED_ELT (x, i); + if (!CONST_INT_P (elt)) + return false; + + builder.quick_push (elt); + for (unsigned int j = 1; j < factor; ++j) + builder.quick_push (const0_rtx); + } + builder.finalize (); + return true; +} + +/* BUILDER contains a predicate constant of mode VNx16BI. Return the + widest predicate element size it can have (that is, the largest size + for which each element would still be 0 or 1). */ + +unsigned int +aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder) +{ + /* Start with the most optimistic assumption: that we only need + one bit per pattern. This is what we will use if only the first + bit in each pattern is ever set. */ + unsigned int mask = GET_MODE_SIZE (DImode); + mask |= builder.npatterns (); + + /* Look for set bits. */ + unsigned int nelts = builder.encoded_nelts (); + for (unsigned int i = 1; i < nelts; ++i) + if (INTVAL (builder.elt (i)) != 0) + { + if (i & 1) + return 1; + mask |= i; + } + return mask & -mask; +} + +/* If VNx16BImode rtx X is a canonical PTRUE for a predicate mode, + return that predicate mode, otherwise return opt_machine_mode (). */ + +opt_machine_mode +aarch64_ptrue_all_mode (rtx x) +{ + gcc_assert (GET_MODE (x) == VNx16BImode); + if (GET_CODE (x) != CONST_VECTOR + || !CONST_VECTOR_DUPLICATE_P (x) + || !CONST_INT_P (CONST_VECTOR_ENCODED_ELT (x, 0)) + || INTVAL (CONST_VECTOR_ENCODED_ELT (x, 0)) == 0) + return opt_machine_mode (); + + unsigned int nelts = const_vector_encoded_nelts (x); + for (unsigned int i = 1; i < nelts; ++i) + if (CONST_VECTOR_ENCODED_ELT (x, i) != const0_rtx) + return opt_machine_mode (); + + return aarch64_sve_pred_mode (nelts); +} + +/* BUILDER is a predicate constant of mode VNx16BI. Consider the value + that the constant would have with predicate element size ELT_SIZE + (ignoring the upper bits in each element) and return: + + * -1 if all bits are set + * N if the predicate has N leading set bits followed by all clear bits + * 0 if the predicate does not have any of these forms. */ + +int +aarch64_partial_ptrue_length (rtx_vector_builder &builder, + unsigned int elt_size) +{ + /* If nelts_per_pattern is 3, we have set bits followed by clear bits + followed by set bits. */ + if (builder.nelts_per_pattern () == 3) + return 0; + + /* Skip over leading set bits. */ + unsigned int nelts = builder.encoded_nelts (); + unsigned int i = 0; + for (; i < nelts; i += elt_size) + if (INTVAL (builder.elt (i)) == 0) + break; + unsigned int vl = i / elt_size; + + /* Check for the all-true case. */ + if (i == nelts) + return -1; + + /* If nelts_per_pattern is 1, then either VL is zero, or we have a + repeating pattern of set bits followed by clear bits. */ + if (builder.nelts_per_pattern () != 2) + return 0; + + /* We have a "foreground" value and a duplicated "background" value. + If the background might repeat and the last set bit belongs to it, + we might have set bits followed by clear bits followed by set bits. 
*/ + if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ())) + return 0; + + /* Make sure that the rest are all clear. */ + for (; i < nelts; i += elt_size) + if (INTVAL (builder.elt (i)) != 0) + return 0; + + return vl; +} + +/* See if there is an svpattern that encodes an SVE predicate of mode + PRED_MODE in which the first VL bits are set and the rest are clear. + Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS. + A VL of -1 indicates an all-true vector. */ + +aarch64_svpattern +aarch64_svpattern_for_vl (machine_mode pred_mode, int vl) +{ + if (vl < 0) + return AARCH64_SV_ALL; + + if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode))) + return AARCH64_NUM_SVPATTERNS; + + if (vl >= 1 && vl <= 8) + return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1)); + + if (vl >= 16 && vl <= 256 && pow2p_hwi (vl)) + return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4)); + + int max_vl; + if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl)) + { + if (vl == (max_vl / 3) * 3) + return AARCH64_SV_MUL3; + /* These would only trigger for non-power-of-2 lengths. */ + if (vl == (max_vl & -4)) + return AARCH64_SV_MUL4; + if (vl == (1 << floor_log2 (max_vl))) + return AARCH64_SV_POW2; + if (vl == max_vl) + return AARCH64_SV_ALL; + } + return AARCH64_NUM_SVPATTERNS; +} + +/* Return a VNx16BImode constant in which every sequence of ELT_SIZE + bits has the lowest bit set and the upper bits clear. This is the + VNx16BImode equivalent of a PTRUE for controlling elements of + ELT_SIZE bytes. However, because the constant is VNx16BImode, + all bits are significant, even the upper zeros. */ + +rtx +aarch64_ptrue_all (unsigned int elt_size) +{ + rtx_vector_builder builder (VNx16BImode, elt_size, 1); + builder.quick_push (const1_rtx); + for (unsigned int i = 1; i < elt_size; ++i) + builder.quick_push (const0_rtx); + return builder.build (); +} + +/* Return an all-true predicate register of mode MODE. */ + +rtx +aarch64_ptrue_reg (machine_mode mode) +{ + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); + rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode)); + return gen_lowpart (mode, reg); +} + +/* Return an all-false predicate register of mode MODE. */ + +rtx +aarch64_pfalse_reg (machine_mode mode) +{ + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); + rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode)); + return gen_lowpart (mode, reg); +} + +/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag + for it. PRED2[0] is the predicate for the instruction whose result + is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag + for it. Return true if we can prove that the two predicates are + equivalent for PTEST purposes; that is, if we can replace PRED2[0] + with PRED1[0] without changing behavior. */ + +bool +aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2) +{ + machine_mode mode = GET_MODE (pred1[0]); + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + && mode == GET_MODE (pred2[0]) + && aarch64_sve_ptrue_flag (pred1[1], SImode) + && aarch64_sve_ptrue_flag (pred2[1], SImode)); + + bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode) + || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE); + bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode) + || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE); + return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]); +} + +/* Emit a comparison CMP between OP0 and OP1, both of which have mode + DATA_MODE, and return the result in a predicate of mode PRED_MODE. 
+ Use TARGET as the target register if nonnull and convenient. */ + +static rtx +aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp, + machine_mode data_mode, rtx op1, rtx op2) +{ + insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode); + expand_operand ops[5]; + create_output_operand (&ops[0], target, pred_mode); + create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode); + create_integer_operand (&ops[2], SVE_KNOWN_PTRUE); + create_input_operand (&ops[3], op1, data_mode); + create_input_operand (&ops[4], op2, data_mode); + expand_insn (icode, 5, ops); + return ops[0].value; +} + +/* Use a comparison to convert integer vector SRC into MODE, which is + the corresponding SVE predicate mode. Use TARGET for the result + if it's nonnull and convenient. */ + +rtx +aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src) +{ + machine_mode src_mode = GET_MODE (src); + return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode, + src, CONST0_RTX (src_mode)); +} + +/* Return the assembly token for svprfop value PRFOP. */ + +static const char * +svprfop_token (enum aarch64_svprfop prfop) +{ + switch (prfop) + { +#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER; + AARCH64_FOR_SVPRFOP (CASE) +#undef CASE + case AARCH64_NUM_SVPRFOPS: + break; + } + gcc_unreachable (); +} + +/* Return the assembly string for an SVE prefetch operation with + mnemonic MNEMONIC, given that PRFOP_RTX is the prefetch operation + and that SUFFIX is the format for the remaining operands. */ + +char * +aarch64_output_sve_prefetch (const char *mnemonic, rtx prfop_rtx, + const char *suffix) +{ + static char buffer[128]; + aarch64_svprfop prfop = (aarch64_svprfop) INTVAL (prfop_rtx); + unsigned int written = snprintf (buffer, sizeof (buffer), "%s\t%s, %s", + mnemonic, svprfop_token (prfop), suffix); + gcc_assert (written < sizeof (buffer)); + return buffer; +} + +/* Check whether we can calculate the number of elements in PATTERN + at compile time, given that there are NELTS_PER_VQ elements per + 128-bit block. Return the value if so, otherwise return -1. */ + +HOST_WIDE_INT +aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq) +{ + unsigned int vl, const_vg; + if (pattern >= AARCH64_SV_VL1 && pattern <= AARCH64_SV_VL8) + vl = 1 + (pattern - AARCH64_SV_VL1); + else if (pattern >= AARCH64_SV_VL16 && pattern <= AARCH64_SV_VL256) + vl = 16 << (pattern - AARCH64_SV_VL16); + else if (aarch64_sve_vg.is_constant (&const_vg)) + { + /* There are two vector granules per quadword. */ + unsigned int nelts = (const_vg / 2) * nelts_per_vq; + switch (pattern) + { + case AARCH64_SV_POW2: return 1 << floor_log2 (nelts); + case AARCH64_SV_MUL4: return nelts & -4; + case AARCH64_SV_MUL3: return (nelts / 3) * 3; + case AARCH64_SV_ALL: return nelts; + default: gcc_unreachable (); + } + } + else + return -1; + + /* There are two vector granules per quadword. */ + poly_uint64 nelts_all = exact_div (aarch64_sve_vg, 2) * nelts_per_vq; + if (known_le (vl, nelts_all)) + return vl; + + /* Requesting more elements than are available results in a PFALSE. */ + if (known_gt (vl, nelts_all)) + return 0; + + return -1; +} + /* Return true if we can move VALUE into a register using a single CNT[BHWD] instruction. */ @@ -2138,16 +4380,17 @@ aarch64_sve_cnt_immediate_p (rtx x) operand (a vector pattern followed by a multiplier in the range [1, 16]). 
PREFIX is the mnemonic without the size suffix and OPERANDS is the first part of the operands template (the part that comes before the - vector size itself). FACTOR is the number of quadwords. - NELTS_PER_VQ, if nonzero, is the number of elements in each quadword. - If it is zero, we can use any element size. */ + vector size itself). PATTERN is the pattern to use. FACTOR is the + number of quadwords. NELTS_PER_VQ, if nonzero, is the number of elements + in each quadword. If it is zero, we can use any element size. */ static char * aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, + aarch64_svpattern pattern, unsigned int factor, unsigned int nelts_per_vq) { - static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")]; + static char buffer[sizeof ("sqincd\t%x0, %w0, vl256, mul #16")]; if (nelts_per_vq == 0) /* There is some overlap in the ranges of the four CNT instructions. @@ -2160,12 +4403,16 @@ aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, factor >>= shift; unsigned int written; - if (factor == 1) + if (pattern == AARCH64_SV_ALL && factor == 1) written = snprintf (buffer, sizeof (buffer), "%s%c\t%s", prefix, suffix, operands); + else if (factor == 1) + written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s", + prefix, suffix, operands, svpattern_token (pattern)); else - written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d", - prefix, suffix, operands, factor); + written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s, mul #%d", + prefix, suffix, operands, svpattern_token (pattern), + factor); gcc_assert (written < sizeof (buffer)); return buffer; } @@ -2175,7 +4422,8 @@ aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, PREFIX is the mnemonic without the size suffix and OPERANDS is the first part of the operands template (the part that comes before the vector size itself). X is the value of the vector size operand, - as a polynomial integer rtx. */ + as a polynomial integer rtx; we need to convert this into an "all" + pattern with a multiplier. */ char * aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, @@ -2183,10 +4431,55 @@ aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, { poly_int64 value = rtx_to_poly_int64 (x); gcc_assert (aarch64_sve_cnt_immediate_p (value)); - return aarch64_output_sve_cnt_immediate (prefix, operands, + return aarch64_output_sve_cnt_immediate (prefix, operands, AARCH64_SV_ALL, value.coeffs[1], 0); } +/* Return the asm string for an instruction with a CNT-like vector size + operand (a vector pattern followed by a multiplier in the range [1, 16]). + PREFIX is the mnemonic without the size suffix and OPERANDS is the + first part of the operands template (the part that comes before the + vector size itself). CNT_PAT[0..2] are the operands of the + UNSPEC_SVE_CNT_PAT; see aarch64_sve_cnt_pat for details. */ + +char * +aarch64_output_sve_cnt_pat_immediate (const char *prefix, + const char *operands, rtx *cnt_pat) +{ + aarch64_svpattern pattern = (aarch64_svpattern) INTVAL (cnt_pat[0]); + unsigned int nelts_per_vq = INTVAL (cnt_pat[1]); + unsigned int factor = INTVAL (cnt_pat[2]) * nelts_per_vq; + return aarch64_output_sve_cnt_immediate (prefix, operands, pattern, + factor, nelts_per_vq); +} + +/* Return true if we can add X using a single SVE INC or DEC instruction. 
*/ + +bool +aarch64_sve_scalar_inc_dec_immediate_p (rtx x) +{ + poly_int64 value; + return (poly_int_rtx_p (x, &value) + && (aarch64_sve_cnt_immediate_p (value) + || aarch64_sve_cnt_immediate_p (-value))); +} + +/* Return the asm string for adding SVE INC/DEC immediate OFFSET to + operand 0. */ + +char * +aarch64_output_sve_scalar_inc_dec (rtx offset) +{ + poly_int64 offset_value = rtx_to_poly_int64 (offset); + gcc_assert (offset_value.coeffs[0] == offset_value.coeffs[1]); + if (offset_value.coeffs[1] > 0) + return aarch64_output_sve_cnt_immediate ("inc", "%x0", AARCH64_SV_ALL, + offset_value.coeffs[1], 0); + else + return aarch64_output_sve_cnt_immediate ("dec", "%x0", AARCH64_SV_ALL, + -offset_value.coeffs[1], 0); +} + /* Return true if we can add VALUE to a register using a single ADDVL or ADDPL instruction. */ @@ -2212,27 +4505,16 @@ aarch64_sve_addvl_addpl_immediate_p (rtx x) && aarch64_sve_addvl_addpl_immediate_p (value)); } -/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1 - and storing the result in operand 0. */ +/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET + to operand 1 and storing the result in operand 0. */ char * -aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset) +aarch64_output_sve_addvl_addpl (rtx offset) { static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)]; poly_int64 offset_value = rtx_to_poly_int64 (offset); gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value)); - /* Use INC or DEC if possible. */ - if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest))) - { - if (aarch64_sve_cnt_immediate_p (offset_value)) - return aarch64_output_sve_cnt_immediate ("inc", "%x0", - offset_value.coeffs[1], 0); - if (aarch64_sve_cnt_immediate_p (-offset_value)) - return aarch64_output_sve_cnt_immediate ("dec", "%x0", - -offset_value.coeffs[1], 0); - } - int factor = offset_value.coeffs[1]; if ((factor & 15) == 0) snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16); @@ -2247,8 +4529,8 @@ aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset) factor in *FACTOR_OUT (if nonnull). */ bool -aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out, - unsigned int *nelts_per_vq_out) +aarch64_sve_vector_inc_dec_immediate_p (rtx x, int *factor_out, + unsigned int *nelts_per_vq_out) { rtx elt; poly_int64 value; @@ -2282,9 +4564,9 @@ aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out, instruction. */ bool -aarch64_sve_inc_dec_immediate_p (rtx x) +aarch64_sve_vector_inc_dec_immediate_p (rtx x) { - return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL); + return aarch64_sve_vector_inc_dec_immediate_p (x, NULL, NULL); } /* Return the asm template for an SVE vector INC or DEC instruction. @@ -2292,18 +4574,18 @@ aarch64_sve_inc_dec_immediate_p (rtx x) value of the vector count operand itself. 
*/ char * -aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x) +aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) { int factor; unsigned int nelts_per_vq; - if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq)) + if (!aarch64_sve_vector_inc_dec_immediate_p (x, &factor, &nelts_per_vq)) gcc_unreachable (); if (factor < 0) - return aarch64_output_sve_cnt_immediate ("dec", operands, -factor, - nelts_per_vq); + return aarch64_output_sve_cnt_immediate ("dec", operands, AARCH64_SV_ALL, + -factor, nelts_per_vq); else - return aarch64_output_sve_cnt_immediate ("inc", operands, factor, - nelts_per_vq); + return aarch64_output_sve_cnt_immediate ("inc", operands, AARCH64_SV_ALL, + factor, nelts_per_vq); } static int @@ -2435,7 +4717,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, bool aarch64_mov128_immediate (rtx imm) { - if (GET_CODE (imm) == CONST_INT) + if (CONST_INT_P (imm)) return true; gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2); @@ -2454,7 +4736,7 @@ aarch64_mov128_immediate (rtx imm) static unsigned int aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset) { - return abs_hwi (offset) < 0x1000000 ? 0 : 1; + return absu_hwi (offset) < 0x1000000 ? 0 : 1; } /* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for @@ -2481,7 +4763,7 @@ aarch64_add_offset_1 (scalar_int_mode mode, rtx dest, gcc_assert (emit_move_imm || temp1 != NULL_RTX); gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src)); - HOST_WIDE_INT moffset = abs_hwi (offset); + unsigned HOST_WIDE_INT moffset = absu_hwi (offset); rtx_insn *insn; if (!moffset) @@ -2525,7 +4807,8 @@ aarch64_add_offset_1 (scalar_int_mode mode, rtx dest, if (emit_move_imm) { gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ()); - temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset)); + temp1 = aarch64_force_temporary (mode, temp1, + gen_int_mode (moffset, mode)); } insn = emit_insn (offset < 0 ? gen_sub3_insn (dest, src, temp1) @@ -2686,20 +4969,36 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, } else { - /* Use CNTD, then multiply it by FACTOR. */ - val = gen_int_mode (poly_int64 (2, 2), mode); + /* Base the factor on LOW_BIT if we can calculate LOW_BIT + directly, since that should increase the chances of being + able to use a shift and add sequence. If LOW_BIT itself + is out of range, just use CNTD. */ + if (low_bit <= 16 * 8) + factor /= low_bit; + else + low_bit = 1; + + val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode); val = aarch64_force_temporary (mode, temp1, val); - /* Go back to using a negative multiplication factor if we have - no register from which to subtract. */ - if (code == MINUS && src == const0_rtx) + if (can_create_pseudo_p ()) + { + rtx coeff1 = gen_int_mode (factor, mode); + val = expand_mult (mode, val, coeff1, NULL_RTX, true, true); + } + else { - factor = -factor; - code = PLUS; + /* Go back to using a negative multiplication factor if we have + no register from which to subtract. */ + if (code == MINUS && src == const0_rtx) + { + factor = -factor; + code = PLUS; + } + rtx coeff1 = gen_int_mode (factor, mode); + coeff1 = aarch64_force_temporary (mode, temp2, coeff1); + val = gen_rtx_MULT (mode, val, coeff1); } - rtx coeff1 = gen_int_mode (factor, mode); - coeff1 = aarch64_force_temporary (mode, temp2, coeff1); - val = gen_rtx_MULT (mode, val, coeff1); } if (shift > 0) @@ -2782,10 +5081,11 @@ aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm) if nonnull. 
*/ static inline void -aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p) +aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p, + bool emit_move_imm = true) { aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta, - temp1, temp2, frame_related_p); + temp1, temp2, frame_related_p, emit_move_imm); } /* Set DEST to (vec_series BASE STEP). */ @@ -2805,81 +5105,257 @@ aarch64_expand_vec_series (rtx dest, rtx base, rtx step) emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step)); } -/* Try to duplicate SRC into SVE register DEST, given that SRC is an - integer of mode INT_MODE. Return true on success. */ +/* Duplicate 128-bit Advanced SIMD vector SRC so that it fills an SVE + register of mode MODE. Use TARGET for the result if it's nonnull + and convenient. -static bool -aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode, - rtx src) -{ - /* If the constant is smaller than 128 bits, we can do the move - using a vector of SRC_MODEs. */ - if (src_mode != TImode) - { - poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)), - GET_MODE_SIZE (src_mode)); - machine_mode dup_mode = mode_for_vector (src_mode, count).require (); - emit_move_insn (gen_lowpart (dup_mode, dest), - gen_const_vec_duplicate (dup_mode, src)); - return true; + The two vector modes must have the same element mode. The behavior + is to duplicate architectural lane N of SRC into architectural lanes + N + I * STEP of the result. On big-endian targets, architectural + lane 0 of an Advanced SIMD vector is the last element of the vector + in memory layout, so for big-endian targets this operation has the + effect of reversing SRC before duplicating it. Callers need to + account for this. */ + +rtx +aarch64_expand_sve_dupq (rtx target, machine_mode mode, rtx src) +{ + machine_mode src_mode = GET_MODE (src); + gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (src_mode)); + insn_code icode = (BYTES_BIG_ENDIAN + ? code_for_aarch64_vec_duplicate_vq_be (mode) + : code_for_aarch64_vec_duplicate_vq_le (mode)); + + unsigned int i = 0; + expand_operand ops[3]; + create_output_operand (&ops[i++], target, mode); + create_output_operand (&ops[i++], src, src_mode); + if (BYTES_BIG_ENDIAN) + { + /* Create a PARALLEL describing the reversal of SRC. */ + unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (mode); + rtx sel = aarch64_gen_stepped_int_parallel (nelts_per_vq, + nelts_per_vq - 1, -1); + create_fixed_operand (&ops[i++], sel); } + expand_insn (icode, i, ops); + return ops[0].value; +} - /* Use LD1RQ[BHWD] to load the 128 bits from memory. */ - src = force_const_mem (src_mode, src); +/* Try to force 128-bit vector value SRC into memory and use LD1RQ to fetch + the memory image into DEST. Return true on success. */ + +static bool +aarch64_expand_sve_ld1rq (rtx dest, rtx src) +{ + src = force_const_mem (GET_MODE (src), src); if (!src) return false; /* Make sure that the address is legitimate. 
*/ - if (!aarch64_sve_ld1r_operand_p (src)) + if (!aarch64_sve_ld1rq_operand_p (src)) { rtx addr = force_reg (Pmode, XEXP (src, 0)); src = replace_equiv_address (src, addr); } machine_mode mode = GET_MODE (dest); - unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode); - machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require (); - rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); - src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ); - emit_insn (gen_rtx_SET (dest, src)); + machine_mode pred_mode = aarch64_sve_pred_mode (mode); + rtx ptrue = aarch64_ptrue_reg (pred_mode); + emit_insn (gen_aarch64_sve_ld1rq (mode, dest, src, ptrue)); return true; } -/* Expand a move of general CONST_VECTOR SRC into DEST, given that it - isn't a simple duplicate or series. */ +/* SRC is an SVE CONST_VECTOR that contains N "foreground" values followed + by N "background" values. Try to move it into TARGET using: -static void -aarch64_expand_sve_const_vector (rtx dest, rtx src) + PTRUE PRED.<T>, VL<N> + MOV TRUE.<T>, #<foreground> + MOV FALSE.<T>, #<background> + SEL TARGET.<T>, PRED.<T>, TRUE.<T>, FALSE.<T> + + The PTRUE is always a single instruction but the MOVs might need a + longer sequence. If the background value is zero (as it often is), + the sequence can sometimes collapse to a PTRUE followed by a + zero-predicated move. + + Return the target on success, otherwise return null. */ + +static rtx +aarch64_expand_sve_const_vector_sel (rtx target, rtx src) +{ + gcc_assert (CONST_VECTOR_NELTS_PER_PATTERN (src) == 2); + + /* Make sure that the PTRUE is valid. */ + machine_mode mode = GET_MODE (src); + machine_mode pred_mode = aarch64_sve_pred_mode (mode); + unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); + if (aarch64_svpattern_for_vl (pred_mode, npatterns) + == AARCH64_NUM_SVPATTERNS) + return NULL_RTX; + + rtx_vector_builder pred_builder (pred_mode, npatterns, 2); + rtx_vector_builder true_builder (mode, npatterns, 1); + rtx_vector_builder false_builder (mode, npatterns, 1); + for (unsigned int i = 0; i < npatterns; ++i) + { + true_builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, i)); + pred_builder.quick_push (CONST1_RTX (BImode)); + } + for (unsigned int i = 0; i < npatterns; ++i) + { + false_builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, i + npatterns)); + pred_builder.quick_push (CONST0_RTX (BImode)); + } + expand_operand ops[4]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], true_builder.build (), mode); + create_input_operand (&ops[2], false_builder.build (), mode); + create_input_operand (&ops[3], pred_builder.build (), pred_mode); + expand_insn (code_for_vcond_mask (mode, mode), 4, ops); + return target; +} + +/* Return a register containing CONST_VECTOR SRC, given that SRC has an + SVE data mode and isn't a legitimate constant. Use TARGET for the + result if convenient. + + The returned register can have whatever mode seems most natural + given the contents of SRC.
*/ + +static rtx +aarch64_expand_sve_const_vector (rtx target, rtx src) { machine_mode mode = GET_MODE (src); unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); - gcc_assert (npatterns > 1); + scalar_mode elt_mode = GET_MODE_INNER (mode); + unsigned int elt_bits = GET_MODE_BITSIZE (elt_mode); + unsigned int container_bits = aarch64_sve_container_bits (mode); + unsigned int encoded_bits = npatterns * nelts_per_pattern * container_bits; + + if (nelts_per_pattern == 1 + && encoded_bits <= 128 + && container_bits != elt_bits) + { + /* We have a partial vector mode and a constant whose full-vector + equivalent would occupy a repeating 128-bit sequence. Build that + full-vector equivalent instead, so that we have the option of + using LD1RQ and Advanced SIMD operations. */ + unsigned int repeat = container_bits / elt_bits; + machine_mode full_mode = aarch64_full_sve_mode (elt_mode).require (); + rtx_vector_builder builder (full_mode, npatterns * repeat, 1); + for (unsigned int i = 0; i < npatterns; ++i) + for (unsigned int j = 0; j < repeat; ++j) + builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, i)); + target = aarch64_target_reg (target, full_mode); + return aarch64_expand_sve_const_vector (target, builder.build ()); + } - if (nelts_per_pattern == 1) - { - /* The constant is a repeating seqeuence of at least two elements, - where the repeating elements occupy no more than 128 bits. - Get an integer representation of the replicated value. */ - scalar_int_mode int_mode; - if (BYTES_BIG_ENDIAN) - /* For now, always use LD1RQ to load the value on big-endian - targets, since the handling of smaller integers includes a - subreg that is semantically an element reverse. */ - int_mode = TImode; - else + if (nelts_per_pattern == 1 && encoded_bits == 128) + { + /* The constant is a duplicated quadword but can't be narrowed + beyond a quadword. Get the memory image of the first quadword + as a 128-bit vector and try using LD1RQ to load it from memory. + + The effect for both endiannesses is to load memory lane N into + architectural lanes N + I * STEP of the result. On big-endian + targets, the layout of the 128-bit vector in an Advanced SIMD + register would be different from its layout in an SVE register, + but this 128-bit vector is a memory value only. */ + machine_mode vq_mode = aarch64_vq_mode (elt_mode).require (); + rtx vq_value = simplify_gen_subreg (vq_mode, src, mode, 0); + if (vq_value && aarch64_expand_sve_ld1rq (target, vq_value)) + return target; + } + + if (nelts_per_pattern == 1 && encoded_bits < 128) + { + /* The vector is a repeating sequence of 64 bits or fewer. + See if we can load them using an Advanced SIMD move and then + duplicate it to fill a vector. This is better than using a GPR + move because it keeps everything in the same register file. */ + machine_mode vq_mode = aarch64_vq_mode (elt_mode).require (); + rtx_vector_builder builder (vq_mode, npatterns, 1); + for (unsigned int i = 0; i < npatterns; ++i) { - unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns; - gcc_assert (int_bits <= 128); - int_mode = int_mode_for_size (int_bits, 0).require (); + /* We want memory lane N to go into architectural lane N, + so reverse for big-endian targets. The DUP .Q pattern + has a compensating reverse built-in. */ + unsigned int srci = BYTES_BIG_ENDIAN ? 
npatterns - i - 1 : i; + builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, srci)); } - rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0); - if (int_value - && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value)) - return; + rtx vq_src = builder.build (); + if (aarch64_simd_valid_immediate (vq_src, NULL)) + { + vq_src = force_reg (vq_mode, vq_src); + return aarch64_expand_sve_dupq (target, mode, vq_src); + } + + /* Get an integer representation of the repeating part of Advanced + SIMD vector VQ_SRC. This preserves the endianness of VQ_SRC, + which for big-endian targets is lane-swapped wrt a normal + Advanced SIMD vector. This means that for both endiannesses, + memory lane N of SVE vector SRC corresponds to architectural + lane N of a register holding VQ_SRC. This in turn means that + memory lane 0 of SVE vector SRC is in the lsb of VQ_SRC (viewed + as a single 128-bit value) and thus that memory lane 0 of SRC is + in the lsb of the integer. Duplicating the integer therefore + ensures that memory lane N of SRC goes into architectural lane + N + I * INDEX of the SVE register. */ + scalar_mode int_mode = int_mode_for_size (encoded_bits, 0).require (); + rtx elt_value = simplify_gen_subreg (int_mode, vq_src, vq_mode, 0); + if (elt_value) + { + /* Pretend that we had a vector of INT_MODE to start with. */ + elt_mode = int_mode; + mode = aarch64_full_sve_mode (int_mode).require (); + + /* If the integer can be moved into a general register by a + single instruction, do that and duplicate the result. */ + if (CONST_INT_P (elt_value) + && aarch64_move_imm (INTVAL (elt_value), elt_mode)) + { + elt_value = force_reg (elt_mode, elt_value); + return expand_vector_broadcast (mode, elt_value); + } + } + else if (npatterns == 1) + /* We're duplicating a single value, but can't do better than + force it to memory and load from there. This handles things + like symbolic constants. */ + elt_value = CONST_VECTOR_ENCODED_ELT (src, 0); + + if (elt_value) + { + /* Load the element from memory if we can, otherwise move it into + a register and use a DUP. */ + rtx op = force_const_mem (elt_mode, elt_value); + if (!op) + op = force_reg (elt_mode, elt_value); + return expand_vector_broadcast (mode, op); + } + } + + /* Try using INDEX. */ + rtx base, step; + if (const_vec_series_p (src, &base, &step)) + { + aarch64_expand_vec_series (target, base, step); + return target; } + /* From here on, it's better to force the whole constant to memory + if we can. */ + if (GET_MODE_NUNITS (mode).is_constant ()) + return NULL_RTX; + + if (nelts_per_pattern == 2) + if (rtx res = aarch64_expand_sve_const_vector_sel (target, src)) + return res; + /* Expand each pattern individually. */ + gcc_assert (npatterns > 1); rtx_vector_builder builder; auto_vec vectors (npatterns); for (unsigned int i = 0; i < npatterns; ++i) @@ -2896,31 +5372,273 @@ aarch64_expand_sve_const_vector (rtx dest, rtx src) npatterns /= 2; for (unsigned int i = 0; i < npatterns; ++i) { - rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode)); + rtx tmp = (npatterns == 1 ? target : gen_reg_rtx (mode)); rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]); emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1)); vectors[i] = tmp; } } - gcc_assert (vectors[0] == dest); + gcc_assert (vectors[0] == target); + return target; } -/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE - is a pattern that can be used to set DEST to a replicated scalar - element. 
*/ +/* Use WHILE to set a predicate register of mode MODE in which the first + VL bits are set and the rest are clear. Use TARGET for the register + if it's nonnull and convenient. */ -void -aarch64_expand_mov_immediate (rtx dest, rtx imm, - rtx (*gen_vec_duplicate) (rtx, rtx)) +static rtx +aarch64_sve_move_pred_via_while (rtx target, machine_mode mode, + unsigned int vl) { - machine_mode mode = GET_MODE (dest); + rtx limit = force_reg (DImode, gen_int_mode (vl, DImode)); + target = aarch64_target_reg (target, mode); + emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode, + target, const0_rtx, limit)); + return target; +} - /* Check on what type of symbol it is. */ - scalar_int_mode int_mode; - if ((GET_CODE (imm) == SYMBOL_REF - || GET_CODE (imm) == LABEL_REF - || GET_CODE (imm) == CONST - || GET_CODE (imm) == CONST_POLY_INT) +static rtx +aarch64_expand_sve_const_pred_1 (rtx, rtx_vector_builder &, bool); + +/* BUILDER is a constant predicate in which the index of every set bit + is a multiple of ELT_SIZE (which is <= 8). Try to load the constant + by inverting every element at a multiple of ELT_SIZE and EORing the + result with an ELT_SIZE PTRUE. + + Return a register that contains the constant on success, otherwise + return null. Use TARGET as the register if it is nonnull and + convenient. */ + +static rtx +aarch64_expand_sve_const_pred_eor (rtx target, rtx_vector_builder &builder, + unsigned int elt_size) +{ + /* Invert every element at a multiple of ELT_SIZE, keeping the + other bits zero. */ + rtx_vector_builder inv_builder (VNx16BImode, builder.npatterns (), + builder.nelts_per_pattern ()); + for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) + if ((i & (elt_size - 1)) == 0 && INTVAL (builder.elt (i)) == 0) + inv_builder.quick_push (const1_rtx); + else + inv_builder.quick_push (const0_rtx); + inv_builder.finalize (); + + /* See if we can load the constant cheaply. */ + rtx inv = aarch64_expand_sve_const_pred_1 (NULL_RTX, inv_builder, false); + if (!inv) + return NULL_RTX; + + /* EOR the result with an ELT_SIZE PTRUE. */ + rtx mask = aarch64_ptrue_all (elt_size); + mask = force_reg (VNx16BImode, mask); + inv = gen_lowpart (VNx16BImode, inv); + target = aarch64_target_reg (target, VNx16BImode); + emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask)); + return target; +} + +/* BUILDER is a constant predicate in which the index of every set bit + is a multiple of ELT_SIZE (which is <= 8). Try to load the constant + using a TRN1 of size PERMUTE_SIZE, which is >= ELT_SIZE. Return the + register on success, otherwise return null. Use TARGET as the register + if nonnull and convenient. */ + +static rtx +aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder, + unsigned int elt_size, + unsigned int permute_size) +{ + /* We're going to split the constant into two new constants A and B, + with element I of BUILDER going into A if (I & PERMUTE_SIZE) == 0 + and into B otherwise. E.g. for PERMUTE_SIZE == 4 && ELT_SIZE == 1: + + A: { 0, 1, 2, 3, _, _, _, _, 8, 9, 10, 11, _, _, _, _ } + B: { 4, 5, 6, 7, _, _, _, _, 12, 13, 14, 15, _, _, _, _ } + + where _ indicates elements that will be discarded by the permute. + + First calculate the ELT_SIZEs for A and B. 
*/ + unsigned int a_elt_size = GET_MODE_SIZE (DImode); + unsigned int b_elt_size = GET_MODE_SIZE (DImode); + for (unsigned int i = 0; i < builder.encoded_nelts (); i += elt_size) + if (INTVAL (builder.elt (i)) != 0) + { + if (i & permute_size) + b_elt_size |= i - permute_size; + else + a_elt_size |= i; + } + a_elt_size &= -a_elt_size; + b_elt_size &= -b_elt_size; + + /* Now construct the vectors themselves. */ + rtx_vector_builder a_builder (VNx16BImode, builder.npatterns (), + builder.nelts_per_pattern ()); + rtx_vector_builder b_builder (VNx16BImode, builder.npatterns (), + builder.nelts_per_pattern ()); + unsigned int nelts = builder.encoded_nelts (); + for (unsigned int i = 0; i < nelts; ++i) + if (i & (elt_size - 1)) + { + a_builder.quick_push (const0_rtx); + b_builder.quick_push (const0_rtx); + } + else if ((i & permute_size) == 0) + { + /* The A and B elements are significant. */ + a_builder.quick_push (builder.elt (i)); + b_builder.quick_push (builder.elt (i + permute_size)); + } + else + { + /* The A and B elements are going to be discarded, so pick whatever + is likely to give a nice constant. We are targeting element + sizes A_ELT_SIZE and B_ELT_SIZE for A and B respectively, + with the aim of each being a sequence of ones followed by + a sequence of zeros. So: + + * if X_ELT_SIZE <= PERMUTE_SIZE, the best approach is to + duplicate the last X_ELT_SIZE element, to extend the + current sequence of ones or zeros. + + * if X_ELT_SIZE > PERMUTE_SIZE, the best approach is to add a + zero, so that the constant really does have X_ELT_SIZE and + not a smaller size. */ + if (a_elt_size > permute_size) + a_builder.quick_push (const0_rtx); + else + a_builder.quick_push (a_builder.elt (i - a_elt_size)); + if (b_elt_size > permute_size) + b_builder.quick_push (const0_rtx); + else + b_builder.quick_push (b_builder.elt (i - b_elt_size)); + } + a_builder.finalize (); + b_builder.finalize (); + + /* Try loading A into a register. */ + rtx_insn *last = get_last_insn (); + rtx a = aarch64_expand_sve_const_pred_1 (NULL_RTX, a_builder, false); + if (!a) + return NULL_RTX; + + /* Try loading B into a register. */ + rtx b = a; + if (a_builder != b_builder) + { + b = aarch64_expand_sve_const_pred_1 (NULL_RTX, b_builder, false); + if (!b) + { + delete_insns_since (last); + return NULL_RTX; + } + } + + /* Emit the TRN1 itself. We emit a TRN that operates on VNx16BI + operands but permutes them as though they had mode MODE. */ + machine_mode mode = aarch64_sve_pred_mode (permute_size).require (); + target = aarch64_target_reg (target, GET_MODE (a)); + rtx type_reg = CONST0_RTX (mode); + emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg)); + return target; +} + +/* Subroutine of aarch64_expand_sve_const_pred. Try to load the VNx16BI + constant in BUILDER into an SVE predicate register. Return the register + on success, otherwise return null. Use TARGET for the register if + nonnull and convenient. + + ALLOW_RECURSE_P is true if we can use methods that would call this + function recursively. */ + +static rtx +aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder, + bool allow_recurse_p) +{ + if (builder.encoded_nelts () == 1) + /* A PFALSE or a PTRUE .B ALL. */ + return aarch64_emit_set_immediate (target, builder); + + unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder); + if (int vl = aarch64_partial_ptrue_length (builder, elt_size)) + { + /* If we can load the constant using PTRUE, use it as-is. 
*/ + machine_mode mode = aarch64_sve_pred_mode (elt_size).require (); + if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS) + return aarch64_emit_set_immediate (target, builder); + + /* Otherwise use WHILE to set the first VL bits. */ + return aarch64_sve_move_pred_via_while (target, mode, vl); + } + + if (!allow_recurse_p) + return NULL_RTX; + + /* Try inverting the vector in element size ELT_SIZE and then EORing + the result with an ELT_SIZE PTRUE. */ + if (INTVAL (builder.elt (0)) == 0) + if (rtx res = aarch64_expand_sve_const_pred_eor (target, builder, + elt_size)) + return res; + + /* Try using TRN1 to permute two simpler constants. */ + for (unsigned int i = elt_size; i <= 8; i *= 2) + if (rtx res = aarch64_expand_sve_const_pred_trn (target, builder, + elt_size, i)) + return res; + + return NULL_RTX; +} + +/* Return an SVE predicate register that contains the VNx16BImode + constant in BUILDER, without going through the move expanders. + + The returned register can have whatever mode seems most natural + given the contents of BUILDER. Use TARGET for the result if + convenient. */ + +static rtx +aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder) +{ + /* Try loading the constant using pure predicate operations. */ + if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder, true)) + return res; + + /* Try forcing the constant to memory. */ + if (builder.full_nelts ().is_constant ()) + if (rtx mem = force_const_mem (VNx16BImode, builder.build ())) + { + target = aarch64_target_reg (target, VNx16BImode); + emit_move_insn (target, mem); + return target; + } + + /* The last resort is to load the constant as an integer and then + compare it against zero. Use -1 for set bits in order to increase + the chances of using SVE DUPM or an Advanced SIMD byte mask. */ + rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (), + builder.nelts_per_pattern ()); + for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) + int_builder.quick_push (INTVAL (builder.elt (i)) + ? constm1_rtx : const0_rtx); + return aarch64_convert_sve_data_to_pred (target, VNx16BImode, + int_builder.build ()); +} + +/* Set DEST to immediate IMM. */ + +void +aarch64_expand_mov_immediate (rtx dest, rtx imm) +{ + machine_mode mode = GET_MODE (dest); + + /* Check on what type of symbol it is. */ + scalar_int_mode int_mode; + if ((SYMBOL_REF_P (imm) + || LABEL_REF_P (imm) + || GET_CODE (imm) == CONST + || GET_CODE (imm) == CONST_POLY_INT) && is_a <scalar_int_mode> (mode, &int_mode)) { rtx mem; @@ -2936,6 +5654,11 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm, folding it into the relocation.
*/ if (!offset.is_constant (&const_offset)) { + if (!TARGET_SVE) + { + aarch64_report_sve_required (); + return; + } if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset)) emit_insn (gen_rtx_SET (dest, imm)); else @@ -2967,8 +5690,11 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm, switch (sty) { case SYMBOL_FORCE_TO_MEM: + if (int_mode != ptr_mode) + imm = convert_memory_address (ptr_mode, imm); + if (const_offset != 0 - && targetm.cannot_force_const_mem (int_mode, imm)) + && targetm.cannot_force_const_mem (ptr_mode, imm)) { gcc_assert (can_create_pseudo_p ()); base = aarch64_force_temporary (int_mode, dest, base); @@ -3034,38 +5760,50 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm, if (!CONST_INT_P (imm)) { - rtx base, step, value; - if (GET_CODE (imm) == HIGH - || aarch64_simd_valid_immediate (imm, NULL)) - emit_insn (gen_rtx_SET (dest, imm)); - else if (const_vec_series_p (imm, &base, &step)) - aarch64_expand_vec_series (dest, base, step); - else if (const_vec_duplicate_p (imm, &value)) + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) { - /* If the constant is out of range of an SVE vector move, - load it from memory if we can, otherwise move it into - a register and use a DUP. */ - scalar_mode inner_mode = GET_MODE_INNER (mode); - rtx op = force_const_mem (inner_mode, value); - if (!op) - op = force_reg (inner_mode, value); - else if (!aarch64_sve_ld1r_operand_p (op)) + /* Only the low bit of each .H, .S and .D element is defined, + so we can set the upper bits to whatever we like. If the + predicate is all-true in MODE, prefer to set all the undefined + bits as well, so that we can share a single .B predicate for + all modes. */ + if (imm == CONSTM1_RTX (mode)) + imm = CONSTM1_RTX (VNx16BImode); + + /* All methods for constructing predicate modes wider than VNx16BI + will set the upper bits of each element to zero. Expose this + by moving such constants as a VNx16BI, so that all bits are + significant and so that constants for different modes can be + shared. The wider constant will still be available as a + REG_EQUAL note. */ + rtx_vector_builder builder; + if (aarch64_get_sve_pred_bits (builder, imm)) { - rtx addr = force_reg (Pmode, XEXP (op, 0)); - op = replace_equiv_address (op, addr); + rtx res = aarch64_expand_sve_const_pred (dest, builder); + if (dest != res) + emit_move_insn (dest, gen_lowpart (mode, res)); + return; } - emit_insn (gen_vec_duplicate (dest, op)); } - else if (GET_CODE (imm) == CONST_VECTOR - && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ()) - aarch64_expand_sve_const_vector (dest, imm); - else + + if (GET_CODE (imm) == HIGH + || aarch64_simd_valid_immediate (imm, NULL)) { - rtx mem = force_const_mem (mode, imm); - gcc_assert (mem); - emit_move_insn (dest, mem); + emit_insn (gen_rtx_SET (dest, imm)); + return; } + if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode)) + if (rtx res = aarch64_expand_sve_const_vector (dest, imm)) + { + if (dest != res) + emit_insn (gen_aarch64_sve_reinterpret (mode, dest, res)); + return; + } + + rtx mem = force_const_mem (mode, imm); + gcc_assert (mem); + emit_move_insn (dest, mem); return; } @@ -3073,15 +5811,61 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm, as_a <scalar_int_mode> (mode)); } +/* Return the MEM rtx that provides the canary value that should be used + for stack-smashing protection. MODE is the mode of the memory. + For SSP_GLOBAL, DECL_RTL is the MEM rtx for the canary variable + (__stack_chk_guard), otherwise it has no useful value.
SALT_TYPE + indicates whether the caller is performing a SET or a TEST operation. */ + +rtx +aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl, + aarch64_salt_type salt_type) +{ + rtx addr; + if (aarch64_stack_protector_guard == SSP_GLOBAL) + { + gcc_assert (MEM_P (decl_rtl)); + addr = XEXP (decl_rtl, 0); + poly_int64 offset; + rtx base = strip_offset_and_salt (addr, &offset); + if (!SYMBOL_REF_P (base)) + return decl_rtl; + + rtvec v = gen_rtvec (2, base, GEN_INT (salt_type)); + addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_SALT_ADDR); + addr = gen_rtx_CONST (Pmode, addr); + addr = plus_constant (Pmode, addr, offset); + } + else + { + /* Calculate the address from the system register. */ + rtx salt = GEN_INT (salt_type); + addr = gen_reg_rtx (mode); + if (mode == DImode) + emit_insn (gen_reg_stack_protect_address_di (addr, salt)); + else + { + emit_insn (gen_reg_stack_protect_address_si (addr, salt)); + addr = convert_memory_address (Pmode, addr); + } + addr = plus_constant (Pmode, addr, aarch64_stack_protector_guard_offset); + } + return gen_rtx_MEM (mode, force_reg (Pmode, addr)); +} + /* Emit an SVE predicated move from SRC to DEST. PRED is a predicate that is known to contain PTRUE. */ void aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src) { - emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest), - gen_rtvec (2, pred, src), - UNSPEC_MERGE_PTRUE))); + expand_operand ops[3]; + machine_mode mode = GET_MODE (dest); + create_output_operand (&ops[0], dest, mode); + create_input_operand (&ops[1], pred, GET_MODE(pred)); + create_input_operand (&ops[2], src, mode); + temporary_volatile_ok v (true); + expand_insn (code_for_aarch64_pred_mov (mode), 3, ops); } /* Expand a pre-RA SVE data move from SRC to DEST in which at least one @@ -3097,7 +5881,7 @@ void aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode) { machine_mode mode = GET_MODE (dest); - rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); + rtx ptrue = aarch64_ptrue_reg (pred_mode); if (!register_operand (src, mode) && !register_operand (dest, mode)) { @@ -3145,9 +5929,35 @@ bool aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src) { gcc_assert (BYTES_BIG_ENDIAN); - if (GET_CODE (dest) == SUBREG) + + /* Do not try to optimize subregs that LRA has created for matched + reloads. These subregs only exist as a temporary measure to make + the RTL well-formed, but they are exempt from the usual + TARGET_CAN_CHANGE_MODE_CLASS rules. + + For example, if we have: + + (set (reg:VNx8HI R1) (foo:VNx8HI (reg:VNx4SI R2))) + + and the constraints require R1 and R2 to be in the same register, + LRA may need to create RTL such as: + + (set (subreg:VNx4SI (reg:VNx8HI TMP) 0) (reg:VNx4SI R2)) + (set (reg:VNx8HI TMP) (foo:VNx8HI (subreg:VNx4SI (reg:VNx8HI TMP) 0))) + (set (reg:VNx8HI R1) (reg:VNx8HI TMP)) + + which forces both the input and output of the original instruction + to use the same hard register. But for this to work, the normal + rules have to be suppressed on the subreg input, otherwise LRA + would need to reload that input too, meaning that the process + would never terminate. To compensate for this, the normal rules + are also suppressed for the subreg output of the first move. + Ignoring the special case and handling the first move normally + would therefore generate wrong code: we would reverse the elements + for the first subreg but not reverse them back for the second subreg. 
*/ + if (SUBREG_P (dest) && !LRA_SUBREG_P (dest)) dest = SUBREG_REG (dest); - if (GET_CODE (src) == SUBREG) + if (SUBREG_P (src) && !LRA_SUBREG_P (src)) src = SUBREG_REG (src); /* The optimization handles two single SVE REGs with different element @@ -3161,7 +5971,7 @@ aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src) return false; /* Generate *aarch64_sve_mov<mode>_subreg_be. */ - rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode)); + rtx ptrue = aarch64_ptrue_reg (VNx16BImode); rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src), UNSPEC_REV_SUBREG); emit_insn (gen_rtx_SET (dest, unspec)); @@ -3172,7 +5982,7 @@ aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src) attributes. Unlike gen_lowpart, this doesn't care whether the mode change is valid. */ -static rtx +rtx aarch64_replace_reg_mode (rtx x, machine_mode mode) { if (GET_MODE (x) == mode) @@ -3183,90 +5993,87 @@ aarch64_replace_reg_mode (rtx x, machine_mode mode) return x; } +/* Return the SVE REV[BHW] unspec for reversing quantities of mode MODE + stored in wider integer containers. */ + +static unsigned int +aarch64_sve_rev_unspec (machine_mode mode) +{ + switch (GET_MODE_UNIT_SIZE (mode)) + { + case 1: return UNSPEC_REVB; + case 2: return UNSPEC_REVH; + case 4: return UNSPEC_REVW; + } + gcc_unreachable (); +} + /* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given operands. */ void aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src) { - /* Decide which REV operation we need. The mode with narrower elements - determines the mode of the operands and the mode with the wider + /* Decide which REV operation we need. The mode with wider elements + determines the mode of the operands and the mode with the narrower elements determines the reverse width. */ - machine_mode mode_with_wider_elts = GET_MODE (dest); - machine_mode mode_with_narrower_elts = GET_MODE (src); + machine_mode mode_with_wider_elts = aarch64_sve_int_mode (GET_MODE (dest)); + machine_mode mode_with_narrower_elts = aarch64_sve_int_mode (GET_MODE (src)); if (GET_MODE_UNIT_SIZE (mode_with_wider_elts) < GET_MODE_UNIT_SIZE (mode_with_narrower_elts)) std::swap (mode_with_wider_elts, mode_with_narrower_elts); - unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts); - unsigned int unspec; - if (wider_bytes == 8) - unspec = UNSPEC_REV64; - else if (wider_bytes == 4) - unspec = UNSPEC_REV32; - else if (wider_bytes == 2) - unspec = UNSPEC_REV16; - else - gcc_unreachable (); - machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require (); - - /* Emit: + unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts); + machine_mode pred_mode = aarch64_sve_pred_mode (mode_with_wider_elts); - (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV)] - UNSPEC_MERGE_PTRUE)) - - with the appropriate modes. */ + /* Get the operands in the appropriate modes and emit the instruction.
*/ ptrue = gen_lowpart (pred_mode, ptrue); - dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts); - src = aarch64_replace_reg_mode (src, mode_with_narrower_elts); - src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec); - src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src), - UNSPEC_MERGE_PTRUE); - emit_insn (gen_rtx_SET (dest, src)); + dest = aarch64_replace_reg_mode (dest, mode_with_wider_elts); + src = aarch64_replace_reg_mode (src, mode_with_wider_elts); + emit_insn (gen_aarch64_pred (unspec, mode_with_wider_elts, + dest, ptrue, src)); } static bool -aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, - tree exp ATTRIBUTE_UNUSED) +aarch64_function_ok_for_sibcall (tree, tree exp) { - /* Currently, always true. */ + if (crtl->abi->id () != expr_callee_abi (exp).id ()) + return false; + return true; } -/* Implement TARGET_PASS_BY_REFERENCE. */ +/* Subroutine of aarch64_pass_by_reference for arguments that are not + passed in SVE registers. */ static bool -aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, - machine_mode mode, - const_tree type, - bool named ATTRIBUTE_UNUSED) +aarch64_pass_by_reference_1 (CUMULATIVE_ARGS *pcum, + const function_arg_info &arg) { HOST_WIDE_INT size; machine_mode dummymode; int nregs; /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */ - if (mode == BLKmode && type) - size = int_size_in_bytes (type); + if (arg.mode == BLKmode && arg.type) + size = int_size_in_bytes (arg.type); else /* No frontends can create types with variable-sized modes, so we shouldn't be asked to pass or return them. */ - size = GET_MODE_SIZE (mode).to_constant (); + size = GET_MODE_SIZE (arg.mode).to_constant (); /* Aggregates are passed by reference based on their size. */ - if (type && AGGREGATE_TYPE_P (type)) - { - size = int_size_in_bytes (type); - } + if (arg.aggregate_type_p ()) + size = int_size_in_bytes (arg.type); /* Variable sized arguments are always returned by reference. */ if (size < 0) return true; /* Can this be a candidate to be passed in fp/simd register(s)? */ - if (aarch64_vfp_is_call_or_return_candidate (mode, type, - &dummymode, &nregs, - NULL)) + if (aarch64_vfp_is_call_or_return_candidate (arg.mode, arg.type, + &dummymode, &nregs, NULL, + !pcum || pcum->silent_p)) return false; /* Arguments which are variable sized or larger than 2 registers are @@ -3275,6 +6082,44 @@ aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, return size > 2 * UNITS_PER_WORD; } +/* Implement TARGET_PASS_BY_REFERENCE. */ + +static bool +aarch64_pass_by_reference (cumulative_args_t pcum_v, + const function_arg_info &arg) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + + if (!arg.type) + return aarch64_pass_by_reference_1 (pcum, arg); + + pure_scalable_type_info pst_info; + switch (pst_info.analyze (arg.type)) + { + case pure_scalable_type_info::IS_PST: + if (pcum && !pcum->silent_p && !TARGET_SVE) + /* We can't gracefully recover at this point, so make this a + fatal error. */ + fatal_error (input_location, "arguments of type %qT require" + " the SVE ISA extension", arg.type); + + /* Variadic SVE types are passed by reference. Normal non-variadic + arguments are too if we've run out of registers. 
*/ + return (!arg.named + || pcum->aapcs_nvrn + pst_info.num_zr () > NUM_FP_ARG_REGS + || pcum->aapcs_nprn + pst_info.num_pr () > NUM_PR_ARG_REGS); + + case pure_scalable_type_info::DOESNT_MATTER: + gcc_assert (aarch64_pass_by_reference_1 (pcum, arg)); + return true; + + case pure_scalable_type_info::NO_ABI_IDENTITY: + case pure_scalable_type_info::ISNT_PST: + return aarch64_pass_by_reference_1 (pcum, arg); + } + gcc_unreachable (); +} + /* Return TRUE if VALTYPE is padded to its least significant bits. */ static bool aarch64_return_in_msb (const_tree valtype) @@ -3298,7 +6143,13 @@ aarch64_return_in_msb (const_tree valtype) is always passed/returned in the least significant bits of fp/simd register(s). */ if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype, - &dummy_mode, &dummy_int, NULL)) + &dummy_mode, &dummy_int, NULL, + false)) + return false; + + /* Likewise pure scalable types for SVE vector and predicate registers. */ + pure_scalable_type_info pst_info; + if (pst_info.analyze_registers (valtype)) return false; return true; @@ -3313,13 +6164,20 @@ aarch64_function_value (const_tree type, const_tree func, { machine_mode mode; int unsignedp; - int count; - machine_mode ag_mode; mode = TYPE_MODE (type); if (INTEGRAL_TYPE_P (type)) mode = promote_function_mode (type, mode, &unsignedp, func, 1); + pure_scalable_type_info pst_info; + if (type && pst_info.analyze_registers (type)) + return pst_info.get_rtx (mode, V0_REGNUM, P0_REGNUM); + + /* Generic vectors that map to full SVE modes with -msve-vector-bits=N + are returned in memory, not by value. */ + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + bool sve_p = (vec_flags & VEC_ANY_SVE); + if (aarch64_return_in_msb (type)) { HOST_WIDE_INT size = int_size_in_bytes (type); @@ -3331,9 +6189,12 @@ aarch64_function_value (const_tree type, const_tree func, } } - if (aarch64_vfp_is_call_or_return_candidate (mode, type, - &ag_mode, &count, NULL)) + int count; + machine_mode ag_mode; + if (aarch64_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count, + NULL, false)) { + gcc_assert (!sve_p); if (!aarch64_composite_type_p (type, mode)) { gcc_assert (count == 1 && mode == ag_mode); @@ -3356,7 +6217,29 @@ aarch64_function_value (const_tree type, const_tree func, } } else - return gen_rtx_REG (mode, R0_REGNUM); + { + if (sve_p) + { + /* Vector types can acquire a partial SVE mode using things like + __attribute__((vector_size(N))), and this is potentially useful. + However, the choice of mode doesn't affect the type's ABI + identity, so we should treat the types as though they had + the associated integer mode, just like they did before SVE + was introduced. + + We know that the vector must be 128 bits or smaller, + otherwise we'd have returned it in memory instead. */ + gcc_assert (type + && (aarch64_some_values_include_pst_objects_p (type) + || (vec_flags & VEC_PARTIAL))); + + scalar_int_mode int_mode = int_mode_for_mode (mode).require (); + rtx reg = gen_rtx_REG (int_mode, R0_REGNUM); + rtx pair = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); + return gen_rtx_PARALLEL (mode, gen_rtvec (1, pair)); + } + return gen_rtx_REG (mode, R0_REGNUM); + } } /* Implements TARGET_FUNCTION_VALUE_REGNO_P. @@ -3380,17 +6263,11 @@ aarch64_function_value_regno_p (const unsigned int regno) return false; } -/* Implement TARGET_RETURN_IN_MEMORY. 
- - If the type T of the result of a function is such that - void func (T arg) - would require that arg be passed as a value in a register (or set of - registers) according to the parameter passing rules, then the result - is returned in the same registers as would be used for such an - argument. */ +/* Subroutine for aarch64_return_in_memory for types that are not returned + in SVE registers. */ static bool -aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +aarch64_return_in_memory_1 (const_tree type) { HOST_WIDE_INT size; machine_mode ag_mode; @@ -3402,11 +6279,8 @@ aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) /* Simple scalar types always returned in registers. */ return false; - if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), - type, - &ag_mode, - &count, - NULL)) + if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type, + &ag_mode, &count, NULL, false)) return false; /* Types larger than 2 registers returned in memory. */ @@ -3414,26 +6288,58 @@ aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) return (size < 0 || size > 2 * UNITS_PER_WORD); } +/* Implement TARGET_RETURN_IN_MEMORY. + + If the type T of the result of a function is such that + void func (T arg) + would require that arg be passed as a value in a register (or set of + registers) according to the parameter passing rules, then the result + is returned in the same registers as would be used for such an + argument. */ + +static bool +aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + pure_scalable_type_info pst_info; + switch (pst_info.analyze (type)) + { + case pure_scalable_type_info::IS_PST: + return (pst_info.num_zr () > NUM_FP_ARG_REGS + || pst_info.num_pr () > NUM_PR_ARG_REGS); + + case pure_scalable_type_info::DOESNT_MATTER: + gcc_assert (aarch64_return_in_memory_1 (type)); + return true; + + case pure_scalable_type_info::NO_ABI_IDENTITY: + case pure_scalable_type_info::ISNT_PST: + return aarch64_return_in_memory_1 (type); + } + gcc_unreachable (); +} + static bool aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode, const_tree type, int *nregs) { CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); - return aarch64_vfp_is_call_or_return_candidate (mode, - type, + return aarch64_vfp_is_call_or_return_candidate (mode, type, &pcum->aapcs_vfp_rmode, - nregs, - NULL); + nregs, NULL, pcum->silent_p); } /* Given MODE and TYPE of a function argument, return the alignment in bits. The idea is to suppress any stronger alignment requested by - the user and opt for the natural alignment (specified in AAPCS64 \S 4.1). - This is a helper function for local use only. */ + the user and opt for the natural alignment (specified in AAPCS64 \S + 4.1). ABI_BREAK is set to true if the alignment was incorrectly + calculated in versions of GCC prior to GCC-9. This is a helper + function for local use only. 
*/ static unsigned int -aarch64_function_arg_alignment (machine_mode mode, const_tree type) +aarch64_function_arg_alignment (machine_mode mode, const_tree type, + unsigned int *abi_break) { + *abi_break = 0; if (!type) return GET_MODE_ALIGNMENT (mode); @@ -3449,25 +6355,55 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type) return TYPE_ALIGN (TREE_TYPE (type)); unsigned int alignment = 0; + unsigned int bitfield_alignment = 0; for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) if (TREE_CODE (field) == FIELD_DECL) - alignment = std::max (alignment, DECL_ALIGN (field)); + { + /* Note that we explicitly consider zero-sized fields here, + even though they don't map to AAPCS64 machine types. + For example, in: + + struct __attribute__((aligned(8))) empty {}; + + struct s { + [[no_unique_address]] empty e; + int x; + }; + + "s" contains only one Fundamental Data Type (the int field) + but gains 8-byte alignment and size thanks to "e". */ + alignment = std::max (alignment, DECL_ALIGN (field)); + if (DECL_BIT_FIELD_TYPE (field)) + bitfield_alignment + = std::max (bitfield_alignment, + TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field))); + } + + if (bitfield_alignment > alignment) + { + *abi_break = alignment; + return bitfield_alignment; + } return alignment; } /* Layout a function argument according to the AAPCS64 rules. The rule - numbers refer to the rule numbers in the AAPCS64. */ + numbers refer to the rule numbers in the AAPCS64. ORIG_MODE is the + mode that was originally given to us by the target hook, whereas the + mode in ARG might be the result of replacing partial SVE modes with + the equivalent integer mode. */ static void -aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, - const_tree type, - bool named ATTRIBUTE_UNUSED) +aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) { CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + tree type = arg.type; + machine_mode mode = arg.mode; int ncrn, nvrn, nregs; bool allocate_ncrn, allocate_nvrn; HOST_WIDE_INT size; + unsigned int abi_break; /* We need to do this once per argument. */ if (pcum->aapcs_arg_processed) @@ -3475,6 +6411,54 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, pcum->aapcs_arg_processed = true; + pure_scalable_type_info pst_info; + if (type && pst_info.analyze_registers (type)) + { + /* The PCS says that it is invalid to pass an SVE value to an + unprototyped function. There is no ABI-defined location we + can return in this case, so we have no real choice but to raise + an error immediately, even though this is only a query function. */ + if (arg.named && pcum->pcs_variant != ARM_PCS_SVE) + { + gcc_assert (!pcum->silent_p); + error ("SVE type %qT cannot be passed to an unprototyped function", + arg.type); + /* Avoid repeating the message, and avoid tripping the assert + below. */ + pcum->pcs_variant = ARM_PCS_SVE; + } + + /* We would have converted the argument into pass-by-reference + form if it didn't fit in registers. 
*/ + pcum->aapcs_nextnvrn = pcum->aapcs_nvrn + pst_info.num_zr (); + pcum->aapcs_nextnprn = pcum->aapcs_nprn + pst_info.num_pr (); + gcc_assert (arg.named + && pcum->pcs_variant == ARM_PCS_SVE + && pcum->aapcs_nextnvrn <= NUM_FP_ARG_REGS + && pcum->aapcs_nextnprn <= NUM_PR_ARG_REGS); + pcum->aapcs_reg = pst_info.get_rtx (mode, V0_REGNUM + pcum->aapcs_nvrn, + P0_REGNUM + pcum->aapcs_nprn); + return; + } + + /* Generic vectors that map to full SVE modes with -msve-vector-bits=N + are passed by reference, not by value. */ + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + bool sve_p = (vec_flags & VEC_ANY_SVE); + if (sve_p) + /* Vector types can acquire a partial SVE mode using things like + __attribute__((vector_size(N))), and this is potentially useful. + However, the choice of mode doesn't affect the type's ABI + identity, so we should treat the types as though they had + the associated integer mode, just like they did before SVE + was introduced. + + We know that the vector must be 128 bits or smaller, + otherwise we'd have passed it in memory instead. */ + gcc_assert (type + && (aarch64_some_values_include_pst_objects_p (type) + || (vec_flags & VEC_PARTIAL))); + /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ if (type) size = int_size_in_bytes (type); @@ -3489,6 +6473,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, mode, type, &nregs); + gcc_assert (!sve_p || !allocate_nvrn); /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable. The following code thus handles passing by SIMD/FP registers first. */ @@ -3499,8 +6484,8 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, and homogenous short-vector aggregates (HVA). */ if (allocate_nvrn) { - if (!TARGET_FLOAT) - aarch64_err_no_fpadvsimd (mode, "argument"); + if (!pcum->silent_p && !TARGET_FLOAT) + aarch64_err_no_fpadvsimd (mode); if (nvrn + nregs <= NUM_FP_ARG_REGS) { @@ -3544,27 +6529,44 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, entirely general registers. */ if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS)) { - gcc_assert (nregs == 0 || nregs == 1 || nregs == 2); /* C.8 if the argument has an alignment of 16 then the NGRN is - rounded up to the next even number. */ + rounded up to the next even number. */ if (nregs == 2 && ncrn % 2 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT comparison is there because for > 16 * BITS_PER_UNIT alignment nregs should be > 2 and therefore it should be passed by reference rather than value. */ - && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) + && (aarch64_function_arg_alignment (mode, type, &abi_break) + == 16 * BITS_PER_UNIT)) { + if (abi_break && warn_psabi && currently_expanding_gimple_stmt) + inform (input_location, "parameter passing for argument of type " + "%qT changed in GCC 9.1", type); ++ncrn; gcc_assert (ncrn + nregs <= NUM_ARG_REGS); } + /* If an argument with an SVE mode needs to be shifted up to the + high part of the register, treat it as though it had an integer mode. + Using the normal (parallel [...]) would suppress the shifting. */ + if (sve_p + && BYTES_BIG_ENDIAN + && maybe_ne (GET_MODE_SIZE (mode), nregs * UNITS_PER_WORD) + && aarch64_pad_reg_upward (mode, type, false)) + { + mode = int_mode_for_mode (mode).require (); + sve_p = false; + } + /* NREGS can be 0 when e.g. an empty structure is to be passed. 
- A reg is still generated for it, but the caller should be smart + A reg is still generated for it, but the caller should be smart enough not to use it. */ - if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT) + if (nregs == 0 + || (nregs == 1 && !sve_p) + || GET_MODE_CLASS (mode) == MODE_INT) pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn); else { @@ -3574,7 +6576,10 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); for (i = 0; i < nregs; i++) { - rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i); + scalar_int_mode reg_mode = word_mode; + if (nregs == 1) + reg_mode = int_mode_for_mode (mode).require (); + rtx tmp = gen_rtx_REG (reg_mode, R0_REGNUM + ncrn + i); tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, GEN_INT (i * UNITS_PER_WORD)); XVECEXP (par, 0, i) = tmp; @@ -3594,74 +6599,105 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, on_stack: pcum->aapcs_stack_words = size / UNITS_PER_WORD; - if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) - pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size, - 16 / UNITS_PER_WORD); + if (aarch64_function_arg_alignment (mode, type, &abi_break) + == 16 * BITS_PER_UNIT) + { + int new_size = ROUND_UP (pcum->aapcs_stack_size, 16 / UNITS_PER_WORD); + if (pcum->aapcs_stack_size != new_size) + { + if (abi_break && warn_psabi && currently_expanding_gimple_stmt) + inform (input_location, "parameter passing for argument of type " + "%qT changed in GCC 9.1", type); + pcum->aapcs_stack_size = new_size; + } + } return; } /* Implement TARGET_FUNCTION_ARG. */ static rtx -aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode, - const_tree type, bool named) +aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) { CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); - gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64); + gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64 + || pcum->pcs_variant == ARM_PCS_SIMD + || pcum->pcs_variant == ARM_PCS_SVE); - if (mode == VOIDmode) - return NULL_RTX; + if (arg.end_marker_p ()) + return gen_int_mode (pcum->pcs_variant, DImode); - aarch64_layout_arg (pcum_v, mode, type, named); + aarch64_layout_arg (pcum_v, arg); return pcum->aapcs_reg; } void aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, - const_tree fntype ATTRIBUTE_UNUSED, - rtx libname ATTRIBUTE_UNUSED, - const_tree fndecl ATTRIBUTE_UNUSED, - unsigned n_named ATTRIBUTE_UNUSED) + const_tree fntype, + rtx libname ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED, + unsigned n_named ATTRIBUTE_UNUSED, + bool silent_p) { pcum->aapcs_ncrn = 0; pcum->aapcs_nvrn = 0; + pcum->aapcs_nprn = 0; pcum->aapcs_nextncrn = 0; pcum->aapcs_nextnvrn = 0; - pcum->pcs_variant = ARM_PCS_AAPCS64; + pcum->aapcs_nextnprn = 0; + if (fntype) + pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); + else + pcum->pcs_variant = ARM_PCS_AAPCS64; pcum->aapcs_reg = NULL_RTX; pcum->aapcs_arg_processed = false; pcum->aapcs_stack_words = 0; pcum->aapcs_stack_size = 0; + pcum->silent_p = silent_p; - if (!TARGET_FLOAT - && fndecl && TREE_PUBLIC (fndecl) + if (!silent_p + && !TARGET_FLOAT && fntype && fntype != error_mark_node) { const_tree type = TREE_TYPE (fntype); machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */ int nregs ATTRIBUTE_UNUSED; /* Likewise. 
*/ if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type, - &mode, &nregs, NULL)) - aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type"); + &mode, &nregs, NULL, false)) + aarch64_err_no_fpadvsimd (TYPE_MODE (type)); + } + + if (!silent_p + && !TARGET_SVE + && pcum->pcs_variant == ARM_PCS_SVE) + { + /* We can't gracefully recover at this point, so make this a + fatal error. */ + if (fndecl) + fatal_error (input_location, "%qE requires the SVE ISA extension", + fndecl); + else + fatal_error (input_location, "calls to functions of type %qT require" + " the SVE ISA extension", fntype); } - return; } static void aarch64_function_arg_advance (cumulative_args_t pcum_v, - machine_mode mode, - const_tree type, - bool named) + const function_arg_info &arg) { CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); - if (pcum->pcs_variant == ARM_PCS_AAPCS64) + if (pcum->pcs_variant == ARM_PCS_AAPCS64 + || pcum->pcs_variant == ARM_PCS_SIMD + || pcum->pcs_variant == ARM_PCS_SVE) { - aarch64_layout_arg (pcum_v, mode, type, named); + aarch64_layout_arg (pcum_v, arg); gcc_assert ((pcum->aapcs_reg != NULL_RTX) != (pcum->aapcs_stack_words != 0)); pcum->aapcs_arg_processed = false; pcum->aapcs_ncrn = pcum->aapcs_nextncrn; pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; + pcum->aapcs_nprn = pcum->aapcs_nextnprn; pcum->aapcs_stack_size += pcum->aapcs_stack_words; pcum->aapcs_stack_words = 0; pcum->aapcs_reg = NULL_RTX; @@ -3685,8 +6721,19 @@ aarch64_function_arg_regno_p (unsigned regno) static unsigned int aarch64_function_arg_boundary (machine_mode mode, const_tree type) { - unsigned int alignment = aarch64_function_arg_alignment (mode, type); - return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY); + unsigned int abi_break; + unsigned int alignment = aarch64_function_arg_alignment (mode, type, + &abi_break); + alignment = MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY); + if (abi_break & warn_psabi) + { + abi_break = MIN (MAX (abi_break, PARM_BOUNDARY), STACK_BOUNDARY); + if (alignment != abi_break) + inform (input_location, "parameter passing for argument of type " + "%qT changed in GCC 9.1", type); + } + + return alignment; } /* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */ @@ -3758,7 +6805,8 @@ aarch64_pad_reg_upward (machine_mode mode, const_tree type, bool first ATTRIBUTE_UNUSED) { - /* Small composite types are always padded upward. */ + /* Aside from pure scalable types, small composite types are always + padded upward. */ if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode)) { HOST_WIDE_INT size; @@ -3769,7 +6817,12 @@ aarch64_pad_reg_upward (machine_mode mode, const_tree type, shouldn't be asked to pass or return them. */ size = GET_MODE_SIZE (mode).to_constant (); if (size < 2 * UNITS_PER_WORD) - return true; + { + pure_scalable_type_info pst_info; + if (pst_info.analyze_registers (type)) + return false; + return true; + } } /* Otherwise, use the default padding. */ @@ -3792,10 +6845,6 @@ aarch64_libgcc_cmp_return_mode (void) #error Cannot use simple address calculation for stack probing #endif -/* The pair of scratch registers used for stack probing. */ -#define PROBE_STACK_FIRST_REG 9 -#define PROBE_STACK_SECOND_REG 10 - /* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE, inclusive. These are offsets from the current stack pointer. 
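The probing code that follows (aarch64_emit_probe_stack_range together with the loop emitted by aarch64_output_probe_stack_range) can be summarised by this C model. It is a sketch only: it ignores the small-size and residual special cases, and interval/probe_off stand for PROBE_INTERVAL with a zero store offset in the plain -fstack-check style case, or the guard size with a STACK_CLASH_CALLER_GUARD store offset under -fstack-clash-protection:

#include <stdint.h>

/* Not GCC code: conceptual model of the emitted probe loop.  SIZE is assumed
   to already be a multiple of INTERVAL; the real code rounds and handles the
   remainder separately.  */
static void
model_probe_range (volatile char *sp, uintptr_t first, uintptr_t size,
                   uintptr_t interval, uintptr_t probe_off)
{
  volatile char *test = sp - first;          /* reg1 = sp - first            */
  volatile char *last = sp - first - size;   /* reg2 = sp - (first + size)   */
  while (test != last)
    {
      test -= interval;                      /* sub  reg1, reg1, #interval   */
      test[probe_off] = 0;                   /* str  xzr, [reg1, #probe_off] */
    }
}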
*/ @@ -3809,7 +6858,7 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size) return; } - rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG); + rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REGNUM); /* See the same assertion on PROBE_INTERVAL above. */ gcc_assert ((first % ARITH_FACTOR) == 0); @@ -3867,7 +6916,7 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size) equality test for the loop condition. */ else { - rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG); + rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REGNUM); /* Step 1: round SIZE to the previous multiple of the interval. */ @@ -3945,13 +6994,33 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) /* Loop. */ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + HOST_WIDE_INT stack_clash_probe_interval + = 1 << param_stack_clash_protection_guard_size; + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ xops[0] = reg1; - xops[1] = GEN_INT (PROBE_INTERVAL); + HOST_WIDE_INT interval; + if (flag_stack_clash_protection) + interval = stack_clash_probe_interval; + else + interval = PROBE_INTERVAL; + + gcc_assert (aarch64_uimm12_shift (interval)); + xops[1] = GEN_INT (interval); + output_asm_insn ("sub\t%0, %0, %1", xops); - /* Probe at TEST_ADDR. */ - output_asm_insn ("str\txzr, [%0]", xops); + /* If doing stack clash protection then we probe up by the ABI specified + amount. We do this because we're dropping full pages at a time in the + loop. But if we're doing non-stack clash probing, probe at SP 0. */ + if (flag_stack_clash_protection) + xops[1] = GEN_INT (STACK_CLASH_CALLER_GUARD); + else + xops[1] = CONST0_RTX (GET_MODE (xops[1])); + + /* Probe at TEST_ADDR. If we're inside the loop it is always safe to probe + by this amount for each iteration. */ + output_asm_insn ("str\txzr, [%0, %1]", xops); /* Test if TEST_ADDR == LAST_ADDR. */ xops[1] = reg2; @@ -3965,191 +7034,410 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) return ""; } -/* Mark the registers that need to be saved by the callee and calculate - the size of the callee-saved registers area and frame record (both FP - and LR may be omitted). */ -static void -aarch64_layout_frame (void) +/* Emit the probe loop for doing stack clash probes and stack adjustments for + SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard size + of GUARD_SIZE. When a probe is emitted it is done at most + MIN_PROBE_THRESHOLD bytes from the current BASE at an interval of + at most MIN_PROBE_THRESHOLD. By the end of this function + BASE = BASE - ADJUSTMENT. */ + +const char * +aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment, + rtx min_probe_threshold, rtx guard_size) { - HOST_WIDE_INT offset = 0; - int regno, last_fp_reg = INVALID_REGNUM; + /* This function is not allowed to use any instruction generation function + like gen_ and friends. If you do you'll likely ICE during CFG validation, + so instead emit the code you want using output_asm_insn. */ + gcc_assert (flag_stack_clash_protection); + gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size)); + gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold)); - if (reload_completed && cfun->machine->frame.laid_out) - return; + /* The minimum required allocation before the residual requires probing. */ + HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold); + /* Clamp the value down to the nearest value that can be used with a cmp. 
*/ + residual_probe_guard = aarch64_clamp_to_uimm12_shift (residual_probe_guard); + rtx probe_offset_value_rtx = gen_int_mode (residual_probe_guard, Pmode); + + gcc_assert (INTVAL (min_probe_threshold) >= residual_probe_guard); + gcc_assert (aarch64_uimm12_shift (residual_probe_guard)); + + static int labelno = 0; + char loop_start_lab[32]; + char loop_end_lab[32]; + rtx xops[2]; + + ASM_GENERATE_INTERNAL_LABEL (loop_start_lab, "SVLPSPL", labelno); + ASM_GENERATE_INTERNAL_LABEL (loop_end_lab, "SVLPEND", labelno++); + + /* Emit loop start label. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_start_lab); + + /* ADJUSTMENT < RESIDUAL_PROBE_GUARD. */ + xops[0] = adjustment; + xops[1] = probe_offset_value_rtx; + output_asm_insn ("cmp\t%0, %1", xops); + + /* Branch to end if not enough adjustment to probe. */ + fputs ("\tb.lt\t", asm_out_file); + assemble_name_raw (asm_out_file, loop_end_lab); + fputc ('\n', asm_out_file); + + /* BASE = BASE - RESIDUAL_PROBE_GUARD. */ + xops[0] = base; + xops[1] = probe_offset_value_rtx; + output_asm_insn ("sub\t%0, %0, %1", xops); + + /* Probe at BASE. */ + xops[1] = const0_rtx; + output_asm_insn ("str\txzr, [%0, %1]", xops); + + /* ADJUSTMENT = ADJUSTMENT - RESIDUAL_PROBE_GUARD. */ + xops[0] = adjustment; + xops[1] = probe_offset_value_rtx; + output_asm_insn ("sub\t%0, %0, %1", xops); + + /* Branch to start if still more bytes to allocate. */ + fputs ("\tb\t", asm_out_file); + assemble_name_raw (asm_out_file, loop_start_lab); + fputc ('\n', asm_out_file); + + /* No probe leave. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab); + + /* BASE = BASE - ADJUSTMENT. */ + xops[0] = base; + xops[1] = adjustment; + output_asm_insn ("sub\t%0, %0, %1", xops); + return ""; +} + +/* Determine whether a frame chain needs to be generated. */ +static bool +aarch64_needs_frame_chain (void) +{ /* Force a frame chain for EH returns so the return address is at FP+8. */ - cfun->machine->frame.emit_frame_chain - = frame_pointer_needed || crtl->calls_eh_return; + if (frame_pointer_needed || crtl->calls_eh_return) + return true; + + /* A leaf function cannot have calls or write LR. */ + bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM); + + /* Don't use a frame chain in leaf functions if leaf frame pointers + are disabled. */ + if (flag_omit_leaf_frame_pointer && is_leaf) + return false; + + return aarch64_use_frame_pointer; +} + +/* Mark the registers that need to be saved by the callee and calculate + the size of the callee-saved registers area and frame record (both FP + and LR may be omitted). */ +static void +aarch64_layout_frame (void) +{ + poly_int64 offset = 0; + int regno, last_fp_reg = INVALID_REGNUM; + machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); + bool frame_related_fp_reg_p = false; + aarch64_frame &frame = cfun->machine->frame; + + frame.emit_frame_chain = aarch64_needs_frame_chain (); - /* Emit a frame chain if the frame pointer is enabled. - If -momit-leaf-frame-pointer is used, do not use a frame chain - in leaf functions which do not use LR. */ - if (flag_omit_frame_pointer == 2 - && !(flag_omit_leaf_frame_pointer && crtl->is_leaf - && !df_regs_ever_live_p (LR_REGNUM))) - cfun->machine->frame.emit_frame_chain = true; + /* Adjust the outgoing arguments size if required. Keep it in sync with what + the mid-end is doing. 
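Written as plain C, the SVE stack-clash loop defined just above (aarch64_output_probe_sve_stack_clash) has the following shape. This is a sketch only, with BASE and ADJUSTMENT treated as an ordinary pointer and integer rather than registers; the real code emits the cmp/b.lt/sub/str/sub/b sequence around internal labels:

#include <stdint.h>

/* Not GCC code: semantic model of the SVE stack-clash allocation loop.
   BASE is dropped by ADJUSTMENT in GUARD-sized steps, probing after each
   step, then by whatever residual remains (which the caller has arranged
   to be small enough not to need its own probe here).  */
static char *
model_sve_clash_alloc (char *base, uintptr_t adjustment, uintptr_t guard)
{
  while (adjustment >= guard)        /* cmp adj, guard ; b.lt end   */
    {
      base -= guard;                 /* sub base, base, guard       */
      *(volatile char *)base = 0;    /* str xzr, [base, 0]          */
      adjustment -= guard;           /* sub adj, adj, guard         */
    }
  return base - adjustment;          /* end: sub base, base, adj    */
}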
*/ + crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun); #define SLOT_NOT_REQUIRED (-2) #define SLOT_REQUIRED (-1) - cfun->machine->frame.wb_candidate1 = INVALID_REGNUM; - cfun->machine->frame.wb_candidate2 = INVALID_REGNUM; + frame.wb_candidate1 = INVALID_REGNUM; + frame.wb_candidate2 = INVALID_REGNUM; + frame.spare_pred_reg = INVALID_REGNUM; /* First mark all the registers that really need to be saved... */ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; + for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++) + frame.reg_offset[regno] = SLOT_NOT_REQUIRED; /* ... that includes the eh data registers (if needed)... */ if (crtl->calls_eh_return) for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) - cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] - = SLOT_REQUIRED; + frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_REQUIRED; /* ... and any callee saved register that dataflow says is live. */ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) if (df_regs_ever_live_p (regno) + && !fixed_regs[regno] && (regno == R30_REGNUM - || !call_used_regs[regno])) - cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; + || !crtl->abi->clobbers_full_reg_p (regno))) + frame.reg_offset[regno] = SLOT_REQUIRED; for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) if (df_regs_ever_live_p (regno) - && !call_used_regs[regno]) + && !fixed_regs[regno] + && !crtl->abi->clobbers_full_reg_p (regno)) { - cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; + frame.reg_offset[regno] = SLOT_REQUIRED; last_fp_reg = regno; + if (aarch64_emit_cfi_for_reg_p (regno)) + frame_related_fp_reg_p = true; + } + + /* Big-endian SVE frames need a spare predicate register in order + to save Z8-Z15. Decide which register they should use. Prefer + an unused argument register if possible, so that we don't force P4 + to be saved unnecessarily. */ + if (frame_related_fp_reg_p + && crtl->abi->id () == ARM_PCS_SVE + && BYTES_BIG_ENDIAN) + { + bitmap live1 = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + bitmap live2 = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun)); + for (regno = P0_REGNUM; regno <= P7_REGNUM; regno++) + if (!bitmap_bit_p (live1, regno) && !bitmap_bit_p (live2, regno)) + break; + gcc_assert (regno <= P7_REGNUM); + frame.spare_pred_reg = regno; + df_set_regs_ever_live (regno, true); + } + + for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) + if (df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && !crtl->abi->clobbers_full_reg_p (regno)) + frame.reg_offset[regno] = SLOT_REQUIRED; + + /* With stack-clash, LR must be saved in non-leaf functions. The saving of + LR counts as an implicit probe which allows us to maintain the invariant + described in the comment at expand_prologue. */ + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); + + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small + offset range. These saves happen below the hard frame pointer. 
*/ + for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) + if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) + { + frame.reg_offset[regno] = offset; + offset += BYTES_PER_SVE_PRED; } - if (cfun->machine->frame.emit_frame_chain) + if (maybe_ne (offset, 0)) + { + /* If we have any vector registers to save above the predicate registers, + the offset of the vector register save slots need to be a multiple + of the vector size. This lets us use the immediate forms of LDR/STR + (or LD1/ST1 for big-endian). + + A vector register is 8 times the size of a predicate register, + and we need to save a maximum of 12 predicate registers, so the + first vector register will be at either #1, MUL VL or #2, MUL VL. + + If we don't have any vector registers to save, and we know how + big the predicate save area is, we can just round it up to the + next 16-byte boundary. */ + if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ()) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + else + { + if (known_le (offset, vector_save_size)) + offset = vector_save_size; + else if (known_le (offset, vector_save_size * 2)) + offset = vector_save_size * 2; + else + gcc_unreachable (); + } + } + + /* If we need to save any SVE vector registers, add them next. */ + if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE) + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) + { + frame.reg_offset[regno] = offset; + offset += vector_save_size; + } + + /* OFFSET is now the offset of the hard frame pointer from the bottom + of the callee save area. */ + bool saves_below_hard_fp_p = maybe_ne (offset, 0); + frame.below_hard_fp_saved_regs_size = offset; + if (frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. */ - cfun->machine->frame.reg_offset[R29_REGNUM] = 0; - cfun->machine->frame.wb_candidate1 = R29_REGNUM; - cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; - cfun->machine->frame.wb_candidate2 = R30_REGNUM; - offset = 2 * UNITS_PER_WORD; + frame.reg_offset[R29_REGNUM] = offset; + frame.wb_candidate1 = R29_REGNUM; + frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD; + frame.wb_candidate2 = R30_REGNUM; + offset += 2 * UNITS_PER_WORD; } - /* Now assign stack slots for them. 
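As a worked instance of the rounding above, assume a 256-bit vector length, so a predicate save slot is 4 bytes and vector_save_size is 32. Saving the full set of twelve callee-saved predicates takes 48 bytes; since that is more than one vector but no more than two, the predicate area is rounded up to 64 bytes and the first Z-register slot sits at #2, MUL VL. If no vector registers needed saving, 48 would simply be rounded up to the next 16-byte boundary and stay at 48.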
*/ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) + if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) { - cfun->machine->frame.reg_offset[regno] = offset; - if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) - cfun->machine->frame.wb_candidate1 = regno; - else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) - cfun->machine->frame.wb_candidate2 = regno; + frame.reg_offset[regno] = offset; + if (frame.wb_candidate1 == INVALID_REGNUM) + frame.wb_candidate1 = regno; + else if (frame.wb_candidate2 == INVALID_REGNUM) + frame.wb_candidate2 = regno; offset += UNITS_PER_WORD; } - HOST_WIDE_INT max_int_offset = offset; - offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); - bool has_align_gap = offset != max_int_offset; + poly_int64 max_int_offset = offset; + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + bool has_align_gap = maybe_ne (offset, max_int_offset); for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) + if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) { /* If there is an alignment gap between integer and fp callee-saves, allocate the last fp register to it if possible. */ - if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0) + if (regno == last_fp_reg + && has_align_gap + && known_eq (vector_save_size, 8) + && multiple_p (offset, 16)) { - cfun->machine->frame.reg_offset[regno] = max_int_offset; + frame.reg_offset[regno] = max_int_offset; break; } - cfun->machine->frame.reg_offset[regno] = offset; - if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) - cfun->machine->frame.wb_candidate1 = regno; - else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM - && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) - cfun->machine->frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; + frame.reg_offset[regno] = offset; + if (frame.wb_candidate1 == INVALID_REGNUM) + frame.wb_candidate1 = regno; + else if (frame.wb_candidate2 == INVALID_REGNUM + && frame.wb_candidate1 >= V0_REGNUM) + frame.wb_candidate2 = regno; + offset += vector_save_size; } - offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - cfun->machine->frame.saved_regs_size = offset; + frame.saved_regs_size = offset; - HOST_WIDE_INT varargs_and_saved_regs_size - = offset + cfun->machine->frame.saved_varargs_size; + poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; - cfun->machine->frame.hard_fp_offset + poly_int64 above_outgoing_args = aligned_upper_bound (varargs_and_saved_regs_size + get_frame_size (), STACK_BOUNDARY / BITS_PER_UNIT); + frame.hard_fp_offset + = above_outgoing_args - frame.below_hard_fp_saved_regs_size; + /* Both these values are already aligned. 
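A concrete non-SVE instance of these formulas: suppose the function saves x29, x30, x19 and x20 (saved_regs_size = 32), has no varargs save area, 24 bytes of locals and 32 bytes of outgoing arguments. Then varargs_and_saved_regs_size is 32, above_outgoing_args is the 16-byte-aligned value of 32 + 24, i.e. 64, below_hard_fp_saved_regs_size is 0, and therefore hard_fp_offset = 64 and frame_size = 64 + 32 = 96.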
*/ gcc_assert (multiple_p (crtl->outgoing_args_size, STACK_BOUNDARY / BITS_PER_UNIT)); - cfun->machine->frame.frame_size - = (cfun->machine->frame.hard_fp_offset - + crtl->outgoing_args_size); + frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; - cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; + frame.locals_offset = frame.saved_varargs_size; - cfun->machine->frame.initial_adjust = 0; - cfun->machine->frame.final_adjust = 0; - cfun->machine->frame.callee_adjust = 0; - cfun->machine->frame.callee_offset = 0; + frame.initial_adjust = 0; + frame.final_adjust = 0; + frame.callee_adjust = 0; + frame.sve_callee_adjust = 0; + frame.callee_offset = 0; HOST_WIDE_INT max_push_offset = 0; - if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM) + if (frame.wb_candidate2 != INVALID_REGNUM) max_push_offset = 512; - else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM) + else if (frame.wb_candidate1 != INVALID_REGNUM) max_push_offset = 256; - HOST_WIDE_INT const_size, const_fp_offset; - if (cfun->machine->frame.frame_size.is_constant (&const_size) + HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; + if (frame.frame_size.is_constant (&const_size) && const_size < max_push_offset - && known_eq (crtl->outgoing_args_size, 0)) + && known_eq (frame.hard_fp_offset, const_size)) { /* Simple, small frame with no outgoing arguments: + stp reg1, reg2, [sp, -frame_size]! stp reg3, reg4, [sp, 16] */ - cfun->machine->frame.callee_adjust = const_size; - } - else if (known_lt (crtl->outgoing_args_size - + cfun->machine->frame.saved_regs_size, 512) + frame.callee_adjust = const_size; + } + else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) + && frame.saved_regs_size.is_constant (&const_saved_regs_size) + && const_outgoing_args_size + const_saved_regs_size < 512 + /* We could handle this case even with outgoing args, provided + that the number of args left us with valid offsets for all + predicate and vector save slots. It's such a rare case that + it hardly seems worth the effort though. 
*/ + && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) && !(cfun->calls_alloca - && known_lt (cfun->machine->frame.hard_fp_offset, - max_push_offset))) + && frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset)) { /* Frame with small outgoing arguments: + sub sp, sp, frame_size stp reg1, reg2, [sp, outgoing_args_size] stp reg3, reg4, [sp, outgoing_args_size + 16] */ - cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; - cfun->machine->frame.callee_offset - = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; + frame.initial_adjust = frame.frame_size; + frame.callee_offset = const_outgoing_args_size; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, + frame.below_hard_fp_saved_regs_size)) + { + /* Frame in which all saves are SVE saves: + + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, outgoing_args_size */ + frame.initial_adjust = (frame.hard_fp_offset + + frame.below_hard_fp_saved_regs_size); + frame.final_adjust = crtl->outgoing_args_size; } - else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset) + else if (frame.hard_fp_offset.is_constant (&const_fp_offset) && const_fp_offset < max_push_offset) { - /* Frame with large outgoing arguments but a small local area: + /* Frame with large outgoing arguments or SVE saves, but with + a small local area: + stp reg1, reg2, [sp, -hard_fp_offset]! stp reg3, reg4, [sp, 16] + [sub sp, sp, below_hard_fp_saved_regs_size] + [save SVE registers relative to SP] sub sp, sp, outgoing_args_size */ - cfun->machine->frame.callee_adjust = const_fp_offset; - cfun->machine->frame.final_adjust - = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; + frame.callee_adjust = const_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = crtl->outgoing_args_size; } else { - /* Frame with large local area and outgoing arguments using frame pointer: + /* Frame with large local area and outgoing arguments or SVE saves, + using frame pointer: + sub sp, sp, hard_fp_offset stp x29, x30, [sp, 0] add x29, sp, 0 stp reg3, reg4, [sp, 16] + [sub sp, sp, below_hard_fp_saved_regs_size] + [save SVE registers relative to SP] sub sp, sp, outgoing_args_size */ - cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset; - cfun->machine->frame.final_adjust - = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust; + frame.initial_adjust = frame.hard_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = crtl->outgoing_args_size; + } + + /* Make sure the individual adjustments add up to the full frame size. */ + gcc_assert (known_eq (frame.initial_adjust + + frame.callee_adjust + + frame.sve_callee_adjust + + frame.final_adjust, frame.frame_size)); + + if (!frame.emit_frame_chain && frame.callee_adjust == 0) + { + /* We've decided not to associate any register saves with the initial + stack allocation. 
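Continuing the 96-byte example from above (hard_fp_offset = 64, saved_regs_size = 32, 32 bytes of outgoing arguments, both write-back candidates present so max_push_offset = 512, no SVE saves and no alloca): the first form is rejected because hard_fp_offset differs from frame_size, but outgoing arguments plus saved registers total 64 < 512, so the second form is chosen with initial_adjust = 96 and callee_offset = 32, i.e. one "sub sp, sp, #96" followed by register saves starting at [sp, #32]. Had the outgoing-argument area been 4096 bytes instead, that sum would exceed 512 and the third form would be used, with callee_adjust = 64 (a store pair with write-back) and final_adjust = 4096.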
*/ + frame.wb_candidate1 = INVALID_REGNUM; + frame.wb_candidate2 = INVALID_REGNUM; } - cfun->machine->frame.laid_out = true; + frame.laid_out = true; } /* Return true if the register REGNO is saved on entry to @@ -4158,7 +7446,7 @@ aarch64_layout_frame (void) static bool aarch64_register_saved_on_entry (int regno) { - return cfun->machine->frame.reg_offset[regno] >= 0; + return known_ge (cfun->machine->frame.reg_offset[regno], 0); } /* Return the next register up from REGNO up to LIMIT for the callee @@ -4209,6 +7497,10 @@ aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, return gen_storewb_pairdf_di (base, base, reg, reg2, GEN_INT (-adjustment), GEN_INT (UNITS_PER_WORD - adjustment)); + case E_TFmode: + return gen_storewb_pairtf_di (base, base, reg, reg2, + GEN_INT (-adjustment), + GEN_INT (UNITS_PER_VREG - adjustment)); default: gcc_unreachable (); } @@ -4221,7 +7513,7 @@ static void aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment) { rtx_insn *insn; - machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode; + machine_mode mode = aarch64_reg_save_mode (regno1); if (regno2 == INVALID_REGNUM) return aarch64_pushwb_single_reg (mode, regno1, adjustment); @@ -4251,6 +7543,9 @@ aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, case E_DFmode: return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), GEN_INT (UNITS_PER_WORD)); + case E_TFmode: + return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment), + GEN_INT (UNITS_PER_VREG)); default: gcc_unreachable (); } @@ -4264,7 +7559,7 @@ static void aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment, rtx *cfi_ops) { - machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode; + machine_mode mode = aarch64_reg_save_mode (regno1); rtx reg1 = gen_rtx_REG (mode, regno1); *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); @@ -4294,10 +7589,19 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2, switch (mode) { case E_DImode: - return gen_store_pairdi (mem1, reg1, mem2, reg2); + return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2); case E_DFmode: - return gen_store_pairdf (mem1, reg1, mem2, reg2); + return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2); + + case E_TFmode: + return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2); + + case E_V4SImode: + return gen_vec_store_pairv4siv4si (mem1, reg1, mem2, reg2); + + case E_V16QImode: + return gen_vec_store_pairv16qiv16qi (mem1, reg1, mem2, reg2); default: gcc_unreachable (); @@ -4314,10 +7618,16 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2, switch (mode) { case E_DImode: - return gen_load_pairdi (reg1, mem1, reg2, mem2); + return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2); case E_DFmode: - return gen_load_pairdf (reg1, mem1, reg2, mem2); + return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2); + + case E_TFmode: + return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2); + + case E_V4SImode: + return gen_load_pairv4siv4si (reg1, mem1, reg2, mem2); default: gcc_unreachable (); @@ -4333,24 +7643,100 @@ aarch64_return_address_signing_enabled (void) /* This function should only be called after frame laid out. */ gcc_assert (cfun->machine->frame.laid_out); + /* Turn return address signing off in any function that uses + __builtin_eh_return. 
The address passed to __builtin_eh_return + is not signed so either it has to be signed (with original sp) + or the code path that uses it has to avoid authenticating it. + Currently eh return introduces a return to anywhere gadget, no + matter what we do here since it uses ret with user provided + address. An ideal fix for that is to use indirect branch which + can be protected with BTI j (to some extent). */ + if (crtl->calls_eh_return) + return false; + /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function - if it's LR is pushed onto stack. */ + if its LR is pushed onto stack. */ return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF - && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0)); + && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0))); +} + +/* Return TRUE if Branch Target Identification Mechanism is enabled. */ +bool +aarch64_bti_enabled (void) +{ + return (aarch64_enable_bti == 1); +} + +/* The caller is going to use ST1D or LD1D to save or restore an SVE + register in mode MODE at BASE_RTX + OFFSET, where OFFSET is in + the range [1, 16] * GET_MODE_SIZE (MODE). Prepare for this by: + + (1) updating BASE_RTX + OFFSET so that it is a legitimate ST1D + or LD1D address + + (2) setting PRED to a valid predicate register for the ST1D or LD1D, + if the variable isn't already nonnull + + (1) is needed when OFFSET is in the range [8, 16] * GET_MODE_SIZE (MODE). + Handle this case using a temporary base register that is suitable for + all offsets in that range. Use ANCHOR_REG as this base register if it + is nonnull, otherwise create a new register and store it in ANCHOR_REG. */ + +static inline void +aarch64_adjust_sve_callee_save_base (machine_mode mode, rtx &base_rtx, + rtx &anchor_reg, poly_int64 &offset, + rtx &ptrue) +{ + if (maybe_ge (offset, 8 * GET_MODE_SIZE (mode))) + { + /* This is the maximum valid offset of the anchor from the base. + Lower values would be valid too. */ + poly_int64 anchor_offset = 16 * GET_MODE_SIZE (mode); + if (!anchor_reg) + { + anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_insn (gen_add3_insn (anchor_reg, base_rtx, + gen_int_mode (anchor_offset, Pmode))); + } + base_rtx = anchor_reg; + offset -= anchor_offset; + } + if (!ptrue) + { + int pred_reg = cfun->machine->frame.spare_pred_reg; + emit_move_insn (gen_rtx_REG (VNx16BImode, pred_reg), + CONSTM1_RTX (VNx16BImode)); + ptrue = gen_rtx_REG (VNx2BImode, pred_reg); + } +} + +/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG + is saved at BASE + OFFSET. */ + +static void +aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, + rtx base, poly_int64 offset) +{ + rtx mem = gen_frame_mem (GET_MODE (reg), + plus_constant (Pmode, base, offset)); + add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); } /* Emit code to save the callee-saved registers from register number START to LIMIT to the stack at the location starting at offset START_OFFSET, - skipping any write-back candidates if SKIP_WB is true. */ + skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P + is true if the hard frame pointer has been set up. 
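A worked instance of the re-anchoring above, assuming a 256-bit vector length (so GET_MODE_SIZE (mode) is 32 bytes) and the usual [-8, 7], MUL VL immediate range of ST1D/LD1D: a slot at offset 12 * 32 = 384 is out of range, so ANCHOR_REG is set to BASE + 16 * 32 = 512 and the access becomes [anchor, #-4, MUL VL]. Every offset in [8, 16] * VL maps this way onto [-8, 0] * VL, which the instructions can encode directly.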
*/ static void -aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, - unsigned start, unsigned limit, bool skip_wb) +aarch64_save_callee_saves (poly_int64 start_offset, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) { rtx_insn *insn; unsigned regno; unsigned regno2; + rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; for (regno = aarch64_next_callee_save (start, limit); regno <= limit; @@ -4358,6 +7744,7 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, { rtx reg, mem; poly_int64 offset; + bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); if (skip_wb && (regno == cfun->machine->frame.wb_candidate1 @@ -4365,27 +7752,53 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, continue; if (cfun->machine->reg_is_wrapped_separately[regno]) - continue; + continue; + machine_mode mode = aarch64_reg_save_mode (regno); reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; - mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; - regno2 = aarch64_next_callee_save (regno + 1, limit); + HOST_WIDE_INT const_offset; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, + offset, ptrue); + else if (GP_REGNUM_P (regno) + && (!offset.is_constant (&const_offset) || const_offset >= 512)) + { + gcc_assert (known_eq (start_offset, 0)); + poly_int64 fp_offset + = cfun->machine->frame.below_hard_fp_saved_regs_size; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else + { + if (!anchor_reg) + { + anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_insn (gen_add3_insn (anchor_reg, base_rtx, + gen_int_mode (fp_offset, Pmode))); + } + base_rtx = anchor_reg; + } + offset -= fp_offset; + } + mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); + bool need_cfa_note_p = (base_rtx != stack_pointer_rtx); - if (regno2 <= limit + if (!aarch64_sve_mode_p (mode) + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit && !cfun->machine->reg_is_wrapped_separately[regno2] - && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) - == cfun->machine->frame.reg_offset[regno2])) - + && known_eq (GET_MODE_SIZE (mode), + cfun->machine->frame.reg_offset[regno2] + - cfun->machine->frame.reg_offset[regno])) { rtx reg2 = gen_rtx_REG (mode, regno2); rtx mem2; - offset = start_offset + cfun->machine->frame.reg_offset[regno2]; - mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + offset += GET_MODE_SIZE (mode); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2)); @@ -4393,37 +7806,54 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, always assumed to be relevant to the frame calculations; subsequent parts, are only frame-related if explicitly marked. 
*/ - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + if (aarch64_emit_cfi_for_reg_p (regno2)) + { + if (need_cfa_note_p) + aarch64_add_cfa_expression (insn, reg2, stack_pointer_rtx, + sp_offset + GET_MODE_SIZE (mode)); + else + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + } + regno = regno2; } + else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + { + insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, reg)); + need_cfa_note_p = true; + } + else if (aarch64_sve_mode_p (mode)) + insn = emit_insn (gen_rtx_SET (mem, reg)); else insn = emit_move_insn (mem, reg); - RTX_FRAME_RELATED_P (insn) = 1; + RTX_FRAME_RELATED_P (insn) = frame_related_p; + if (frame_related_p && need_cfa_note_p) + aarch64_add_cfa_expression (insn, reg, stack_pointer_rtx, sp_offset); } } -/* Emit code to restore the callee registers of mode MODE from register - number START up to and including LIMIT. Restore from the stack offset - START_OFFSET, skipping any write-back candidates if SKIP_WB is true. - Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */ +/* Emit code to restore the callee registers from register number START + up to and including LIMIT. Restore from the stack offset START_OFFSET, + skipping any write-back candidates if SKIP_WB is true. Write the + appropriate REG_CFA_RESTORE notes into CFI_OPS. */ static void -aarch64_restore_callee_saves (machine_mode mode, - poly_int64 start_offset, unsigned start, +aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, unsigned limit, bool skip_wb, rtx *cfi_ops) { - rtx base_rtx = stack_pointer_rtx; unsigned regno; unsigned regno2; poly_int64 offset; + rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; for (regno = aarch64_next_callee_save (start, limit); regno <= limit; regno = aarch64_next_callee_save (regno + 1, limit)) { + bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); if (cfun->machine->reg_is_wrapped_separately[regno]) - continue; + continue; rtx reg, mem; @@ -4432,30 +7862,40 @@ aarch64_restore_callee_saves (machine_mode mode, || regno == cfun->machine->frame.wb_candidate2)) continue; + machine_mode mode = aarch64_reg_save_mode (regno); reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, + offset, ptrue); mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); - regno2 = aarch64_next_callee_save (regno + 1, limit); - - if (regno2 <= limit + if (!aarch64_sve_mode_p (mode) + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit && !cfun->machine->reg_is_wrapped_separately[regno2] - && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) - == cfun->machine->frame.reg_offset[regno2])) + && known_eq (GET_MODE_SIZE (mode), + cfun->machine->frame.reg_offset[regno2] + - cfun->machine->frame.reg_offset[regno])) { rtx reg2 = gen_rtx_REG (mode, regno2); rtx mem2; - offset = start_offset + cfun->machine->frame.reg_offset[regno2]; + offset += GET_MODE_SIZE (mode); mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); regno = regno2; } + else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem)); + else if (aarch64_sve_mode_p (mode)) + emit_insn (gen_rtx_SET (reg, mem)); else emit_move_insn (reg, mem); - 
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); + if (frame_related_p) + *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); } } @@ -4470,7 +7910,18 @@ offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset) && IN_RANGE (multiple, -8, 7)); } -/* Return true if OFFSET is a unsigned 6-bit value multiplied by the size +/* Return true if OFFSET is a signed 6-bit value multiplied by the size + of MODE. */ + +static inline bool +offset_6bit_signed_scaled_p (machine_mode mode, poly_int64 offset) +{ + HOST_WIDE_INT multiple; + return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple) + && IN_RANGE (multiple, -32, 31)); +} + +/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size of MODE. */ static inline bool @@ -4494,9 +7945,9 @@ aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset) /* Return true if OFFSET is a signed 9-bit value. */ -static inline bool -offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED, - poly_int64 offset) +bool +aarch64_offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED, + poly_int64 offset) { HOST_WIDE_INT const_offset; return (offset.is_constant (&const_offset) @@ -4530,8 +7981,6 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) static sbitmap aarch64_get_separate_components (void) { - aarch64_layout_frame (); - sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); bitmap_clear (components); @@ -4539,13 +7988,35 @@ aarch64_get_separate_components (void) for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) if (aarch64_register_saved_on_entry (regno)) { + /* Punt on saves and restores that use ST1D and LD1D. We could + try to be smarter, but it would involve making sure that the + spare predicate register itself is safe to use at the save + and restore points. Also, when a frame pointer is being used, + the slots are often out of reach of ST1D and LD1D anyway. */ + machine_mode mode = aarch64_reg_save_mode (regno); + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + continue; + poly_int64 offset = cfun->machine->frame.reg_offset[regno]; - if (!frame_pointer_needed) - offset += cfun->machine->frame.frame_size - - cfun->machine->frame.hard_fp_offset; + + /* If the register is saved in the first SVE save slot, we use + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection + && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) + && known_eq (offset, 0)) + continue; + + /* Get the offset relative to the register we'll use. */ + if (frame_pointer_needed) + offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + /* Check that we can access the stack slot of the register with one direct load with no adjustments needed. */ - if (offset_12bit_unsigned_scaled_p (DImode, offset)) + if (aarch64_sve_mode_p (mode) + ? offset_9bit_signed_scaled_p (mode, offset) + : offset_12bit_unsigned_scaled_p (mode, offset)) bitmap_set_bit (components, regno); } @@ -4553,9 +8024,15 @@ aarch64_get_separate_components (void) if (frame_pointer_needed) bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); + /* If the spare predicate register used by big-endian SVE code + is call-preserved, it must be saved in the main prologue + before any saves that use it. 
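The reachability tests used above boil down to range checks on the offset divided by the access size. A standalone sketch for constant offsets follows; the function names are stand-ins, the 12-bit unsigned scaled range comes from the helper shown above, and the 9-bit signed scaled range of [-256, 255] is assumed by analogy with the 4- and 6-bit helpers:

#include <stdbool.h>
#include <stdint.h>

/* Not GCC code: constant-offset versions of the scaled-offset checks used
   when deciding whether a callee save can be shrink-wrapped separately.  */
static bool
offset_12bit_unsigned_scaled (int64_t offset, int64_t size)  /* base LDR/STR */
{
  return offset % size == 0 && offset / size >= 0 && offset / size <= 4095;
}

static bool
offset_9bit_signed_scaled (int64_t offset, int64_t size)     /* SVE LDR/STR */
{
  return offset % size == 0 && offset / size >= -256 && offset / size <= 255;
}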
*/ + if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) + bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); + unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; - /* If aarch64_layout_frame has chosen registers to store/restore with + /* If registers have been chosen to be stored/restored with writeback don't interfere with them to avoid having to output explicit stack adjustment instructions. */ if (reg2 != INVALID_REGNUM) @@ -4581,25 +8058,44 @@ aarch64_components_for_bb (basic_block bb) sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); bitmap_clear (components); + /* Clobbered registers don't generate values in any meaningful sense, + since nothing after the clobber can rely on their value. And we can't + say that partially-clobbered registers are unconditionally killed, + because whether they're killed or not depends on the mode of the + value they're holding. Thus partially call-clobbered registers + appear in neither the kill set nor the gen set. + + Check manually for any calls that clobber more of a register than the + current function can. */ + function_abi_aggregator callee_abis; + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (CALL_P (insn)) + callee_abis.note_callee_abi (insn_callee_abi (insn)); + HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); + /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) - if ((!call_used_regs[regno]) - && (bitmap_bit_p (in, regno) - || bitmap_bit_p (gen, regno) - || bitmap_bit_p (kill, regno))) + if (!fixed_regs[regno] + && !crtl->abi->clobbers_full_reg_p (regno) + && (TEST_HARD_REG_BIT (extra_caller_saves, regno) + || bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno))) { - unsigned regno2, offset, offset2; bitmap_set_bit (components, regno); /* If there is a callee-save at an adjacent offset, add it too to increase the use of LDP/STP. */ - offset = cfun->machine->frame.reg_offset[regno]; - regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1; + poly_int64 offset = cfun->machine->frame.reg_offset[regno]; + unsigned regno2 = multiple_p (offset, 16) ? regno + 1 : regno - 1; if (regno2 <= LAST_SAVED_REGNUM) { - offset2 = cfun->machine->frame.reg_offset[regno2]; - if ((offset & ~8) == (offset2 & ~8)) + poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; + if (regno < regno2 + ? known_eq (offset + 8, offset2) + : multiple_p (offset2, 16) && known_eq (offset2 + 8, offset)) bitmap_set_bit (components, regno2); } } @@ -4654,14 +8150,16 @@ aarch64_process_components (sbitmap components, bool prologue_p) while (regno != last_regno) { - /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved - so DFmode for the vector registers is enough. */ - machine_mode mode = GP_REGNUM_P (regno) ? 
E_DImode : E_DFmode; + bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); + machine_mode mode = aarch64_reg_save_mode (regno); + rtx reg = gen_rtx_REG (mode, regno); poly_int64 offset = cfun->machine->frame.reg_offset[regno]; - if (!frame_pointer_needed) - offset += cfun->machine->frame.frame_size - - cfun->machine->frame.hard_fp_offset; + if (frame_pointer_needed) + offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + rtx addr = plus_constant (Pmode, ptr_reg, offset); rtx mem = gen_frame_mem (mode, addr); @@ -4672,38 +8170,49 @@ aarch64_process_components (sbitmap components, bool prologue_p) if (regno2 == last_regno) { insn = emit_insn (set); - RTX_FRAME_RELATED_P (insn) = 1; - if (prologue_p) - add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); - else - add_reg_note (insn, REG_CFA_RESTORE, reg); + if (frame_related_p) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (prologue_p) + add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); + else + add_reg_note (insn, REG_CFA_RESTORE, reg); + } break; } poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; /* The next register is not of the same class or its offset is not mergeable with the current one into a pair. */ - if (!satisfies_constraint_Ump (mem) + if (aarch64_sve_mode_p (mode) + || !satisfies_constraint_Ump (mem) || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) + || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno)) || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]), GET_MODE_SIZE (mode))) { insn = emit_insn (set); - RTX_FRAME_RELATED_P (insn) = 1; - if (prologue_p) - add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); - else - add_reg_note (insn, REG_CFA_RESTORE, reg); + if (frame_related_p) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (prologue_p) + add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); + else + add_reg_note (insn, REG_CFA_RESTORE, reg); + } regno = regno2; continue; } + bool frame_related2_p = aarch64_emit_cfi_for_reg_p (regno2); + /* REGNO2 can be saved/restored in a pair with REGNO. */ rtx reg2 = gen_rtx_REG (mode, regno2); - if (!frame_pointer_needed) - offset2 += cfun->machine->frame.frame_size - - cfun->machine->frame.hard_fp_offset; + if (frame_pointer_needed) + offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size; + else + offset2 += crtl->outgoing_args_size; rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); rtx mem2 = gen_frame_mem (mode, addr2); rtx set2 = prologue_p ? 
gen_rtx_SET (mem2, reg2) @@ -4714,16 +8223,23 @@ aarch64_process_components (sbitmap components, bool prologue_p) else insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); - RTX_FRAME_RELATED_P (insn) = 1; - if (prologue_p) - { - add_reg_note (insn, REG_CFA_OFFSET, set); - add_reg_note (insn, REG_CFA_OFFSET, set2); - } - else + if (frame_related_p || frame_related2_p) { - add_reg_note (insn, REG_CFA_RESTORE, reg); - add_reg_note (insn, REG_CFA_RESTORE, reg2); + RTX_FRAME_RELATED_P (insn) = 1; + if (prologue_p) + { + if (frame_related_p) + add_reg_note (insn, REG_CFA_OFFSET, set); + if (frame_related2_p) + add_reg_note (insn, REG_CFA_OFFSET, set2); + } + else + { + if (frame_related_p) + add_reg_note (insn, REG_CFA_RESTORE, reg); + if (frame_related2_p) + add_reg_note (insn, REG_CFA_RESTORE, reg2); + } } regno = aarch64_get_next_set_bit (components, regno2 + 1); @@ -4756,81 +8272,404 @@ aarch64_set_handled_components (sbitmap components) cfun->machine->reg_is_wrapped_separately[regno] = true; } -/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG - is saved at BASE + OFFSET. */ +/* On AArch64 we have an ABI defined safe buffer. This constant is used to + determining the probe offset for alloca. */ -static void -aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, - rtx base, poly_int64 offset) +static HOST_WIDE_INT +aarch64_stack_clash_protection_alloca_probe_range (void) { - rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); - add_reg_note (insn, REG_CFA_EXPRESSION, - gen_rtx_SET (mem, regno_reg_rtx[reg])); + return STACK_CLASH_CALLER_GUARD; } -/* AArch64 stack frames generated by this compiler look like: - - +-------------------------------+ - | | - | incoming stack arguments | - | | - +-------------------------------+ - | | <-- incoming stack pointer (aligned) - | callee-allocated save area | - | for register varargs | - | | - +-------------------------------+ - | local variables | <-- frame_pointer_rtx - | | - +-------------------------------+ - | padding0 | \ - +-------------------------------+ | - | callee-saved registers | | frame.saved_regs_size - +-------------------------------+ | - | LR' | | - +-------------------------------+ | - | FP' | / <- hard_frame_pointer_rtx (aligned) - +-------------------------------+ - | dynamic allocation | - +-------------------------------+ - | padding | - +-------------------------------+ - | outgoing stack arguments | <-- arg_pointer - | | - +-------------------------------+ - | | <-- stack_pointer_rtx (aligned) - Dynamic stack allocations via alloca() decrease stack_pointer_rtx - but leave frame_pointer_rtx and hard_frame_pointer_rtx - unchanged. */ +/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch + registers. If POLY_SIZE is not large enough to require a probe this function + will only adjust the stack. When allocating the stack space + FRAME_RELATED_P is then used to indicate if the allocation is frame related. + FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing + arguments. If we are then we ensure that any allocation larger than the ABI + defined buffer needs a probe so that the invariant of having a 1KB buffer is + maintained. -/* Generate the prologue instructions for entry into a function. - Establish the stack frame by decreasing the stack pointer with a - properly calculated size and, if necessary, create a frame record - filled with the values of LR and previous frame pointer. The - current FP is also set up if it is in use. 
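Putting numbers on the thresholds described above, assuming the default 64 KiB stack-clash guard: for the initial adjustment, min_probe_threshold is 65536 - 1024 = 64512 bytes, so anything smaller is allocated with a plain stack adjustment and relies on the subsequent FP/LR save as its probe; for the final (outgoing-argument) adjustment the threshold is just the 1 KiB caller guard, so any outgoing area larger than 1024 bytes is probed.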
*/ + We emit barriers after each stack adjustment to prevent optimizations from + breaking the invariant that we never drop the stack more than a page. This + invariant is needed to make it easier to correctly handle asynchronous + events, e.g. if we were to allow the stack to be dropped by more than a page + and then have multiple probes up and we take a signal somewhere in between + then the signal handler doesn't know the state of the stack and can make no + assumptions about which pages have been probed. */ -void -aarch64_expand_prologue (void) -{ - aarch64_layout_frame (); +static void +aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + poly_int64 poly_size, + bool frame_related_p, + bool final_adjustment_p) +{ + HOST_WIDE_INT guard_size + = 1 << param_stack_clash_protection_guard_size; + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; + HOST_WIDE_INT min_probe_threshold + = (final_adjustment_p + ? guard_used_by_caller + : guard_size - guard_used_by_caller); + /* When doing the final adjustment for the outgoing arguments, take into + account any unprobed space there is above the current SP. There are + two cases: + + - When saving SVE registers below the hard frame pointer, we force + the lowest save to take place in the prologue before doing the final + adjustment (i.e. we don't allow the save to be shrink-wrapped). + This acts as a probe at SP, so there is no unprobed space. + + - When there are no SVE register saves, we use the store of the link + register as a probe. We can't assume that LR was saved at position 0 + though, so treat any space below it as unprobed. */ + if (final_adjustment_p + && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)) + { + poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM]; + if (known_ge (lr_offset, 0)) + min_probe_threshold -= lr_offset.to_constant (); + else + gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); + } poly_int64 frame_size = cfun->machine->frame.frame_size; - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; - HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; - poly_int64 final_adjust = cfun->machine->frame.final_adjust; - poly_int64 callee_offset = cfun->machine->frame.callee_offset; - unsigned reg1 = cfun->machine->frame.wb_candidate1; - unsigned reg2 = cfun->machine->frame.wb_candidate2; - bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; - rtx_insn *insn; - /* Sign return address for functions. */ - if (aarch64_return_address_signing_enabled ()) + /* We should always have a positive probe threshold. */ + gcc_assert (min_probe_threshold > 0); + + if (flag_stack_clash_protection && !final_adjustment_p) { - insn = emit_insn (gen_pacisp ()); - add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); - RTX_FRAME_RELATED_P (insn) = 1; + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; + poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; + poly_int64 final_adjust = cfun->machine->frame.final_adjust; + + if (known_eq (frame_size, 0)) + { + dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); + } + else if (known_lt (initial_adjust + sve_callee_adjust, + guard_size - guard_used_by_caller) + && known_lt (final_adjust, guard_used_by_caller)) + { + dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); + } + } + + /* If SIZE is not large enough to require probing, just adjust the stack and + exit. 
*/ + if (known_lt (poly_size, min_probe_threshold) + || !flag_stack_clash_protection) + { + aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p); + return; + } + + HOST_WIDE_INT size; + /* Handle the SVE non-constant case first. */ + if (!poly_size.is_constant (&size)) + { + if (dump_file) + { + fprintf (dump_file, "Stack clash SVE prologue: "); + print_dec (poly_size, dump_file); + fprintf (dump_file, " bytes, dynamic probing will be required.\n"); + } + + /* First calculate the amount of bytes we're actually spilling. */ + aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode), + poly_size, temp1, temp2, false, true); + + rtx_insn *insn = get_last_insn (); + + if (frame_related_p) + { + /* This is done to provide unwinding information for the stack + adjustments we're about to do, however to prevent the optimizers + from removing the R11 move and leaving the CFA note (which would be + very wrong) we tie the old and new stack pointer together. + The tie will expand to nothing but the optimizers will not touch + the instruction. */ + rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_move_insn (stack_ptr_copy, stack_pointer_rtx); + emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); + + /* We want the CFA independent of the stack pointer for the + duration of the loop. */ + add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy); + RTX_FRAME_RELATED_P (insn) = 1; + } + + rtx probe_const = gen_int_mode (min_probe_threshold, Pmode); + rtx guard_const = gen_int_mode (guard_size, Pmode); + + insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx, + stack_pointer_rtx, temp1, + probe_const, guard_const)); + + /* Now reset the CFA register if needed. */ + if (frame_related_p) + { + add_reg_note (insn, REG_CFA_DEF_CFA, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + gen_int_mode (poly_size, Pmode))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + return; + } + + if (dump_file) + fprintf (dump_file, + "Stack clash AArch64 prologue: " HOST_WIDE_INT_PRINT_DEC + " bytes, probing will be required.\n", size); + + /* Round size to the nearest multiple of guard_size, and calculate the + residual as the difference between the original size and the rounded + size. */ + HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size); + HOST_WIDE_INT residual = size - rounded_size; + + /* We can handle a small number of allocations/probes inline. Otherwise + punt to a loop. */ + if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size) + { + for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size) + { + aarch64_sub_sp (NULL, temp2, guard_size, true); + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + guard_used_by_caller)); + emit_insn (gen_blockage ()); + } + dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size); + } + else + { + /* Compute the ending address. */ + aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size, + temp1, NULL, false, true); + rtx_insn *insn = get_last_insn (); + + /* For the initial allocation, we don't have a frame pointer + set up, so we always need CFI notes. If we're doing the + final allocation, then we may have a frame pointer, in which + case it is the CFA, otherwise we need CFI notes. + + We can determine which allocation we are doing by looking at + the value of FRAME_RELATED_P since the final allocations are not + frame related. */ + if (frame_related_p) + { + /* We want the CFA independent of the stack pointer for the + duration of the loop. 
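To make the constant-size path concrete, here is a host-side restatement of the rounded-size/residual split and the inline-versus-loop choice above; the 64KB guard and the 4-page unroll limit are assumptions for illustration, not values taken from this hunk.

#include <stdio.h>

#define GUARD_SIZE        (1 << 16)
#define MAX_UNROLL_PAGES  4
#define ROUND_DOWN(x, a)  ((x) & ~((a) - 1))   /* a must be a power of two */

int
main (void)
{
  long size = 3 * GUARD_SIZE + 512;
  long rounded_size = ROUND_DOWN (size, GUARD_SIZE);
  long residual = size - rounded_size;

  printf ("rounded=%ld residual=%ld\n", rounded_size, residual);  /* 196608 / 512 */
  if (rounded_size <= (long) MAX_UNROLL_PAGES * GUARD_SIZE)
    printf ("probe inline, %ld page(s)\n", rounded_size / GUARD_SIZE);
  else
    printf ("probe with a loop\n");
  return 0;
}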
*/ + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, temp1, rounded_size)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* This allocates and probes the stack. Note that this re-uses some of + the existing Ada stack protection code. However we are guaranteed not + to enter the non loop or residual branches of that code. + + The non-loop part won't be entered because if our allocation amount + doesn't require a loop, the case above would handle it. + + The residual amount won't be entered because TEMP1 is a mutliple of + the allocation size. The residual will always be 0. As such, the only + part we are actually using from that code is the loop setup. The + actual probing is done in aarch64_output_probe_stack_range. */ + insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx, + stack_pointer_rtx, temp1)); + + /* Now reset the CFA register if needed. */ + if (frame_related_p) + { + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, stack_pointer_rtx, rounded_size)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + emit_insn (gen_blockage ()); + dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); + } + + /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to + be probed. This maintains the requirement that each page is probed at + least once. For initial probing we probe only if the allocation is + more than GUARD_SIZE - buffer, and for the outgoing arguments we probe + if the amount is larger than buffer. GUARD_SIZE - buffer + buffer == + GUARD_SIZE. This works that for any allocation that is large enough to + trigger a probe here, we'll have at least one, and if they're not large + enough for this code to emit anything for them, The page would have been + probed by the saving of FP/LR either by this function or any callees. If + we don't have any callees then we won't have more stack adjustments and so + are still safe. */ + if (residual) + { + HOST_WIDE_INT residual_probe_offset = guard_used_by_caller; + /* If we're doing final adjustments, and we've done any full page + allocations then any residual needs to be probed. */ + if (final_adjustment_p && rounded_size != 0) + min_probe_threshold = 0; + /* If doing a small final adjustment, we always probe at offset 0. + This is done to avoid issues when LR is not at position 0 or when + the final adjustment is smaller than the probing offset. */ + else if (final_adjustment_p && rounded_size == 0) + residual_probe_offset = 0; + + aarch64_sub_sp (temp1, temp2, residual, frame_related_p); + if (residual >= min_probe_threshold) + { + if (dump_file) + fprintf (dump_file, + "Stack clash AArch64 prologue residuals: " + HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required." + "\n", residual); + + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + residual_probe_offset)); + emit_insn (gen_blockage ()); + } + } +} + +/* Return 1 if the register is used by the epilogue. We need to say the + return register is used, but only after epilogue generation is complete. + Note that in the case of sibcalls, the values "used by the epilogue" are + considered live at the start of the called function. + + For SIMD functions we need to return 1 for FP registers that are saved and + restored by a function but are not zero in call_used_regs. If we do not do + this optimizations may remove the restore of the register. 
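The residual handling just above follows a small decision table; this is a hedged restatement in plain C (same assumed constants as before, helper name invented for illustration, LR-offset refinement again ignored).

#include <stdbool.h>
#include <stdio.h>

#define GUARD_SIZE    (1 << 16)
#define CALLER_GUARD  1024

/* Report whether a residual allocation of RESIDUAL bytes is probed and at
   which offset from the new SP.  */
static void
residual_probe (long residual, long rounded_size, bool final_adjustment_p)
{
  long threshold = final_adjustment_p ? CALLER_GUARD
                                      : GUARD_SIZE - CALLER_GUARD;
  long offset = CALLER_GUARD;

  if (final_adjustment_p && rounded_size != 0)
    threshold = 0;    /* whole pages were allocated: always probe */
  else if (final_adjustment_p && rounded_size == 0)
    offset = 0;       /* small final adjustment: probe at offset 0 */

  if (residual != 0 && residual >= threshold)
    printf ("probe at [sp, #%ld]\n", offset);
  else
    printf ("no probe needed\n");
}

int
main (void)
{
  residual_probe (512, 2 * GUARD_SIZE, true);  /* probe at [sp, #1024] */
  residual_probe (512, 0, true);               /* no probe: below 1KB */
  residual_probe (512, 0, false);              /* no probe: below 63KB */
  return 0;
}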
*/ + +int +aarch64_epilogue_uses (int regno) +{ + if (epilogue_completed) + { + if (regno == LR_REGNUM) + return 1; + } + return 0; +} + +/* AArch64 stack frames generated by this compiler look like: + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ + | | <-- incoming stack pointer (aligned) + | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ + | local variables | <-- frame_pointer_rtx + | | + +-------------------------------+ + | padding | \ + +-------------------------------+ | + | callee-saved registers | | frame.saved_regs_size + +-------------------------------+ | + | LR' | | + +-------------------------------+ | + | FP' | | + +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) + | SVE vector registers | | \ + +-------------------------------+ | | below_hard_fp_saved_regs_size + | SVE predicate registers | / / + +-------------------------------+ + | dynamic allocation | + +-------------------------------+ + | padding | + +-------------------------------+ + | outgoing stack arguments | <-- arg_pointer + | | + +-------------------------------+ + | | <-- stack_pointer_rtx (aligned) + + Dynamic stack allocations via alloca() decrease stack_pointer_rtx + but leave frame_pointer_rtx and hard_frame_pointer_rtx + unchanged. + + By default for stack-clash we assume the guard is at least 64KB, but this + value is configurable to either 4KB or 64KB. We also force the guard size to + be the same as the probing interval and both values are kept in sync. + + With those assumptions the callee can allocate up to 63KB (or 3KB depending + on the guard size) of stack space without probing. + + When probing is needed, we emit a probe at the start of the prologue + and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter. + + We have to track how much space has been allocated and the only stores + to the stack we track as implicit probes are the FP/LR stores. + + For outgoing arguments we probe if the size is larger than 1KB, such that + the ABI specified buffer is maintained for the next callee. + + The following registers are reserved during frame layout and should not be + used for any other purpose: + + - r11: Used by stack clash protection when SVE is enabled, and also + as an anchor register when saving and restoring registers + - r12(EP0) and r13(EP1): Used as temporaries for stack adjustment. + - r14 and r15: Used for speculation tracking. + - r16(IP0), r17(IP1): Used by indirect tailcalls. + - r30(LR), r29(FP): Used by standard frame layout. + + These registers must be avoided in frame layout related code unless the + explicit intention is to interact with one of the features listed above. */ + +/* Generate the prologue instructions for entry into a function. + Establish the stack frame by decreasing the stack pointer with a + properly calculated size and, if necessary, create a frame record + filled with the values of LR and previous frame pointer. The + current FP is also set up if it is in use. 
*/ + +void +aarch64_expand_prologue (void) +{ + poly_int64 frame_size = cfun->machine->frame.frame_size; + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; + HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; + poly_int64 final_adjust = cfun->machine->frame.final_adjust; + poly_int64 callee_offset = cfun->machine->frame.callee_offset; + poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size + = cfun->machine->frame.below_hard_fp_saved_regs_size; + unsigned reg1 = cfun->machine->frame.wb_candidate1; + unsigned reg2 = cfun->machine->frame.wb_candidate2; + bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; + rtx_insn *insn; + + if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) + { + /* Fold the SVE allocation into the initial allocation. + We don't do this in aarch64_layout_arg to avoid pessimizing + the epilogue code. */ + initial_adjust += sve_callee_adjust; + sve_callee_adjust = 0; + } + + /* Sign return address for functions. */ + if (aarch64_return_address_signing_enabled ()) + { + switch (aarch64_ra_sign_key) + { + case AARCH64_KEY_A: + insn = emit_insn (gen_paciasp ()); + break; + case AARCH64_KEY_B: + insn = emit_insn (gen_pacibsp ()); + break; + default: + gcc_unreachable (); + } + add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); + RTX_FRAME_RELATED_P (insn) = 1; } if (flag_stack_usage_info) @@ -4850,27 +8689,45 @@ aarch64_expand_prologue (void) aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size); } - rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM); - rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM); + rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM); + rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM); + + /* In theory we should never have both an initial adjustment + and a callee save adjustment. Verify that is the case since the + code below does not handle it for -fstack-clash-protection. */ + gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0); - aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true); + /* Will only probe if the initial adjustment is larger than the guard + less the amount of the guard reserved for use by the caller's + outgoing args. */ + aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust, + true, false); if (callee_adjust != 0) aarch64_push_regs (reg1, reg2, callee_adjust); + /* The offset of the frame chain record (if any) from the current SP. */ + poly_int64 chain_offset = (initial_adjust + callee_adjust + - cfun->machine->frame.hard_fp_offset); + gcc_assert (known_ge (chain_offset, 0)); + + /* The offset of the bottom of the save area from the current SP. 
*/ + poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; + if (emit_frame_chain) { - poly_int64 reg_offset = callee_adjust; if (callee_adjust == 0) { reg1 = R29_REGNUM; reg2 = R30_REGNUM; - reg_offset = callee_offset; - aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false); + aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, + false, false); } + else + gcc_assert (known_eq (chain_offset, 0)); aarch64_add_offset (Pmode, hard_frame_pointer_rtx, - stack_pointer_rtx, callee_offset, - ip1_rtx, ip0_rtx, frame_pointer_needed); + stack_pointer_rtx, chain_offset, + tmp1_rtx, tmp0_rtx, frame_pointer_needed); if (frame_pointer_needed && !frame_size.is_constant ()) { /* Variable-sized frames need to describe the save slot @@ -4896,20 +8753,36 @@ aarch64_expand_prologue (void) /* Change the save slot expressions for the registers that we've already saved. */ - reg_offset -= callee_offset; - aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx, - reg_offset + UNITS_PER_WORD); - aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx, - reg_offset); + aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2], + hard_frame_pointer_rtx, UNITS_PER_WORD); + aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], + hard_frame_pointer_rtx, 0); } emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); } - aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || emit_frame_chain); - aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, - callee_adjust != 0 || emit_frame_chain); - aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed); + aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + if (maybe_ne (sve_callee_adjust, 0)) + { + gcc_assert (!flag_stack_clash_protection + || known_eq (initial_adjust, 0)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, + sve_callee_adjust, + !frame_pointer_needed, false); + saved_regs_offset += sve_callee_adjust; + } + aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, + false, emit_frame_chain); + aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + + /* We may need to probe the final adjustment if it is larger than the guard + that is assumed by the called. */ + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, + !frame_pointer_needed, true); } /* Return TRUE if we can use a simple_return insn. 
@@ -4927,8 +8800,6 @@ aarch64_use_return_insn_p (void) if (crtl->profile) return false; - aarch64_layout_frame (); - return known_eq (cfun->machine->frame.frame_size, 0); } @@ -4940,23 +8811,43 @@ aarch64_use_return_insn_p (void) void aarch64_expand_epilogue (bool for_sibcall) { - aarch64_layout_frame (); - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; poly_int64 final_adjust = cfun->machine->frame.final_adjust; poly_int64 callee_offset = cfun->machine->frame.callee_offset; + poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size + = cfun->machine->frame.below_hard_fp_saved_regs_size; unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; rtx cfi_ops = NULL; rtx_insn *insn; - /* A stack clash protection prologue may not have left IP0_REGNUM or - IP1_REGNUM in a usable state. The same is true for allocations + /* A stack clash protection prologue may not have left EP0_REGNUM or + EP1_REGNUM in a usable state. The same is true for allocations with an SVE component, since we then need both temporary registers - for each allocation. */ + for each allocation. For stack clash we are in a usable state if + the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER. */ + HOST_WIDE_INT guard_size + = 1 << param_stack_clash_protection_guard_size; + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; + + /* We can re-use the registers when: + + (a) the deallocation amount is the same as the corresponding + allocation amount (which is false if we combine the initial + and SVE callee save allocations in the prologue); and + + (b) the allocation amount doesn't need a probe (which is false + if the amount is guard_size - guard_used_by_caller or greater). + + In such situations the register should remain live with the correct + value. */ bool can_inherit_p = (initial_adjust.is_constant () && final_adjust.is_constant () - && !flag_stack_clash_protection); + && (!flag_stack_clash_protection + || (known_lt (initial_adjust, + guard_size - guard_used_by_caller) + && known_eq (sve_callee_adjust, 0)))); /* We need to add memory barrier to prevent read from deallocated stack. */ bool need_barrier_p @@ -4974,22 +8865,32 @@ aarch64_expand_epilogue (bool for_sibcall) /* Restore the stack pointer from the frame pointer if it may not be the same as the stack pointer. */ - rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM); - rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM); + rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM); + rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM); if (frame_pointer_needed && (maybe_ne (final_adjust, 0) || cfun->calls_alloca)) /* If writeback is used when restoring callee-saves, the CFA is restored on the instruction doing the writeback. */ aarch64_add_offset (Pmode, stack_pointer_rtx, - hard_frame_pointer_rtx, -callee_offset, - ip1_rtx, ip0_rtx, callee_adjust == 0); + hard_frame_pointer_rtx, + -callee_offset - below_hard_fp_saved_regs_size, + tmp1_rtx, tmp0_rtx, callee_adjust == 0); else - aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust, - !can_inherit_p || df_regs_ever_live_p (IP1_REGNUM)); - - aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, + /* The case where we need to re-use the register here is very rare, so + avoid the complicated condition and just always emit a move if the + immediate doesn't fit. 
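A boolean restatement of the register re-use test above, with plain integers standing in for poly_int64 and the guard parameters fixed at their defaults; purely illustrative.

#include <stdbool.h>
#include <stdio.h>

#define GUARD_SIZE    (1 << 16)
#define CALLER_GUARD  1024

/* Can the epilogue trust that EP0/EP1 still hold the values the prologue
   computed?  The real predicate additionally requires the initial and final
   adjustments to be compile-time constants.  */
static bool
can_inherit_temps (long initial_adjust, long sve_callee_adjust,
                   bool stack_clash_protection)
{
  return !stack_clash_protection
         || (initial_adjust < GUARD_SIZE - CALLER_GUARD
             && sve_callee_adjust == 0);
}

int
main (void)
{
  printf ("%d\n", can_inherit_temps (4096, 0, true));       /* 1 */
  printf ("%d\n", can_inherit_temps (128 * 1024, 0, true)); /* 0: allocation was probed */
  return 0;
}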
*/ + aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); + + /* Restore the vector registers before the predicate registers, + so that we can use P4 as a temporary for big-endian SVE frames. */ + aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, callee_adjust != 0, &cfi_ops); - aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, + aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, + false, &cfi_ops); + if (maybe_ne (sve_callee_adjust, 0)) + aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); + aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, + R0_REGNUM, R30_REGNUM, callee_adjust != 0, &cfi_ops); if (need_barrier_p) @@ -4998,7 +8899,11 @@ aarch64_expand_epilogue (bool for_sibcall) if (callee_adjust != 0) aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops); - if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536)) + /* If we have no register restore information, the CFA must have been + defined in terms of the stack pointer since the end of the prologue. */ + gcc_assert (cfi_ops || !frame_pointer_needed); + + if (cfi_ops && (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))) { /* Emit delayed restores and set the CFA to be SP + initial_adjust. */ insn = get_last_insn (); @@ -5008,8 +8913,11 @@ aarch64_expand_epilogue (bool for_sibcall) cfi_ops = NULL; } - aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust, - !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM)); + /* Liveness of EP0_REGNUM can not be trusted across function calls either, so + add restriction on emit_move optimization to leaf functions. */ + aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust, + (!can_inherit_p || !crtl->is_leaf + || df_regs_ever_live_p (EP0_REGNUM))); if (cfi_ops) { @@ -5038,13 +8946,23 @@ aarch64_expand_epilogue (bool for_sibcall) if (aarch64_return_address_signing_enabled () && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return)) { - insn = emit_insn (gen_autisp ()); + switch (aarch64_ra_sign_key) + { + case AARCH64_KEY_A: + insn = emit_insn (gen_autiasp ()); + break; + case AARCH64_KEY_B: + insn = emit_insn (gen_autibsp ()); + break; + default: + gcc_unreachable (); + } add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); RTX_FRAME_RELATED_P (insn) = 1; } /* Stack adjustment for exception handler. */ - if (crtl->calls_eh_return) + if (crtl->calls_eh_return && !for_sibcall) { /* We need to unwind the stack by the offset computed by EH_RETURN_STACKADJ_RTX. 
We have already reset the CFA @@ -5110,13 +9028,17 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, int this_regno = R0_REGNUM; rtx this_rtx, temp0, temp1, addr, funexp; rtx_insn *insn; + const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); + + if (aarch64_bti_enabled ()) + emit_insn (gen_bti_c()); reload_completed = 1; emit_note (NOTE_INSN_PROLOGUE_END); this_rtx = gen_rtx_REG (Pmode, this_regno); - temp0 = gen_rtx_REG (Pmode, IP0_REGNUM); - temp1 = gen_rtx_REG (Pmode, IP1_REGNUM); + temp0 = gen_rtx_REG (Pmode, EP0_REGNUM); + temp1 = gen_rtx_REG (Pmode, EP1_REGNUM); if (vcall_offset == 0) aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false); @@ -5169,14 +9091,18 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, } funexp = XEXP (DECL_RTL (function), 0); funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); - insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); + rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi)); SIBLING_CALL_P (insn) = 1; insn = get_insns (); shorten_branches (insn); + + assemble_start_function (thunk, fnname); final_start_function (insn, file, 1); final (insn, file, 1); final_end_function (); + assemble_end_function (thunk, fnname); /* Stop pretending to be a post-reload pass. */ reload_completed = 0; @@ -5191,7 +9117,7 @@ aarch64_tls_referenced_p (rtx x) FOR_EACH_SUBRTX (iter, array, x, ALL) { const_rtx x = *iter; - if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0) + if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0) return true; /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are TLS offsets, not real symbol references. */ @@ -5212,6 +9138,20 @@ aarch64_uimm12_shift (HOST_WIDE_INT val) ); } +/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate + that can be created with a left shift of 0 or 12. */ +static HOST_WIDE_INT +aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) +{ + /* Check to see if the value fits in 24 bits, as that is the maximum we can + handle correctly. */ + gcc_assert ((val & 0xffffff) == val); + + if (((val & 0xfff) << 0) == val) + return val; + + return val & (0xfff << 12); +} /* Return true if val is an immediate that can be loaded into a register by a MOVZ instruction. */ @@ -5233,6 +9173,30 @@ aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); } +/* Test whether: + + X = (X & AND_VAL) | IOR_VAL; + + can be implemented using: + + MOVK X, #(IOR_VAL >> shift), LSL #shift + + Return the shift if so, otherwise return -1. */ +int +aarch64_movk_shift (const wide_int_ref &and_val, + const wide_int_ref &ior_val) +{ + unsigned int precision = and_val.get_precision (); + unsigned HOST_WIDE_INT mask = 0xffff; + for (unsigned int shift = 0; shift < precision; shift += 16) + { + if (and_val == ~mask && (ior_val & mask) == ior_val) + return shift; + mask <<= 16; + } + return -1; +} + /* VAL is a value with the inner mode of MODE. Replicate it to fill a 64-bit (DImode) integer. 
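The new aarch64_movk_shift helper is pure bit arithmetic; here is a standalone version for experimentation, using plain 64-bit integers in place of wide_int (so it only models the 64-bit precision case).

#include <stdint.h>
#include <stdio.h>

/* Return the MOVK shift if AND_VAL clears exactly one aligned 16-bit field
   and IOR_VAL lies entirely within that field, otherwise -1.  */
static int
movk_shift (uint64_t and_val, uint64_t ior_val)
{
  uint64_t mask = 0xffff;
  for (int shift = 0; shift < 64; shift += 16)
    {
      if (and_val == ~mask && (ior_val & mask) == ior_val)
        return shift;
      mask <<= 16;
    }
  return -1;
}

int
main (void)
{
  /* Replacing bits [31:16] is one movk x, #0x1234, lsl #16.  */
  printf ("%d\n", movk_shift (0xffffffff0000ffffULL, 0x12340000ULL));  /* 16 */
  /* The inserted value spills into a second field: not a single MOVK.  */
  printf ("%d\n", movk_shift (0xffffffff0000ffffULL, 0x112340000ULL)); /* -1 */
  return 0;
}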
*/ @@ -5368,8 +9332,6 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) static bool aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { - rtx base, offset; - if (GET_CODE (x) == HIGH) return true; @@ -5379,10 +9341,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) if (GET_CODE (*iter) == CONST_POLY_INT) return true; - split_const (x, &base, &offset); - if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) + poly_int64 offset; + rtx base = strip_offset_and_salt (x, &offset); + if (SYMBOL_REF_P (base) || LABEL_REF_P (base)) { - if (aarch64_classify_symbol (base, INTVAL (offset)) + /* We checked for POLY_INT_CST offsets above. */ + if (aarch64_classify_symbol (base, offset.to_constant ()) != SYMBOL_FORCE_TO_MEM) return true; else @@ -5465,7 +9429,7 @@ static bool aarch64_base_register_rtx_p (rtx x, bool strict_p) { if (!strict_p - && GET_CODE (x) == SUBREG + && SUBREG_P (x) && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))]) x = SUBREG_REG (x); @@ -5484,7 +9448,7 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, int shift; /* (reg:P) */ - if ((REG_P (x) || GET_CODE (x) == SUBREG) + if ((REG_P (x) || SUBREG_P (x)) && GET_MODE (x) == Pmode) { type = ADDRESS_REG_REG; @@ -5528,22 +9492,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, index = XEXP (XEXP (x, 0), 0); shift = INTVAL (XEXP (x, 1)); } - /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */ - else if ((GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - && GET_MODE (x) == DImode - && GET_CODE (XEXP (x, 0)) == MULT - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) - { - type = (GET_CODE (x) == SIGN_EXTRACT) - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; - index = XEXP (XEXP (x, 0), 0); - shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); - if (INTVAL (XEXP (x, 1)) != 32 + shift - || INTVAL (XEXP (x, 2)) != 0) - shift = -1; - } /* (and:DI (mult:DI (reg:DI) (const_int scale)) (const_int 0xffffffff<= 4) { - rtx sym, addend; - - split_const (x, &sym, &addend); - return ((GET_CODE (sym) == LABEL_REF - || (GET_CODE (sym) == SYMBOL_REF + poly_int64 offset; + rtx sym = strip_offset_and_salt (x, &offset); + return ((LABEL_REF_P (sym) + || (SYMBOL_REF_P (sym) && CONSTANT_POOL_ADDRESS_P (sym) && aarch64_pcrelative_literal_loads))); } @@ -5905,10 +9856,12 @@ aarch64_classify_address (struct aarch64_address_info *info, if (allow_reg_index_p && aarch64_base_register_rtx_p (info->base, strict_p)) { - rtx sym, offs; - split_const (info->offset, &sym, &offs); - if (GET_CODE (sym) == SYMBOL_REF - && (aarch64_classify_symbol (sym, INTVAL (offs)) + poly_int64 offset; + HOST_WIDE_INT const_offset; + rtx sym = strip_offset_and_salt (info->offset, &offset); + if (SYMBOL_REF_P (sym) + && offset.is_constant (&const_offset) + && (aarch64_classify_symbol (sym, const_offset) == SYMBOL_SMALL_ABSOLUTE)) { /* The symbol and offset must be aligned to the access size. 
*/ @@ -5934,7 +9887,7 @@ aarch64_classify_address (struct aarch64_address_info *info, if (known_eq (ref_size, 0)) ref_size = GET_MODE_SIZE (DImode); - return (multiple_p (INTVAL (offs), ref_size) + return (multiple_p (const_offset, ref_size) && multiple_p (align / BITS_PER_UNIT, ref_size)); } } @@ -5966,10 +9919,9 @@ aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p) bool aarch64_symbolic_address_p (rtx x) { - rtx offset; - - split_const (x, &x, &offset); - return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF; + poly_int64 offset; + x = strip_offset_and_salt (x, &offset); + return SYMBOL_REF_P (x) || LABEL_REF_P (x); } /* Classify the base of symbolic expression X. */ @@ -6094,7 +10046,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) } scalar_float_mode mode; - if (GET_CODE (value) != CONST_DOUBLE + if (!CONST_DOUBLE_P (value) || !is_a (GET_MODE (value), &mode) || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT /* Only support up to DF mode. */ @@ -6134,7 +10086,7 @@ aarch64_float_const_rtx_p (rtx x) mov/movk pairs over ldr/adrp pairs. */ unsigned HOST_WIDE_INT ival; - if (GET_CODE (x) == CONST_DOUBLE + if (CONST_DOUBLE_P (x) && SCALAR_FLOAT_MODE_P (mode) && aarch64_reinterpret_float_as_int (x, &ival)) { @@ -6173,7 +10125,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) scalar_int_mode imode; unsigned HOST_WIDE_INT ival; - if (GET_CODE (x) == CONST_DOUBLE + if (CONST_DOUBLE_P (x) && SCALAR_FLOAT_MODE_P (mode)) { if (!aarch64_reinterpret_float_as_int (x, &ival)) @@ -6185,7 +10137,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) imode = int_mode_for_mode (mode).require (); } - else if (GET_CODE (x) == CONST_INT + else if (CONST_INT_P (x) && is_a (mode, &imode)) ival = INTVAL (x); else @@ -6216,11 +10168,12 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) RESULT is the register in which the result is returned. It's NULL for "call" and "sibcall". MEM is the location of the function call. + CALLEE_ABI is a const_int that gives the arm_pcs of the callee. SIBCALL indicates whether this function call is normal call or sibling call. It will generate different pattern accordingly. */ void -aarch64_expand_call (rtx result, rtx mem, bool sibcall) +aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) { rtx call, callee, tmp; rtvec vec; @@ -6250,7 +10203,11 @@ aarch64_expand_call (rtx result, rtx mem, bool sibcall) else tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM)); - vec = gen_rtvec (2, call, tmp); + gcc_assert (CONST_INT_P (callee_abi)); + callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi), + UNSPEC_CALLEE_ABI); + + vec = gen_rtvec (3, call, callee_abi, tmp); call = gen_rtx_PARALLEL (VOIDmode, vec); aarch64_emit_call_insn (call); @@ -6271,9 +10228,12 @@ aarch64_emit_call_insn (rtx pat) machine_mode aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) { + machine_mode mode_x = GET_MODE (x); + rtx_code code_x = GET_CODE (x); + /* All floating point compares return CCFP if it is an equality comparison, and CCFPE otherwise. */ - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + if (GET_MODE_CLASS (mode_x) == MODE_FLOAT) { switch (code) { @@ -6302,53 +10262,70 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) /* Equality comparisons of short modes against zero can be performed using the TST instruction with the appropriate bitmask. 
*/ - if (y == const0_rtx && REG_P (x) + if (y == const0_rtx && (REG_P (x) || SUBREG_P (x)) && (code == EQ || code == NE) - && (GET_MODE (x) == HImode || GET_MODE (x) == QImode)) + && (mode_x == HImode || mode_x == QImode)) return CC_NZmode; /* Similarly, comparisons of zero_extends from shorter modes can be performed using an ANDS with an immediate mask. */ - if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND - && (GET_MODE (x) == SImode || GET_MODE (x) == DImode) + if (y == const0_rtx && code_x == ZERO_EXTEND + && (mode_x == SImode || mode_x == DImode) && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode) && (code == EQ || code == NE)) return CC_NZmode; - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + if ((mode_x == SImode || mode_x == DImode) && y == const0_rtx && (code == EQ || code == NE || code == LT || code == GE) - && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND - || GET_CODE (x) == NEG - || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1)) + && (code_x == PLUS || code_x == MINUS || code_x == AND + || code_x == NEG + || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1)) && CONST_INT_P (XEXP (x, 2))))) return CC_NZmode; /* A compare with a shifted operand. Because of canonicalization, the comparison will have to be swapped when we emit the assembly code. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) - && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx) - && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT - || GET_CODE (x) == LSHIFTRT - || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) + if ((mode_x == SImode || mode_x == DImode) + && (REG_P (y) || SUBREG_P (y) || y == const0_rtx) + && (code_x == ASHIFT || code_x == ASHIFTRT + || code_x == LSHIFTRT + || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND)) return CC_SWPmode; /* Similarly for a negated operand, but we can only do this for equalities. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) - && (REG_P (y) || GET_CODE (y) == SUBREG) + if ((mode_x == SImode || mode_x == DImode) + && (REG_P (y) || SUBREG_P (y)) && (code == EQ || code == NE) - && GET_CODE (x) == NEG) + && code_x == NEG) return CC_Zmode; - /* A test for unsigned overflow. */ - if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) - && code == NE - && GET_CODE (x) == PLUS - && GET_CODE (y) == ZERO_EXTEND) + /* A test for unsigned overflow from an addition. */ + if ((mode_x == DImode || mode_x == TImode) + && (code == LTU || code == GEU) + && code_x == PLUS + && rtx_equal_p (XEXP (x, 0), y)) return CC_Cmode; + /* A test for unsigned overflow from an add with carry. */ + if ((mode_x == DImode || mode_x == TImode) + && (code == LTU || code == GEU) + && code_x == PLUS + && CONST_SCALAR_INT_P (y) + && (rtx_mode_t (y, mode_x) + == (wi::shwi (1, mode_x) + << (GET_MODE_BITSIZE (mode_x).to_constant () / 2)))) + return CC_ADCmode; + + /* A test for signed overflow. */ + if ((mode_x == DImode || mode_x == TImode) + && code == NE + && code_x == PLUS + && GET_CODE (y) == SIGN_EXTEND) + return CC_Vmode; + /* For everything else, return CCmode. 
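The new CC_Cmode and CC_ADCmode cases above target the usual carry idioms.  For instance, the unsigned-overflow check recognised by the LTU/GEU-against-the-first-addend pattern is just the familiar C test below, which can then be answered from the carry flag of the addition rather than via a widened add (a minimal illustration, not compiler output).

#include <stdint.h>
#include <stdio.h>

static int
add_overflows (uint64_t a, uint64_t b)
{
  return a + b < a;   /* true exactly when the 64-bit add carries out */
}

int
main (void)
{
  printf ("%d\n", add_overflows (UINT64_MAX, 1)); /* 1 */
  printf ("%d\n", add_overflows (1, 2));          /* 0 */
  return 0;
}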
*/ return CCmode; } @@ -6426,6 +10403,21 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) } break; + case E_CC_NZCmode: + switch (comp_code) + { + case NE: return AARCH64_NE; /* = any */ + case EQ: return AARCH64_EQ; /* = none */ + case GE: return AARCH64_PL; /* = nfrst */ + case LT: return AARCH64_MI; /* = first */ + case GEU: return AARCH64_CS; /* = nlast */ + case GTU: return AARCH64_HI; /* = pmore */ + case LEU: return AARCH64_LS; /* = plast */ + case LTU: return AARCH64_CC; /* = last */ + default: return -1; + } + break; + case E_CC_NZmode: switch (comp_code) { @@ -6449,8 +10441,26 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) case E_CC_Cmode: switch (comp_code) { - case NE: return AARCH64_CS; - case EQ: return AARCH64_CC; + case LTU: return AARCH64_CS; + case GEU: return AARCH64_CC; + default: return -1; + } + break; + + case E_CC_ADCmode: + switch (comp_code) + { + case GEU: return AARCH64_CS; + case LTU: return AARCH64_CC; + default: return -1; + } + break; + + case E_CC_Vmode: + switch (comp_code) + { + case NE: return AARCH64_VS; + case EQ: return AARCH64_VC; default: return -1; } break; @@ -6550,15 +10560,24 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate) if (negate) r = real_value_negate (&r); - /* We only handle the SVE single-bit immediates here. */ + /* Handle the SVE single-bit immediates specially, since they have a + fixed form in the assembly syntax. */ if (real_equal (&r, &dconst0)) asm_fprintf (f, "0.0"); + else if (real_equal (&r, &dconst2)) + asm_fprintf (f, "2.0"); else if (real_equal (&r, &dconst1)) asm_fprintf (f, "1.0"); else if (real_equal (&r, &dconsthalf)) asm_fprintf (f, "0.5"); else - return false; + { + const int buf_size = 20; + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, + 1, GET_MODE (elt)); + asm_fprintf (f, "%s", float_buf); + } return true; } @@ -6586,7 +10605,13 @@ sizetochar (int size) 'D': Take the duplicated element in a vector constant and print it as an unsigned integer, in decimal. 'e': Print the sign/zero-extend size as a character 8->b, - 16->h, 32->w. + 16->h, 32->w. Can also be used for masks: + 0xff->b, 0xffff->h, 0xffffffff->w. + 'I': If the operand is a duplicated vector constant, + replace it with the duplicated scalar. If the + operand is then a floating-point constant, replace + it with the integer bit representation. Print the + transformed constant as a signed decimal number. 'p': Prints N such that 2^N == X (X must be power of 2 and const int). 'P': Print the number of non-zero bits in X (a const_int). @@ -6600,7 +10625,7 @@ sizetochar (int size) 'S/T/U/V': Print a FP/SIMD register name for a register list. The register printed is the FP/SIMD register name of X + 0/1/2/3 for S/T/U/V. - 'R': Print a scalar FP/SIMD register name + 1. + 'R': Print a scalar Integer/FP/SIMD register name + 1. 'X': Print bottom 16 bits of integer constant in hex. 'w/x': Print a general register name or the zero register (32-bit or 64-bit). 
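The extended '%e' documentation above maps either an extend width or the corresponding zero-extend mask to a size suffix; a simplified standalone restatement of that mapping (exact widths only, an illustration rather than the in-tree logic).

#include <stdio.h>

static char
extend_suffix (unsigned long long val)
{
  if (val == 8 || val == 0xff)
    return 'b';
  if (val == 16 || val == 0xffff)
    return 'h';
  if (val == 32 || val == 0xffffffff)
    return 'w';
  return '?';
}

int
main (void)
{
  printf ("%c %c %c\n", extend_suffix (8), extend_suffix (0xffff),
          extend_suffix (0xffffffff));   /* b h w */
  return 0;
}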
@@ -6626,53 +10651,37 @@ aarch64_print_operand (FILE *f, rtx x, int code) switch (code) { case 'c': - switch (GET_CODE (x)) + if (CONST_INT_P (x)) + fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + else { - case CONST_INT: - fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); - break; - - case SYMBOL_REF: - output_addr_const (f, x); - break; - - case CONST: - if (GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) - { - output_addr_const (f, x); - break; - } - /* Fall through. */ - - default: - output_operand_lossage ("unsupported operand for code '%c'", code); - } - break; + poly_int64 offset; + rtx base = strip_offset_and_salt (x, &offset); + if (SYMBOL_REF_P (base)) + output_addr_const (f, x); + else + output_operand_lossage ("unsupported operand for code '%c'", code); + } + break; case 'e': { - int n; - - if (!CONST_INT_P (x) - || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) + x = unwrap_const_vec_duplicate (x); + if (!CONST_INT_P (x)) { output_operand_lossage ("invalid operand for '%%%c'", code); return; } - switch (n) + HOST_WIDE_INT val = INTVAL (x); + if ((val & ~7) == 8 || val == 0xff) + fputc ('b', f); + else if ((val & ~7) == 16 || val == 0xffff) + fputc ('h', f); + else if ((val & ~7) == 32 || val == 0xffffffff) + fputc ('w', f); + else { - case 3: - fputc ('b', f); - break; - case 4: - fputc ('h', f); - break; - case 5: - fputc ('w', f); - break; - default: output_operand_lossage ("invalid operand for '%%%c'", code); return; } @@ -6704,6 +10713,12 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'H': + if (x == const0_rtx) + { + asm_fprintf (f, "xzr"); + break; + } + if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1)) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -6713,6 +10728,19 @@ aarch64_print_operand (FILE *f, rtx x, int code) asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]); break; + case 'I': + { + x = aarch64_bit_representation (unwrap_const_vec_duplicate (x)); + if (CONST_INT_P (x)) + asm_fprintf (f, "%wd", INTVAL (x)); + else + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + break; + } + case 'M': case 'm': { @@ -6735,7 +10763,10 @@ aarch64_print_operand (FILE *f, rtx x, int code) gcc_assert (cond_code >= 0); if (code == 'M') cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code); - fputs (aarch64_condition_codes[cond_code], f); + if (GET_MODE (XEXP (x, 0)) == CC_NZCmode) + fputs (aarch64_sve_condition_codes[cond_code], f); + else + fputs (aarch64_condition_codes[cond_code], f); } break; @@ -6747,7 +10778,7 @@ aarch64_print_operand (FILE *f, rtx x, int code) } if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) - asm_fprintf (f, "%wd", -INTVAL (elt)); + asm_fprintf (f, "%wd", (HOST_WIDE_INT) -UINTVAL (elt)); else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT && aarch64_print_vector_float_operand (f, x, true)) ; @@ -6786,12 +10817,13 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'R': - if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) - { - output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); - return; - } - asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); + if (REG_P (x) && FP_REGNUM_P (REGNO (x))) + asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); + else if (REG_P (x) && GP_REGNUM_P (REGNO (x))) + asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1); + else + output_operand_lossage ("incompatible register operand for '%%%c'", + code); break; case 'X': @@ -7059,20 +11091,17 @@ aarch64_print_operand (FILE 
*f, rtx x, int code) { machine_mode mode = GET_MODE (x); - if (GET_CODE (x) != MEM + if (!MEM_P (x) || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16))) { output_operand_lossage ("invalid operand for '%%%c'", code); return; } - if (code == 'y') - /* LDP/STP which uses a single double-width memory operand. - Adjust the mode to appear like a typical LDP/STP. - Currently this is supported for 16-byte accesses only. */ - mode = DFmode; - - if (!aarch64_print_ldpstp_address (f, mode, XEXP (x, 0))) + if (!aarch64_print_address_internal (f, mode, XEXP (x, 0), + code == 'y' + ? ADDR_QUERY_LDP_STP_N + : ADDR_QUERY_LDP_STP)) output_operand_lossage ("invalid operand prefix '%%%c'", code); } break; @@ -7091,7 +11120,7 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, aarch64_addr_query_type type) { struct aarch64_address_info addr; - unsigned int size; + unsigned int size, vec_flags; /* Check all addresses are Pmode - including ILP32. */ if (GET_MODE (x) != Pmode @@ -7107,26 +11136,24 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, { case ADDRESS_REG_IMM: if (known_eq (addr.const_offset, 0)) - asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); - else if (aarch64_sve_data_mode_p (mode)) { - HOST_WIDE_INT vnum - = exact_div (addr.const_offset, - BYTES_PER_SVE_VECTOR).to_constant (); - asm_fprintf (f, "[%s, #%wd, mul vl]", - reg_names[REGNO (addr.base)], vnum); + asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]); + return true; } - else if (aarch64_sve_pred_mode_p (mode)) + + vec_flags = aarch64_classify_vector_mode (mode); + if (vec_flags & VEC_ANY_SVE) { HOST_WIDE_INT vnum = exact_div (addr.const_offset, - BYTES_PER_SVE_PRED).to_constant (); + aarch64_vl_bytes (mode, vec_flags)).to_constant (); asm_fprintf (f, "[%s, #%wd, mul vl]", reg_names[REGNO (addr.base)], vnum); + return true; } - else - asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); + + asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)], + INTVAL (addr.offset)); return true; case ADDRESS_REG_REG: @@ -7200,13 +11227,6 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, return false; } -/* Print address 'x' of a LDP/STP with mode 'mode'. */ -static bool -aarch64_print_ldpstp_address (FILE *f, machine_mode mode, rtx x) -{ - return aarch64_print_address_internal (f, mode, x, ADDR_QUERY_LDP_STP); -} - /* Print address 'x' of a memory access with mode 'mode'. */ static void aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x) @@ -7215,13 +11235,26 @@ aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x) output_addr_const (f, x); } +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ + +static bool +aarch64_output_addr_const_extra (FILE *file, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SALT_ADDR) + { + output_addr_const (file, XVECEXP (x, 0, 0)); + return true; + } + return false; +} + bool aarch64_label_mentioned_p (rtx x) { const char *fmt; int i; - if (GET_CODE (x) == LABEL_REF) + if (LABEL_REF_P (x)) return true; /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the @@ -7253,6 +11286,9 @@ aarch64_label_mentioned_p (rtx x) enum reg_class aarch64_regno_regclass (unsigned regno) { + if (STUB_REGNUM_P (regno)) + return STUB_REGS; + if (GP_REGNUM_P (regno)) return GENERAL_REGS; @@ -7264,11 +11300,15 @@ aarch64_regno_regclass (unsigned regno) return POINTER_REGS; if (FP_REGNUM_P (regno)) - return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS; + return (FP_LO8_REGNUM_P (regno) ? 
FP_LO8_REGS + : FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS); if (PR_REGNUM_P (regno)) return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS; + if (regno == FFR_REGNUM || regno == FFRT_REGNUM) + return FFR_REGS; + return NO_REGS; } @@ -7371,79 +11411,37 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode) return x; } -/* Return the reload icode required for a constant pool in mode. */ -static enum insn_code -aarch64_constant_pool_reload_icode (machine_mode mode) -{ - switch (mode) - { - case E_SFmode: - return CODE_FOR_aarch64_reload_movcpsfdi; - - case E_DFmode: - return CODE_FOR_aarch64_reload_movcpdfdi; - - case E_TFmode: - return CODE_FOR_aarch64_reload_movcptfdi; - - case E_V8QImode: - return CODE_FOR_aarch64_reload_movcpv8qidi; - - case E_V16QImode: - return CODE_FOR_aarch64_reload_movcpv16qidi; - - case E_V4HImode: - return CODE_FOR_aarch64_reload_movcpv4hidi; - - case E_V8HImode: - return CODE_FOR_aarch64_reload_movcpv8hidi; - - case E_V2SImode: - return CODE_FOR_aarch64_reload_movcpv2sidi; - - case E_V4SImode: - return CODE_FOR_aarch64_reload_movcpv4sidi; - - case E_V2DImode: - return CODE_FOR_aarch64_reload_movcpv2didi; - - case E_V2DFmode: - return CODE_FOR_aarch64_reload_movcpv2dfdi; - - default: - gcc_unreachable (); - } - - gcc_unreachable (); -} static reg_class_t aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, reg_class_t rclass, machine_mode mode, secondary_reload_info *sri) { - /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled - directly by the *aarch64_sve_mov_be move pattern. See the - comment at the head of aarch64-sve.md for more details about the - big-endian handling. */ - if (BYTES_BIG_ENDIAN - && reg_class_subset_p (rclass, FP_REGS) + /* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use + LDR and STR. See the comment at the head of aarch64-sve.md for + more details about the big-endian handling. */ + if (reg_class_subset_p (rclass, FP_REGS) && !((REG_P (x) && HARD_REGISTER_P (x)) || aarch64_simd_valid_immediate (x, NULL)) - && aarch64_sve_data_mode_p (mode)) + && mode != VNx16QImode) { - sri->icode = CODE_FOR_aarch64_sve_reload_be; - return NO_REGS; + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + if ((vec_flags & VEC_SVE_DATA) + && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN)) + { + sri->icode = CODE_FOR_aarch64_sve_reload_mem; + return NO_REGS; + } } /* If we have to disable direct literal pool loads and stores because the function is too big, then we need a scratch register. 
*/ - if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x) + if (MEM_P (x) && SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x) && (SCALAR_FLOAT_MODE_P (GET_MODE (x)) || targetm.vector_mode_supported_p (GET_MODE (x))) && !aarch64_pcrelative_literal_loads) { - sri->icode = aarch64_constant_pool_reload_icode (mode); + sri->icode = code_for_aarch64_reload_movcp (mode, DImode); return NO_REGS; } @@ -7453,10 +11451,7 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD && reg_class_subset_p (rclass, FP_REGS)) { - if (mode == TFmode) - sri->icode = CODE_FOR_aarch64_reload_movtf; - else if (mode == TImode) - sri->icode = CODE_FOR_aarch64_reload_movti; + sri->icode = code_for_aarch64_reload_mov (mode); return NO_REGS; } @@ -7488,8 +11483,6 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) poly_int64 aarch64_initial_elimination_offset (unsigned from, unsigned to) { - aarch64_layout_frame (); - if (to == HARD_FRAME_POINTER_REGNUM) { if (from == ARG_POINTER_REGNUM) @@ -7510,6 +11503,24 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) return cfun->machine->frame.frame_size; } + +/* Get return address without mangling. */ + +rtx +aarch64_return_addr_rtx (void) +{ + rtx val = get_hard_reg_initial_val (Pmode, LR_REGNUM); + /* Note: aarch64_return_address_signing_enabled only + works after cfun->machine->frame.laid_out is set, + so here we don't know if the return address will + be signed or not. */ + rtx lr = gen_rtx_REG (Pmode, LR_REGNUM); + emit_move_insn (lr, val); + emit_insn (GEN_FCN (CODE_FOR_xpaclri) ()); + return lr; +} + + /* Implement RETURN_ADDR_RTX. We do not support moving back to a previous frame. */ @@ -7518,25 +11529,40 @@ aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) { if (count != 0) return const0_rtx; - return get_hard_reg_initial_val (Pmode, LR_REGNUM); + return aarch64_return_addr_rtx (); } - static void aarch64_asm_trampoline_template (FILE *f) { + /* Even if the current function doesn't have branch protection, some + later function might, so since this template is only generated once + we have to add a BTI just in case. */ + asm_fprintf (f, "\thint\t34 // bti c\n"); + if (TARGET_ILP32) { - asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM); - asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM); + asm_fprintf (f, "\tldr\tw%d, .+20\n", IP1_REGNUM - R0_REGNUM); + asm_fprintf (f, "\tldr\tw%d, .+20\n", STATIC_CHAIN_REGNUM - R0_REGNUM); } else { - asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]); - asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]); + asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [IP1_REGNUM]); + asm_fprintf (f, "\tldr\t%s, .+24\n", reg_names [STATIC_CHAIN_REGNUM]); } asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]); - assemble_aligned_integer (4, const0_rtx); + + /* We always emit a speculation barrier. + This is because the same trampoline template is used for every nested + function. Since nested functions are not particularly common or + performant we don't worry too much about the extra instructions to copy + around. + This is not yet a problem, since we have not yet implemented function + specific attributes to choose between hardening against straight line + speculation or not, but such function specific attributes are likely to + happen in the future. 
*/ + asm_fprintf (f, "\tdsb\tsy\n\tisb\n"); + assemble_aligned_integer (POINTER_BYTES, const0_rtx); assemble_aligned_integer (POINTER_BYTES, const0_rtx); } @@ -7545,10 +11571,14 @@ static void aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) { rtx fnaddr, mem, a_tramp; - const int tramp_code_sz = 16; + const int tramp_code_sz = 24; /* Don't need to copy the trailing D-words, we fill those in below. */ - emit_block_move (m_tramp, assemble_trampoline_template (), + /* We create our own memory address in Pmode so that `emit_block_move` can + use parts of the backend which expect Pmode addresses. */ + rtx temp = convert_memory_address (Pmode, XEXP (m_tramp, 0)); + emit_block_move (gen_rtx_MEM (BLKmode, temp), + assemble_trampoline_template (), GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL); mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz); fnaddr = XEXP (DECL_RTL (fndecl), 0); @@ -7562,10 +11592,10 @@ aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) /* XXX We should really define a "clear_cache" pattern and use gen_clear_cache(). */ a_tramp = XEXP (m_tramp, 0); - emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), - LCT_NORMAL, VOIDmode, a_tramp, ptr_mode, - plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE), - ptr_mode); + maybe_emit_call_builtin___clear_cache (a_tramp, + plus_constant (ptr_mode, + a_tramp, + TRAMPOLINE_SIZE)); } static unsigned char @@ -7576,9 +11606,10 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) can hold MODE, but at the moment we need to handle all modes. Just ignore any runtime parts for registers that can't store them. */ HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); - unsigned int nregs; + unsigned int nregs, vec_flags; switch (regclass) { + case STUB_REGS: case TAILCALL_ADDR_REGS: case POINTER_REGS: case GENERAL_REGS: @@ -7586,17 +11617,21 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) case POINTER_AND_FP_REGS: case FP_REGS: case FP_LO_REGS: - if (aarch64_sve_data_mode_p (mode) + case FP_LO8_REGS: + vec_flags = aarch64_classify_vector_mode (mode); + if ((vec_flags & VEC_SVE_DATA) && constant_multiple_p (GET_MODE_SIZE (mode), - BYTES_PER_SVE_VECTOR, &nregs)) + aarch64_vl_bytes (mode, vec_flags), &nregs)) return nregs; - return (aarch64_vector_data_mode_p (mode) + return (vec_flags & VEC_ADVSIMD ? CEIL (lowest_size, UNITS_PER_VREG) : CEIL (lowest_size, UNITS_PER_WORD)); case STACK_REG: case PR_REGS: case PR_LO_REGS: case PR_HI_REGS: + case FFR_REGS: + case PR_AND_FFR_REGS: return 1; case NO_REGS: @@ -7632,7 +11667,7 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass) rtx lhs = XEXP (x, 0); /* Look through a possible SUBREG introduced by ILP32. */ - if (GET_CODE (lhs) == SUBREG) + if (SUBREG_P (lhs)) lhs = SUBREG_REG (lhs); gcc_assert (REG_P (lhs)); @@ -7735,6 +11770,8 @@ aarch64_output_casesi (rtx *operands) output_asm_insn (buf, operands); output_asm_insn (patterns[index][1], operands); output_asm_insn ("br\t%3", operands); + output_asm_insn (aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()), + operands); assemble_label (asm_out_file, label); return ""; } @@ -7847,16 +11884,6 @@ aarch64_strip_extend (rtx x, bool strip_shift) if (!is_a (GET_MODE (op), &mode)) return op; - /* Zero and sign extraction of a widened value. 
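With the BTI landing pad and the trailing speculation barrier, the trampoline template above now carries six 4-byte instructions ahead of the two pointer-sized data slots, which is why tramp_code_sz grows to 24.  A small host-side sanity check of that layout, assuming LP64 pointers:

#include <assert.h>
#include <stdio.h>

int
main (void)
{
  const int insn_bytes = 4;
  /* hint 34 (bti c); ldr; ldr; br; dsb sy; isb  */
  const int code_bytes = 6 * insn_bytes;
  const int data_bytes = 2 * 8;   /* target address + static chain */

  assert (code_bytes == 24);      /* matches the new tramp_code_sz */
  printf ("trampoline: %d code bytes + %d data bytes\n", code_bytes, data_bytes);
  return 0;
}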
*/ - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) - && XEXP (op, 2) == const0_rtx - && GET_CODE (XEXP (op, 0)) == MULT - && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1), - XEXP (op, 1))) - return XEXP (XEXP (op, 0), 0); - - /* It can also be represented (for zero-extend) as an AND with an - immediate. */ if (GET_CODE (op) == AND && GET_CODE (XEXP (op, 0)) == MULT && CONST_INT_P (XEXP (XEXP (op, 0), 1)) @@ -7946,7 +11973,33 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) op1 = XEXP (x, 1); if (VECTOR_MODE_P (mode)) - mode = GET_MODE_INNER (mode); + { + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + if (vec_flags & VEC_ADVSIMD) + { + /* The by-element versions of the instruction have the same costs as + the normal 3-vector version. So don't add the costs of the + duplicate into the costs of the multiply. We make an assumption + that the input to the VEC_DUPLICATE is already on the FP & SIMD + side. This means costing of a MUL by element pre RA is a bit + optimistic. */ + if (GET_CODE (op0) == VEC_DUPLICATE) + op0 = XEXP (op0, 0); + else if (GET_CODE (op1) == VEC_DUPLICATE) + op1 = XEXP (op1, 0); + } + cost += rtx_cost (op0, mode, MULT, 0, speed); + cost += rtx_cost (op1, mode, MULT, 1, speed); + if (speed) + { + if (GET_CODE (x) == MULT) + cost += extra_cost->vect.mult; + /* This is to catch the SSRA costing currently flowing here. */ + else + cost += extra_cost->vect.alu; + } + return cost; + } /* Integer multiply/fma. */ if (GET_MODE_CLASS (mode) == MODE_INT) @@ -8084,7 +12137,7 @@ aarch64_address_cost (rtx x, if (!aarch64_classify_address (&info, x, mode, false)) { - if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) + if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)) { /* This is a CONST or SYMBOL ref which will be split in a different way depending on the code model in use. @@ -8117,7 +12170,14 @@ aarch64_address_cost (rtx x, if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) cost += addr_cost->pre_modify; else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) - cost += addr_cost->post_modify; + { + if (mode == CImode) + cost += addr_cost->post_modify_ld3_st3; + else if (mode == XImode) + cost += addr_cost->post_modify_ld4_st4; + else + cost += addr_cost->post_modify; + } else gcc_unreachable (); @@ -8175,35 +12235,15 @@ aarch64_branch_cost (bool speed_p, bool predictable_p) return branch_costs->unpredictable; } -/* Return true if the RTX X in mode MODE is a zero or sign extract +/* Return true if X is a zero or sign extract usable in an ADD or SUB (extended register) instruction. */ static bool -aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode) +aarch64_rtx_arith_op_extract_p (rtx x) { - /* Catch add with a sign extract. - This is add__multp2. */ - if (GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - { - rtx op0 = XEXP (x, 0); - rtx op1 = XEXP (x, 1); - rtx op2 = XEXP (x, 2); - - if (GET_CODE (op0) == MULT - && CONST_INT_P (op1) - && op2 == const0_rtx - && CONST_INT_P (XEXP (op0, 1)) - && aarch64_is_extend_from_extract (mode, - XEXP (op0, 1), - op1)) - { - return true; - } - } /* The simple case , XD, XN, XM, [us]xt. No shift. 
*/ - else if (GET_CODE (x) == SIGN_EXTEND - || GET_CODE (x) == ZERO_EXTEND) + if (GET_CODE (x) == SIGN_EXTEND + || GET_CODE (x) == ZERO_EXTEND) return REG_P (XEXP (x, 0)); return false; @@ -8284,6 +12324,8 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) rtx inner; rtx comparator; enum rtx_code cmpcode; + const struct cpu_cost_table *extra_cost + = aarch64_tune_params.insn_extra_cost; if (COMPARISON_P (op0)) { @@ -8318,8 +12360,17 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) /* CBZ/CBNZ. */ *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed); - return true; - } + return true; + } + if (register_operand (inner, VOIDmode) + && aarch64_imm24 (comparator, VOIDmode)) + { + /* SUB and SUBS. */ + *cost += COSTS_N_INSNS (2); + if (speed) + *cost += extra_cost->alu.arith * 2; + return true; + } } else if (cmpcode == LT || cmpcode == GE) { @@ -8340,8 +12391,6 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) if (speed) { machine_mode mode = GET_MODE (XEXP (op1, 0)); - const struct cpu_cost_table *extra_cost - = aarch64_tune_params.insn_extra_cost; if (GET_MODE_CLASS (mode) == MODE_INT) *cost += extra_cost->alu.arith; @@ -8365,6 +12414,13 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) op1 = XEXP (op1, 0); op2 = XEXP (op2, 0); } + else if (GET_CODE (op1) == ZERO_EXTEND && op2 == const0_rtx) + { + inner = XEXP (op1, 0); + if (GET_CODE (inner) == NEG || GET_CODE (inner) == NOT) + /* CSINV/NEG with zero extend + const 0 (*csinv3_uxtw_insn3). */ + op1 = XEXP (inner, 0); + } *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed); *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed); @@ -8428,10 +12484,40 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask, rtx shft_amnt) { return CONST_INT_P (mask) && CONST_INT_P (shft_amnt) - && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode) - && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0 - && (INTVAL (mask) - & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0; + && INTVAL (mask) > 0 + && UINTVAL (shft_amnt) < GET_MODE_BITSIZE (mode) + && exact_log2 ((UINTVAL (mask) >> UINTVAL (shft_amnt)) + 1) >= 0 + && (UINTVAL (mask) + & ((HOST_WIDE_INT_1U << UINTVAL (shft_amnt)) - 1)) == 0; +} + +/* Return true if the masks and a shift amount from an RTX of the form + ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into + a BFI instruction of mode MODE. See *arch64_bfi patterns. */ + +bool +aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode, + unsigned HOST_WIDE_INT mask1, + unsigned HOST_WIDE_INT shft_amnt, + unsigned HOST_WIDE_INT mask2) +{ + unsigned HOST_WIDE_INT t; + + /* Verify that there is no overlap in what bits are set in the two masks. */ + if (mask1 != ~mask2) + return false; + + /* Verify that mask2 is not all zeros or ones. */ + if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U) + return false; + + /* The shift amount should always be less than the mode size. */ + gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode)); + + /* Verify that the mask being shifted is contiguous and would be in the + least significant bits after shifting by shft_amnt. */ + t = mask2 + (HOST_WIDE_INT_1U << shft_amnt); + return (t == (t & -t)); } /* Calculate the cost of calculating X, storing it in *COST. 
Result @@ -8703,6 +12789,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, return false; + case CTZ: + *cost = COSTS_N_INSNS (2); + + if (speed) + *cost += extra_cost->alu.clz + extra_cost->alu.rev; + return false; + case COMPARE: op0 = XEXP (x, 0); op1 = XEXP (x, 1); @@ -8838,8 +12931,8 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, } /* Look for SUB (extended register). */ - if (is_a (mode, &int_mode) - && aarch64_rtx_arith_op_extract_p (op1, int_mode)) + if (is_a (mode) + && aarch64_rtx_arith_op_extract_p (op1)) { if (speed) *cost += extra_cost->alu.extend_arith; @@ -8904,22 +12997,32 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, } if (GET_MODE_CLASS (mode) == MODE_INT - && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) + && (aarch64_plus_immediate (op1, mode) || aarch64_sve_addvl_addpl_immediate (op1, mode))) { *cost += rtx_cost (op0, mode, PLUS, 0, speed); if (speed) - /* ADD (immediate). */ - *cost += extra_cost->alu.arith; + { + /* ADD (immediate). */ + *cost += extra_cost->alu.arith; + + /* Some tunings prefer to not use the VL-based scalar ops. + Increase the cost of the poly immediate to prevent their + formation. */ + if (GET_CODE (op1) == CONST_POLY_INT + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS)) + *cost += COSTS_N_INSNS (1); + } return true; } *cost += rtx_cost (op1, mode, PLUS, 1, speed); /* Look for ADD (extended register). */ - if (is_a (mode, &int_mode) - && aarch64_rtx_arith_op_extract_p (op0, int_mode)) + if (is_a (mode) + && aarch64_rtx_arith_op_extract_p (op0)) { if (speed) *cost += extra_cost->alu.extend_arith; @@ -9742,12 +13845,22 @@ aarch64_register_move_cost (machine_mode mode, = aarch64_tune_params.regmove_cost; /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ - if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS) + if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS + || to == STUB_REGS) to = GENERAL_REGS; - if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS) + if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS + || from == STUB_REGS) from = GENERAL_REGS; + /* Make RDFFR very expensive. In particular, if we know that the FFR + contains a PTRUE (e.g. after a SETFFR), we must never use RDFFR + as a way of obtaining a PTRUE. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + && hard_reg_set_subset_p (reg_class_contents[from_i], + reg_class_contents[FFR_REGS])) + return 80; + /* Moving between GPR and stack cost is the same as GP2GP. */ if ((from == GENERAL_REGS && to == STACK_REG) || (to == GENERAL_REGS && from == STACK_REG)) @@ -9797,6 +13910,96 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, return aarch64_tune_params.memmov_cost; } +/* Implement TARGET_INIT_BUILTINS. */ +static void +aarch64_init_builtins () +{ + aarch64_general_init_builtins (); + aarch64_sve::init_builtins (); +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif +} + +/* Implement TARGET_FOLD_BUILTIN. 
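A note on the new CTZ case earlier in this hunk: it charges two instructions (alu.rev plus alu.clz) because AArch64 has no direct count-trailing-zeros instruction and the usual expansion is RBIT followed by CLZ. A small standalone check of that identity, with portable stand-ins for the two instructions:

#include <cassert>
#include <cstdint>

// Portable bit-reverse and count-leading-zeros, standing in for RBIT and CLZ.
static uint32_t bit_reverse (uint32_t x)
{
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

static unsigned clz32 (uint32_t x)
{
  unsigned n = 0;
  for (uint32_t bit = 1u << 31; bit && !(x & bit); bit >>= 1)
    ++n;
  return n;   // 32 for x == 0
}

int main ()
{
  // CTZ == CLZ (RBIT (x)): the two-instruction sequence the cost model charges.
  for (uint32_t x : { 1u, 8u, 0x80000000u, 0xfff0u, 0u })
    assert (clz32 (bit_reverse (x)) == (x ? __builtin_ctz (x) : 32u));
  return 0;
}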
*/ +static tree +aarch64_fold_builtin (tree fndecl, int nargs, tree *args, bool) +{ + unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); + unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + switch (code & AARCH64_BUILTIN_CLASS) + { + case AARCH64_BUILTIN_GENERAL: + return aarch64_general_fold_builtin (subcode, type, nargs, args); + + case AARCH64_BUILTIN_SVE: + return NULL_TREE; + } + gcc_unreachable (); +} + +/* Implement TARGET_GIMPLE_FOLD_BUILTIN. */ +static bool +aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + gcall *stmt = as_a (gsi_stmt (*gsi)); + tree fndecl = gimple_call_fndecl (stmt); + unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); + unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; + gimple *new_stmt = NULL; + switch (code & AARCH64_BUILTIN_CLASS) + { + case AARCH64_BUILTIN_GENERAL: + new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt); + break; + + case AARCH64_BUILTIN_SVE: + new_stmt = aarch64_sve::gimple_fold_builtin (subcode, gsi, stmt); + break; + } + + if (!new_stmt) + return false; + + gsi_replace (gsi, new_stmt, true); + return true; +} + +/* Implement TARGET_EXPAND_BUILTIN. */ +static rtx +aarch64_expand_builtin (tree exp, rtx target, rtx, machine_mode, int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); + unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; + switch (code & AARCH64_BUILTIN_CLASS) + { + case AARCH64_BUILTIN_GENERAL: + return aarch64_general_expand_builtin (subcode, exp, target, ignore); + + case AARCH64_BUILTIN_SVE: + return aarch64_sve::expand_builtin (subcode, exp, target); + } + gcc_unreachable (); +} + +/* Implement TARGET_BUILTIN_DECL. */ +static tree +aarch64_builtin_decl (unsigned int code, bool initialize_p) +{ + unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; + switch (code & AARCH64_BUILTIN_CLASS) + { + case AARCH64_BUILTIN_GENERAL: + return aarch64_general_builtin_decl (subcode, initialize_p); + + case AARCH64_BUILTIN_SVE: + return aarch64_sve::builtin_decl (subcode, initialize_p); + } + gcc_unreachable (); +} + /* Return true if it is safe and beneficial to use the approximate rsqrt optabs to optimize 1.0/sqrt. */ @@ -9820,44 +14023,36 @@ aarch64_builtin_reciprocal (tree fndecl) if (!use_rsqrt_p (mode)) return NULL_TREE; - return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl)); -} - -typedef rtx (*rsqrte_type) (rtx, rtx); - -/* Select reciprocal square root initial estimate insn depending on machine - mode. */ + unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); + unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; + switch (code & AARCH64_BUILTIN_CLASS) + { + case AARCH64_BUILTIN_GENERAL: + return aarch64_general_builtin_rsqrt (subcode); -static rsqrte_type -get_rsqrte_type (machine_mode mode) -{ - switch (mode) - { - case E_DFmode: return gen_aarch64_rsqrtedf; - case E_SFmode: return gen_aarch64_rsqrtesf; - case E_V2DFmode: return gen_aarch64_rsqrtev2df; - case E_V2SFmode: return gen_aarch64_rsqrtev2sf; - case E_V4SFmode: return gen_aarch64_rsqrtev4sf; - default: gcc_unreachable (); - } + case AARCH64_BUILTIN_SVE: + return NULL_TREE; + } + gcc_unreachable (); } -typedef rtx (*rsqrts_type) (rtx, rtx, rtx); +/* Emit code to perform the floating-point operation: -/* Select reciprocal square root series step insn depending on machine mode. 
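The new builtin hooks above all follow the same pattern: read DECL_MD_FUNCTION_CODE, take the class from the low bits, shift the remainder down by AARCH64_BUILTIN_SHIFT, and hand the subcode to the general or SVE sub-dispatcher. A toy model of that encoding, with invented names standing in for the real enum and shift constant:

#include <cassert>

// Sketch of the two-level builtin numbering used by the dispatchers:
// the class lives in the low bits, the per-class subcode above them.
enum builtin_class { BUILTIN_GENERAL, BUILTIN_SVE };
constexpr unsigned BUILTIN_SHIFT = 1;   // enough bits for the classes above
constexpr unsigned BUILTIN_CLASS_MASK = (1u << BUILTIN_SHIFT) - 1;

static unsigned encode (builtin_class cls, unsigned subcode)
{
  return (subcode << BUILTIN_SHIFT) | cls;
}

static builtin_class decode_class (unsigned code)
{
  return builtin_class (code & BUILTIN_CLASS_MASK);
}

static unsigned decode_subcode (unsigned code)
{
  return code >> BUILTIN_SHIFT;
}

int main ()
{
  unsigned code = encode (BUILTIN_SVE, 42);
  // Each hook recovers the class to pick a sub-dispatcher and passes the
  // subcode to it, which is the structure of the new hooks in the patch.
  assert (decode_class (code) == BUILTIN_SVE);
  assert (decode_subcode (code) == 42);
  return 0;
}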
*/ + DST = SRC1 * SRC2 -static rsqrts_type -get_rsqrts_type (machine_mode mode) + where all three operands are already known to be registers. + If the operation is an SVE one, PTRUE is a suitable all-true + predicate. */ + +static void +aarch64_emit_mult (rtx dst, rtx ptrue, rtx src1, rtx src2) { - switch (mode) - { - case E_DFmode: return gen_aarch64_rsqrtsdf; - case E_SFmode: return gen_aarch64_rsqrtssf; - case E_V2DFmode: return gen_aarch64_rsqrtsv2df; - case E_V2SFmode: return gen_aarch64_rsqrtsv2sf; - case E_V4SFmode: return gen_aarch64_rsqrtsv4sf; - default: gcc_unreachable (); - } + if (ptrue) + emit_insn (gen_aarch64_pred (UNSPEC_COND_FMUL, GET_MODE (dst), + dst, ptrue, src1, src2, + gen_int_mode (SVE_RELAXED_GP, SImode))); + else + emit_set_insn (dst, gen_rtx_MULT (GET_MODE (dst), src1, src2)); } /* Emit instruction sequence to compute either the approximate square root @@ -9882,7 +14077,7 @@ aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) & AARCH64_APPROX_MODE (mode)))) return false; - if (flag_finite_math_only + if (!flag_finite_math_only || flag_trapping_math || !flag_unsafe_math_optimizations || optimize_function_for_size_p (cfun)) @@ -9892,19 +14087,37 @@ aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) /* Caller assumes we cannot fail. */ gcc_assert (use_rsqrt_p (mode)); - machine_mode mmsk = mode_for_int_vector (mode).require (); - rtx xmsk = gen_reg_rtx (mmsk); + rtx pg = NULL_RTX; + if (aarch64_sve_mode_p (mode)) + pg = aarch64_ptrue_reg (aarch64_sve_pred_mode (mode)); + machine_mode mmsk = (VECTOR_MODE_P (mode) + ? related_int_vector_mode (mode).require () + : int_mode_for_mode (mode).require ()); + rtx xmsk = NULL_RTX; if (!recp) - /* When calculating the approximate square root, compare the - argument with 0.0 and create a mask. */ - emit_insn (gen_rtx_SET (xmsk, - gen_rtx_NEG (mmsk, - gen_rtx_EQ (mmsk, src, - CONST0_RTX (mode))))); + { + /* When calculating the approximate square root, compare the + argument with 0.0 and create a mask. */ + rtx zero = CONST0_RTX (mode); + if (pg) + { + xmsk = gen_reg_rtx (GET_MODE (pg)); + rtx hint = gen_int_mode (SVE_KNOWN_PTRUE, SImode); + emit_insn (gen_aarch64_pred_fcm (UNSPEC_COND_FCMNE, mode, + xmsk, pg, hint, src, zero)); + } + else + { + xmsk = gen_reg_rtx (mmsk); + emit_insn (gen_rtx_SET (xmsk, + gen_rtx_NEG (mmsk, + gen_rtx_EQ (mmsk, src, zero)))); + } + } /* Estimate the approximate reciprocal square root. */ rtx xdst = gen_reg_rtx (mode); - emit_insn ((*get_rsqrte_type (mode)) (xdst, src)); + emit_insn (gen_aarch64_rsqrte (mode, xdst, src)); /* Iterate over the series twice for SF and thrice for DF. */ int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2; @@ -9921,69 +14134,44 @@ aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) while (iterations--) { rtx x2 = gen_reg_rtx (mode); - emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst)); + aarch64_emit_mult (x2, pg, xdst, xdst); - emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2)); + emit_insn (gen_aarch64_rsqrts (mode, x1, src, x2)); if (iterations > 0) - emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1)); + aarch64_emit_mult (xdst, pg, xdst, x1); } if (!recp) { - /* Qualify the approximate reciprocal square root when the argument is - 0.0 by squashing the intermediary result to 0.0. */ - rtx xtmp = gen_reg_rtx (mmsk); - emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk), - gen_rtx_SUBREG (mmsk, xdst, 0))); - emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0)); - - /* Calculate the approximate square root. 
*/ - emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src)); + if (pg) + /* Multiply nonzero source values by the corresponding intermediate + result elements, so that the final calculation is the approximate + square root rather than its reciprocal. Select a zero result for + zero source values, to avoid the Inf * 0 -> NaN that we'd get + otherwise. */ + emit_insn (gen_cond (UNSPEC_COND_FMUL, mode, + xdst, xmsk, xdst, src, CONST0_RTX (mode))); + else + { + /* Qualify the approximate reciprocal square root when the + argument is 0.0 by squashing the intermediary result to 0.0. */ + rtx xtmp = gen_reg_rtx (mmsk); + emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk), + gen_rtx_SUBREG (mmsk, xdst, 0))); + emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0)); + + /* Calculate the approximate square root. */ + aarch64_emit_mult (xdst, pg, xdst, src); + } } /* Finalize the approximation. */ - emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1)); + aarch64_emit_mult (dst, pg, xdst, x1); return true; } -typedef rtx (*recpe_type) (rtx, rtx); - -/* Select reciprocal initial estimate insn depending on machine mode. */ - -static recpe_type -get_recpe_type (machine_mode mode) -{ - switch (mode) - { - case E_SFmode: return (gen_aarch64_frecpesf); - case E_V2SFmode: return (gen_aarch64_frecpev2sf); - case E_V4SFmode: return (gen_aarch64_frecpev4sf); - case E_DFmode: return (gen_aarch64_frecpedf); - case E_V2DFmode: return (gen_aarch64_frecpev2df); - default: gcc_unreachable (); - } -} - -typedef rtx (*recps_type) (rtx, rtx, rtx); - -/* Select reciprocal series step insn depending on machine mode. */ - -static recps_type -get_recps_type (machine_mode mode) -{ - switch (mode) - { - case E_SFmode: return (gen_aarch64_frecpssf); - case E_V2SFmode: return (gen_aarch64_frecpsv2sf); - case E_V4SFmode: return (gen_aarch64_frecpsv4sf); - case E_DFmode: return (gen_aarch64_frecpsdf); - case E_V2DFmode: return (gen_aarch64_frecpsv2df); - default: gcc_unreachable (); - } -} - /* Emit the instruction sequence to compute the approximation for the division of NUM by DEN in QUO and return whether the sequence was emitted or not. */ @@ -10009,26 +14197,32 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den) if (!TARGET_SIMD && VECTOR_MODE_P (mode)) return false; + rtx pg = NULL_RTX; + if (aarch64_sve_mode_p (mode)) + pg = aarch64_ptrue_reg (aarch64_sve_pred_mode (mode)); + /* Estimate the approximate reciprocal. */ rtx xrcp = gen_reg_rtx (mode); - emit_insn ((*get_recpe_type (mode)) (xrcp, den)); + emit_insn (gen_aarch64_frecpe (mode, xrcp, den)); /* Iterate over the series twice for SF and thrice for DF. */ int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2; - /* Optionally iterate over the series once less for faster performance, - while sacrificing the accuracy. */ + /* Optionally iterate over the series less for faster performance, + while sacrificing the accuracy. The default is 2 for DF and 1 for SF. */ if (flag_mlow_precision_div) - iterations--; + iterations = (GET_MODE_INNER (mode) == DFmode + ? aarch64_double_recp_precision + : aarch64_float_recp_precision); /* Iterate over the series to calculate the approximate reciprocal. 
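The approximate square-root and division sequences in this region are Newton-Raphson refinements of the FRSQRTE/FRECPE estimates, where each FRSQRTS step computes (3 - a*x*x)/2 and each FRECPS step computes 2 - a*x. A scalar standalone sketch of both refinements, with crude hand-picked initial estimates in place of the hardware estimate instructions:

#include <cassert>
#include <cmath>

// FRSQRTS-style step: refine x ~= 1/sqrt(a) via x *= (3 - a*x*x) / 2.
static double refine_rsqrt (double a, double x, int iterations)
{
  while (iterations--)
    x *= (3.0 - a * x * x) / 2.0;
  return x;
}

// FRECPS-style step: refine x ~= 1/a via x *= (2 - a*x).
static double refine_recip (double a, double x, int iterations)
{
  while (iterations--)
    x *= 2.0 - a * x;
  return x;
}

int main ()
{
  double a = 7.0;
  // Rough initial estimates stand in for FRSQRTE/FRECPE.
  double rsqrt = refine_rsqrt (a, 0.4, 3);    // three steps, as for DFmode
  double recip = refine_recip (a, 0.125, 3);
  assert (std::fabs (rsqrt - 1.0 / std::sqrt (a)) < 1e-6);
  assert (std::fabs (recip - 1.0 / a) < 1e-6);
  // The square root itself is then a * rsqrt, with the zero-input case
  // masked off as the patch does with FCMNE/AND or a predicated FMUL.
  assert (std::fabs (a * rsqrt - std::sqrt (a)) < 1e-5);
  return 0;
}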
*/ rtx xtmp = gen_reg_rtx (mode); while (iterations--) { - emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den)); + emit_insn (gen_aarch64_frecps (mode, xtmp, xrcp, den)); if (iterations > 0) - emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp)); + aarch64_emit_mult (xrcp, pg, xrcp, xtmp); } if (num != CONST1_RTX (mode)) @@ -10036,11 +14230,11 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den) /* As the approximate reciprocal of DEN is already calculated, only calculate the approximate division when NUM is not 1.0. */ rtx xnum = force_reg (mode, num); - emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum)); + aarch64_emit_mult (xrcp, pg, xrcp, xnum); } /* Finalize the approximation. */ - emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp)); + aarch64_emit_mult (quo, pg, xrcp, xtmp); return true; } @@ -10051,6 +14245,23 @@ aarch64_sched_issue_rate (void) return aarch64_tune_params.issue_rate; } +/* Implement TARGET_SCHED_VARIABLE_ISSUE. */ +static int +aarch64_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) +{ + if (DEBUG_INSN_P (insn)) + return more; + + rtx_code code = GET_CODE (PATTERN (insn)); + if (code == USE || code == CLOBBER) + return more; + + if (get_attr_type (insn) == TYPE_NO_INSN) + return more; + + return more - 1; +} + static int aarch64_sched_first_cycle_multipass_dfa_lookahead (void) { @@ -10074,6 +14285,265 @@ aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, /* Vectorizer cost model target hooks. */ +/* Information about how the CPU would issue the scalar, Advanced SIMD + or SVE version of a vector loop, using the scheme defined by the + aarch64_base_vec_issue_info hierarchy of structures. */ +struct aarch64_vec_op_count +{ + void dump () const; + + /* The number of individual "general" operations. See the comments + in aarch64_base_vec_issue_info for details. */ + unsigned int general_ops = 0; + + /* The number of load and store operations, under the same scheme + as above. */ + unsigned int loads = 0; + unsigned int stores = 0; + + /* The minimum number of cycles needed to execute all loop-carried + operations, which in the vector code become associated with + reductions. */ + unsigned int reduction_latency = 0; +}; + +/* Extends aarch64_vec_op_count with SVE-specific information. */ +struct aarch64_sve_op_count : aarch64_vec_op_count +{ + void dump () const; + + /* The number of individual predicate operations. See the comments + in aarch64_sve_vec_issue_info for details. */ + unsigned int pred_ops = 0; +}; + +/* Information about vector code that we're in the process of costing. */ +struct aarch64_vector_costs +{ + /* The normal latency-based costs for each region (prologue, body and + epilogue), indexed by vect_cost_model_location. */ + unsigned int region[3] = {}; + + /* True if we have performed one-time initialization based on the vec_info. + + This variable exists because the vec_info is not passed to the + init_cost hook. We therefore have to defer initialization based on + it till later. */ + bool analyzed_vinfo = false; + + /* True if we're costing a vector loop, false if we're costing block-level + vectorization. */ + bool is_loop = false; + + /* True if we've seen an SVE operation that we cannot currently vectorize + using Advanced SIMD. */ + bool saw_sve_only_op = false; + + /* - If VEC_FLAGS is zero then we're costing the original scalar code. + - If VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced + SIMD code. + - If VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code. 
*/ + unsigned int vec_flags = 0; + + /* On some CPUs, SVE and Advanced SIMD provide the same theoretical vector + throughput, such as 4x128 Advanced SIMD vs. 2x256 SVE. In those + situations, we try to predict whether an Advanced SIMD implementation + of the loop could be completely unrolled and become straight-line code. + If so, it is generally better to use the Advanced SIMD version rather + than length-agnostic SVE, since the SVE loop would execute an unknown + number of times and so could not be completely unrolled in the same way. + + If we're applying this heuristic, UNROLLED_ADVSIMD_NITERS is the + number of Advanced SIMD loop iterations that would be unrolled and + UNROLLED_ADVSIMD_STMTS estimates the total number of statements + in the unrolled loop. Both values are zero if we're not applying + the heuristic. */ + unsigned HOST_WIDE_INT unrolled_advsimd_niters = 0; + unsigned HOST_WIDE_INT unrolled_advsimd_stmts = 0; + + /* If we're vectorizing a loop that executes a constant number of times, + this variable gives the number of times that the vector loop would + iterate, otherwise it is zero. */ + uint64_t num_vector_iterations = 0; + + /* Used only when vectorizing loops. Estimates the number and kind of scalar + operations that would be needed to perform the same work as one iteration + of the vector loop. */ + aarch64_vec_op_count scalar_ops; + + /* Used only when vectorizing loops. If VEC_FLAGS & VEC_ADVSIMD, + this structure estimates the number and kind of operations that the + vector loop would contain. If VEC_FLAGS & VEC_SVE, the structure + estimates what the equivalent Advanced SIMD-only code would need in + order to perform the same work as one iteration of the SVE loop. */ + aarch64_vec_op_count advsimd_ops; + + /* Used only when vectorizing loops with SVE. It estimates the number and + kind of operations that the SVE loop would contain. */ + aarch64_sve_op_count sve_ops; + + /* Used to detect cases in which we end up costing the same load twice, + once to account for results that are actually used and once to account + for unused results. */ + hash_map, unsigned int> seen_loads; +}; + +/* Implement TARGET_VECTORIZE_INIT_COST. */ +void * +aarch64_init_cost (class loop *) +{ + return new aarch64_vector_costs; +} + +/* Return true if the current CPU should use the new costs defined + in GCC 11. This should be removed for GCC 12 and above, with the + costs applying to all CPUs instead. */ +static bool +aarch64_use_new_vector_costs_p () +{ + return (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS); +} + +/* Return the appropriate SIMD costs for vectors of type VECTYPE. */ +static const simd_vec_cost * +aarch64_simd_vec_costs (tree vectype) +{ + const cpu_vector_cost *costs = aarch64_tune_params.vec_costs; + if (vectype != NULL + && aarch64_sve_mode_p (TYPE_MODE (vectype)) + && costs->sve != NULL) + return costs->sve; + return costs->advsimd; +} + +/* Return the appropriate SIMD costs for vectors with VEC_* flags FLAGS. */ +static const simd_vec_cost * +aarch64_simd_vec_costs_for_flags (unsigned int flags) +{ + const cpu_vector_cost *costs = aarch64_tune_params.vec_costs; + if ((flags & VEC_ANY_SVE) && costs->sve) + return costs->sve; + return costs->advsimd; +} + +/* Decide whether to use the unrolling heuristic described above + aarch64_vector_costs::unrolled_advsimd_niters, updating that + field if so. LOOP_VINFO describes the loop that we're vectorizing + and COSTS are the costs that we're calculating for it. 
*/ +static void +aarch64_record_potential_advsimd_unrolling (loop_vec_info loop_vinfo, + aarch64_vector_costs *costs) +{ + /* The heuristic only makes sense on targets that have the same + vector throughput for SVE and Advanced SIMD. */ + if (!(aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT)) + return; + + /* We only want to apply the heuristic if LOOP_VINFO is being + vectorized for SVE. */ + if (!(costs->vec_flags & VEC_ANY_SVE)) + return; + + /* Check whether it is possible in principle to use Advanced SIMD + instead. */ + if (aarch64_autovec_preference == 2) + return; + + /* We don't want to apply the heuristic to outer loops, since it's + harder to track two levels of unrolling. */ + if (LOOP_VINFO_LOOP (loop_vinfo)->inner) + return; + + /* Only handle cases in which the number of Advanced SIMD iterations + would be known at compile time but the number of SVE iterations + would not. */ + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + || aarch64_sve_vg.is_constant ()) + return; + + /* Guess how many times the Advanced SIMD loop would iterate and make + sure that it is within the complete unrolling limit. Even if the + number of iterations is small enough, the number of statements might + not be, which is why we need to estimate the number of statements too. */ + unsigned int estimated_vq = aarch64_estimated_sve_vq (); + unsigned int advsimd_vf = CEIL (vect_vf_for_cost (loop_vinfo), estimated_vq); + unsigned HOST_WIDE_INT unrolled_advsimd_niters + = LOOP_VINFO_INT_NITERS (loop_vinfo) / advsimd_vf; + if (unrolled_advsimd_niters > (unsigned int) param_max_completely_peel_times) + return; + + /* Record that we're applying the heuristic and should try to estimate + the number of statements in the Advanced SIMD loop. */ + costs->unrolled_advsimd_niters = unrolled_advsimd_niters; +} + +/* Do one-time initialization of COSTS given that we're costing the loop + vectorization described by LOOP_VINFO. */ +static void +aarch64_analyze_loop_vinfo (loop_vec_info loop_vinfo, + aarch64_vector_costs *costs) +{ + costs->is_loop = true; + + /* Record the number of times that the vector loop would execute, + if known. */ + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + auto scalar_niters = max_stmt_executions_int (loop); + if (scalar_niters >= 0) + { + unsigned int vf = vect_vf_for_cost (loop_vinfo); + if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) + costs->num_vector_iterations = scalar_niters / vf; + else + costs->num_vector_iterations = CEIL (scalar_niters, vf); + } + + /* Detect whether we're costing the scalar code or the vector code. + This is a bit hacky: it would be better if the vectorizer told + us directly. + + If we're costing the vector code, record whether we're vectorizing + for Advanced SIMD or SVE. */ + if (costs == LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)) + costs->vec_flags = aarch64_classify_vector_mode (loop_vinfo->vector_mode); + else + costs->vec_flags = 0; + + /* Detect whether we're vectorizing for SVE and should + apply the unrolling heuristic described above + aarch64_vector_costs::unrolled_advsimd_niters. */ + aarch64_record_potential_advsimd_unrolling (loop_vinfo, costs); + + /* Record the issue information for any SVE WHILE instructions that the + loop needs. 
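The unrolling heuristic recorded above comes down to a little integer arithmetic: derive the Advanced SIMD vectorization factor from the SVE one and the estimated number of 128-bit quadwords, then check that the known scalar iteration count divided by it stays within the complete-unrolling limit. A self-contained restatement under assumed helper names (not the GCC ones):

#include <cassert>

// Round-up division, as GCC's CEIL macro does.
static unsigned ceil_div (unsigned a, unsigned b) { return (a + b - 1) / b; }

// Guess how many iterations an unrolled Advanced SIMD version of an SVE
// loop would execute: derive the Advanced SIMD VF from the SVE VF (which
// is scaled by the estimated number of 128-bit quadwords per vector) and
// divide the known scalar iteration count by it.
static unsigned long
guess_unrolled_advsimd_niters (unsigned long scalar_niters,
                               unsigned sve_vf_for_cost,
                               unsigned estimated_vq,
                               unsigned max_peel_times)
{
  unsigned advsimd_vf = ceil_div (sve_vf_for_cost, estimated_vq);
  unsigned long niters = scalar_niters / advsimd_vf;
  // The heuristic only applies when complete unrolling is plausible.
  return niters > max_peel_times ? 0 : niters;
}

int main ()
{
  // A 64-iteration loop with an SVE VF of 8 on a core estimated at two
  // 128-bit quadwords: Advanced SIMD VF is 4, giving 16 unrolled iterations.
  assert (guess_unrolled_advsimd_niters (64, 8, 2, 16) == 16);
  // A 1000-iteration loop exceeds the peeling limit, so the heuristic is off.
  assert (guess_unrolled_advsimd_niters (1000, 8, 2, 16) == 0);
  return 0;
}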
*/ + auto *issue_info = aarch64_tune_params.vec_costs->issue_info; + if (issue_info + && issue_info->sve + && !LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) + { + unsigned int num_masks = 0; + rgroup_controls *rgm; + unsigned int num_vectors_m1; + FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm) + if (rgm->type) + num_masks += num_vectors_m1 + 1; + costs->sve_ops.pred_ops += num_masks * issue_info->sve->while_pred_ops; + } +} + +/* Do one-time initialization of COSTS given that we're costing the block + vectorization described by BB_VINFO. */ +static void +aarch64_analyze_bb_vinfo (bb_vec_info bb_vinfo, aarch64_vector_costs *costs) +{ + /* Unfortunately, there's no easy way of telling whether we're costing + the vector code or the scalar code, so just assume that we're costing + the vector code. */ + costs->vec_flags = aarch64_classify_vector_mode (bb_vinfo->vector_mode); +} + /* Implement targetm.vectorize.builtin_vectorization_cost. */ static int aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, @@ -10087,6 +14557,8 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, if (vectype != NULL) fp = FLOAT_TYPE_P (vectype); + const simd_vec_cost *simd_costs = aarch64_simd_vec_costs (vectype); + switch (type_of_cost) { case scalar_stmt: @@ -10099,27 +14571,28 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return costs->scalar_store_cost; case vector_stmt: - return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost; + return fp ? simd_costs->fp_stmt_cost + : simd_costs->int_stmt_cost; case vector_load: - return costs->vec_align_load_cost; + return simd_costs->align_load_cost; case vector_store: - return costs->vec_store_cost; + return simd_costs->store_cost; case vec_to_scalar: - return costs->vec_to_scalar_cost; + return simd_costs->vec_to_scalar_cost; case scalar_to_vec: - return costs->scalar_to_vec_cost; + return simd_costs->scalar_to_vec_cost; case unaligned_load: case vector_gather_load: - return costs->vec_unalign_load_cost; + return simd_costs->unalign_load_cost; case unaligned_store: case vector_scatter_store: - return costs->vec_unalign_store_cost; + return simd_costs->unalign_store_cost; case cond_branch_taken: return costs->cond_taken_branch_cost; @@ -10127,73 +14600,1178 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case cond_branch_not_taken: return costs->cond_not_taken_branch_cost; - case vec_perm: - return costs->vec_permute_cost; + case vec_perm: + return simd_costs->permute_cost; + + case vec_promote_demote: + return fp ? simd_costs->fp_stmt_cost + : simd_costs->int_stmt_cost; + + case vec_construct: + elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } +} + +/* Return true if STMT_INFO represents part of a reduction. */ +static bool +aarch64_is_reduction (stmt_vec_info stmt_info) +{ + return (STMT_VINFO_REDUC_DEF (stmt_info) + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))); +} + +/* If STMT_INFO describes a reduction, return the type of reduction + it describes, otherwise return -1. 
*/ +static int +aarch64_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) +{ + if (loop_vec_info loop_vinfo = dyn_cast (vinfo)) + if (STMT_VINFO_REDUC_DEF (stmt_info)) + { + stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); + return int (STMT_VINFO_REDUC_TYPE (reduc_info)); + } + return -1; +} + +/* Return true if an access of kind KIND for STMT_INFO represents one + vector of an LD[234] or ST[234] operation. Return the total number of + vectors (2, 3 or 4) if so, otherwise return a value outside that range. */ +static int +aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info) +{ + if ((kind == vector_load + || kind == unaligned_load + || kind == vector_store + || kind == unaligned_store) + && STMT_VINFO_DATA_REF (stmt_info)) + { + stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); + if (stmt_info + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES) + return DR_GROUP_SIZE (stmt_info); + } + return 0; +} + +/* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the + scalar type of the values being compared. Return null otherwise. */ +static tree +aarch64_embedded_comparison_type (stmt_vec_info stmt_info) +{ + if (auto *assign = dyn_cast (stmt_info->stmt)) + if (gimple_assign_rhs_code (assign) == COND_EXPR) + { + tree cond = gimple_assign_rhs1 (assign); + if (COMPARISON_CLASS_P (cond)) + return TREE_TYPE (TREE_OPERAND (cond, 0)); + } + return NULL_TREE; +} + +/* If STMT_INFO is a comparison or contains an embedded comparison, return the + scalar type of the values being compared. Return null otherwise. */ +static tree +aarch64_comparison_type (stmt_vec_info stmt_info) +{ + if (auto *assign = dyn_cast (stmt_info->stmt)) + if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) + return TREE_TYPE (gimple_assign_rhs1 (assign)); + return aarch64_embedded_comparison_type (stmt_info); +} + +/* Return true if creating multiple copies of STMT_INFO for Advanced SIMD + vectors would produce a series of LDP or STP operations. KIND is the + kind of statement that STMT_INFO represents. */ +static bool +aarch64_advsimd_ldp_stp_p (enum vect_cost_for_stmt kind, + stmt_vec_info stmt_info) +{ + switch (kind) + { + case vector_load: + case vector_store: + case unaligned_load: + case unaligned_store: + break; + + default: + return false; + } + + if (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) + return false; + + return is_gimple_assign (stmt_info->stmt); +} + +/* Return true if STMT_INFO extends the result of a load. */ +static bool +aarch64_extending_load_p (class vec_info *vinfo, stmt_vec_info stmt_info) +{ + gassign *assign = dyn_cast (stmt_info->stmt); + if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) + return false; + + tree rhs = gimple_assign_rhs1 (stmt_info->stmt); + tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); + tree rhs_type = TREE_TYPE (rhs); + if (!INTEGRAL_TYPE_P (lhs_type) + || !INTEGRAL_TYPE_P (rhs_type) + || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type)) + return false; + + stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); + return (def_stmt_info + && STMT_VINFO_DATA_REF (def_stmt_info) + && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info))); +} + +/* Return true if STMT_INFO is an integer truncation. 
*/ +static bool +aarch64_integer_truncation_p (stmt_vec_info stmt_info) +{ + gassign *assign = dyn_cast (stmt_info->stmt); + if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) + return false; + + tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); + tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); + return (INTEGRAL_TYPE_P (lhs_type) + && INTEGRAL_TYPE_P (rhs_type) + && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)); +} + +/* Return true if STMT_INFO is the second part of a two-statement multiply-add + or multiply-subtract sequence that might be suitable for fusing into a + single instruction. */ +static bool +aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) +{ + gassign *assign = dyn_cast (stmt_info->stmt); + if (!assign) + return false; + tree_code code = gimple_assign_rhs_code (assign); + if (code != PLUS_EXPR && code != MINUS_EXPR) + return false; + + if (CONSTANT_CLASS_P (gimple_assign_rhs1 (assign)) + || CONSTANT_CLASS_P (gimple_assign_rhs2 (assign))) + return false; + + for (int i = 1; i < 3; ++i) + { + tree rhs = gimple_op (assign, i); + /* ??? Should we try to check for a single use as well? */ + if (TREE_CODE (rhs) != SSA_NAME) + continue; + + stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); + if (!def_stmt_info + || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) + continue; + gassign *rhs_assign = dyn_cast (def_stmt_info->stmt); + if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) + continue; + + return true; + } + return false; +} + +/* Return true if the vectorized form of STMT_INFO is something that is only + possible when using SVE instead of Advanced SIMD. VECTYPE is the type of + the vector that STMT_INFO is operating on. */ +static bool +aarch64_sve_only_stmt_p (stmt_vec_info stmt_info, tree vectype) +{ + if (!aarch64_sve_mode_p (TYPE_MODE (vectype))) + return false; + + if (STMT_VINFO_DATA_REF (stmt_info)) + { + /* Check for true gathers and scatters (rather than just strided accesses + that we've chosen to implement using gathers and scatters). Although + in principle we could use elementwise accesses for Advanced SIMD, + the vectorizer doesn't yet support that. */ + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + return true; + + /* Check for masked loads and stores. */ + if (auto *call = dyn_cast (stmt_info->stmt)) + if (gimple_call_internal_p (call) + && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0) + return true; + } + + /* Check for 64-bit integer multiplications. */ + auto *assign = dyn_cast (stmt_info->stmt); + if (assign + && gimple_assign_rhs_code (assign) == MULT_EXPR + && GET_MODE_INNER (TYPE_MODE (vectype)) == DImode + && !integer_pow2p (gimple_assign_rhs2 (assign))) + return true; + + return false; +} + +/* We are considering implementing STMT_INFO using SVE vector type VECTYPE. + If STMT_INFO is an in-loop reduction that SVE supports directly, return + its latency in cycles, otherwise return zero. SVE_COSTS specifies the + latencies of the relevant instructions. 
*/ +static unsigned int +aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, + stmt_vec_info stmt_info, + tree vectype, + const sve_vec_cost *sve_costs) +{ + switch (aarch64_reduc_type (vinfo, stmt_info)) + { + case EXTRACT_LAST_REDUCTION: + return sve_costs->clast_cost; + + case FOLD_LEFT_REDUCTION: + switch (GET_MODE_INNER (TYPE_MODE (vectype))) + { + case E_HFmode: + case E_BFmode: + return sve_costs->fadda_f16_cost; + + case E_SFmode: + return sve_costs->fadda_f32_cost; + + case E_DFmode: + return sve_costs->fadda_f64_cost; + + default: + break; + } + break; + } + + return 0; +} + +/* STMT_INFO describes a loop-carried operation in the original scalar code + that we are considering implementing as a reduction. Return one of the + following values, depending on VEC_FLAGS: + + - If VEC_FLAGS is zero, return the loop carry latency of the original + scalar operation. + + - If VEC_FLAGS & VEC_ADVSIMD, return the loop carry latency of the + the Advanced SIMD implementation. + + - If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the + SVE implementation. + + VECTYPE is the type of vector that the vectorizer is considering using + for STMT_INFO, which might be different from the type of vector described + by VEC_FLAGS. */ +static unsigned int +aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info, + tree vectype, unsigned int vec_flags) +{ + const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs; + const sve_vec_cost *sve_costs = nullptr; + if (vec_flags & VEC_ANY_SVE) + sve_costs = aarch64_tune_params.vec_costs->sve; + + /* If the caller is asking for the SVE latency, check for forms of reduction + that only SVE can handle directly. */ + if (sve_costs) + { + unsigned int latency + = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, vectype, + sve_costs); + if (latency) + return latency; + } + + /* Handle scalar costs. */ + if (vec_flags == 0) + { + if (FLOAT_TYPE_P (vectype)) + return vec_costs->scalar_fp_stmt_cost; + return vec_costs->scalar_int_stmt_cost; + } + + /* Otherwise, the loop body just contains normal integer or FP operations, + with a vector reduction outside the loop. */ + const simd_vec_cost *simd_costs + = aarch64_simd_vec_costs_for_flags (vec_flags); + if (FLOAT_TYPE_P (vectype)) + return simd_costs->fp_stmt_cost; + return simd_costs->int_stmt_cost; +} + +/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost + for STMT_INFO, which has cost kind KIND. If this is a scalar operation, + try to subdivide the target-independent categorization provided by KIND + to get a more accurate cost. */ +static unsigned int +aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, + unsigned int stmt_cost) +{ + /* Detect an extension of a loaded value. In general, we'll be able to fuse + the extension with the load. */ + if (kind == scalar_stmt && aarch64_extending_load_p (vinfo, stmt_info)) + return 0; + + return stmt_cost; +} + +/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost + for the vectorized form of STMT_INFO, which has cost kind KIND and which + when vectorized would operate on vector type VECTYPE. Try to subdivide + the target-independent categorization provided by KIND to get a more + accurate cost. WHERE specifies where the cost associated with KIND + occurs. 
*/ +static unsigned int +aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, tree vectype, + enum vect_cost_model_location where, + unsigned int stmt_cost) +{ + const simd_vec_cost *simd_costs = aarch64_simd_vec_costs (vectype); + const sve_vec_cost *sve_costs = nullptr; + if (aarch64_sve_mode_p (TYPE_MODE (vectype))) + sve_costs = aarch64_tune_params.vec_costs->sve; + + /* It's generally better to avoid costing inductions, since the induction + will usually be hidden by other operations. This is particularly true + for things like COND_REDUCTIONS. */ + if (is_a (stmt_info->stmt)) + return 0; + + /* Detect cases in which vec_to_scalar is describing the extraction of a + vector element in preparation for a scalar store. The store itself is + costed separately. */ + if (kind == vec_to_scalar + && STMT_VINFO_DATA_REF (stmt_info) + && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))) + return simd_costs->store_elt_extra_cost; + + /* Detect cases in which a scalar_store is really storing one element + in a scatter operation. */ + if (kind == scalar_store + && sve_costs + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER) + return sve_costs->scatter_store_elt_cost; + + /* Detect cases in which vec_to_scalar represents an in-loop reduction. */ + if (kind == vec_to_scalar + && where == vect_body + && sve_costs) + { + unsigned int latency + = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, vectype, + sve_costs); + if (latency) + return latency; + } + + /* Detect cases in which vec_to_scalar represents a single reduction + instruction like FADDP or MAXV. */ + if (kind == vec_to_scalar + && where == vect_epilogue + && aarch64_is_reduction (stmt_info)) + switch (GET_MODE_INNER (TYPE_MODE (vectype))) + { + case E_QImode: + return simd_costs->reduc_i8_cost; + + case E_HImode: + return simd_costs->reduc_i16_cost; + + case E_SImode: + return simd_costs->reduc_i32_cost; + + case E_DImode: + return simd_costs->reduc_i64_cost; + + case E_HFmode: + case E_BFmode: + return simd_costs->reduc_f16_cost; + + case E_SFmode: + return simd_costs->reduc_f32_cost; + + case E_DFmode: + return simd_costs->reduc_f64_cost; + + default: + break; + } + + /* Otherwise stick with the original categorization. */ + return stmt_cost; +} + +/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost + for STMT_INFO, which has cost kind KIND and which when vectorized would + operate on vector type VECTYPE. Adjust the cost as necessary for SVE + targets. */ +static unsigned int +aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, tree vectype, + unsigned int stmt_cost) +{ + /* Unlike vec_promote_demote, vector_stmt conversions do not change the + vector register size or number of units. Integer promotions of this + type therefore map to SXT[BHW] or UXT[BHW]. + + Most loads have extending forms that can do the sign or zero extension + on the fly. Optimistically assume that a load followed by an extension + will fold to this form during combine, and that the extension therefore + comes for free. */ + if (kind == vector_stmt && aarch64_extending_load_p (vinfo, stmt_info)) + stmt_cost = 0; + + /* For similar reasons, vector_stmt integer truncations are a no-op, + because we can just ignore the unused upper bits of the source. 
*/ + if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info)) + stmt_cost = 0; + + /* Advanced SIMD can load and store pairs of registers using LDP and STP, + but there are no equivalent instructions for SVE. This means that + (all other things being equal) 128-bit SVE needs twice as many load + and store instructions as Advanced SIMD in order to process vector pairs. + + Also, scalar code can often use LDP and STP to access pairs of values, + so it is too simplistic to say that one SVE load or store replaces + VF scalar loads and stores. + + Ideally we would account for this in the scalar and Advanced SIMD + costs by making suitable load/store pairs as cheap as a single + load/store. However, that would be a very invasive change and in + practice it tends to stress other parts of the cost model too much. + E.g. stores of scalar constants currently count just a store, + whereas stores of vector constants count a store and a vec_init. + This is an artificial distinction for AArch64, where stores of + nonzero scalar constants need the same kind of register invariant + as vector stores. + + An alternative would be to double the cost of any SVE loads and stores + that could be paired in Advanced SIMD (and possibly also paired in + scalar code). But this tends to stress other parts of the cost model + in the same way. It also means that we can fall back to Advanced SIMD + even if full-loop predication would have been useful. + + Here we go for a more conservative version: double the costs of SVE + loads and stores if one iteration of the scalar loop processes enough + elements for it to use a whole number of Advanced SIMD LDP or STP + instructions. This makes it very likely that the VF would be 1 for + Advanced SIMD, and so no epilogue should be needed. */ + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + { + stmt_vec_info first = DR_GROUP_FIRST_ELEMENT (stmt_info); + unsigned int count = DR_GROUP_SIZE (first) - DR_GROUP_GAP (first); + unsigned int elt_bits = GET_MODE_UNIT_BITSIZE (TYPE_MODE (vectype)); + if (multiple_p (count * elt_bits, 256) + && aarch64_advsimd_ldp_stp_p (kind, stmt_info)) + stmt_cost *= 2; + } + + return stmt_cost; +} + +/* STMT_COST is the cost calculated for STMT_INFO, which has cost kind KIND + and which when vectorized would operate on vector type VECTYPE. Add the + cost of any embedded operations. */ +static unsigned int +aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info, + tree vectype, unsigned int stmt_cost) +{ + if (vectype) + { + const simd_vec_cost *simd_costs = aarch64_simd_vec_costs (vectype); + + /* Detect cases in which a vector load or store represents an + LD[234] or ST[234] instruction. 
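The grouped-access adjustment above doubles SVE load/store costs whenever one scalar iteration covers a whole number of 256-bit chunks, i.e. cases that Advanced SIMD could handle with LDP/STP pairs. A fixed-width standalone restatement of that test (the real code uses poly-int multiple_p and the LDP/STP tuning check):

#include <cassert>

// One scalar iteration accesses COUNT elements of ELT_BITS bits each.
// If that is a whole number of 256-bit chunks, Advanced SIMD (and often
// scalar code) could use LDP/STP pairs, so SVE loads and stores are
// charged double to keep the comparison fair.
static unsigned adjust_sve_mem_cost (unsigned stmt_cost, unsigned count,
                                     unsigned elt_bits, bool ldp_stp_ok)
{
  if ((count * elt_bits) % 256 == 0 && ldp_stp_ok)
    return stmt_cost * 2;
  return stmt_cost;
}

int main ()
{
  // Eight 32-bit elements per iteration = exactly one LDP of Q registers.
  assert (adjust_sve_mem_cost (1, 8, 32, true) == 2);
  // Three 64-bit elements do not fill a 256-bit pair; cost is unchanged.
  assert (adjust_sve_mem_cost (1, 3, 64, true) == 1);
  return 0;
}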
*/ + switch (aarch64_ld234_st234_vectors (kind, stmt_info)) + { + case 2: + stmt_cost += simd_costs->ld2_st2_permute_cost; + break; + + case 3: + stmt_cost += simd_costs->ld3_st3_permute_cost; + break; + + case 4: + stmt_cost += simd_costs->ld4_st4_permute_cost; + break; + } + + if (kind == vector_stmt || kind == vec_to_scalar) + if (tree cmp_type = aarch64_embedded_comparison_type (stmt_info)) + { + if (FLOAT_TYPE_P (cmp_type)) + stmt_cost += simd_costs->fp_stmt_cost; + else + stmt_cost += simd_costs->int_stmt_cost; + } + } + + if (kind == scalar_stmt) + if (tree cmp_type = aarch64_embedded_comparison_type (stmt_info)) + { + if (FLOAT_TYPE_P (cmp_type)) + stmt_cost += aarch64_tune_params.vec_costs->scalar_fp_stmt_cost; + else + stmt_cost += aarch64_tune_params.vec_costs->scalar_int_stmt_cost; + } + + return stmt_cost; +} + +/* VINFO, COSTS, COUNT, KIND, STMT_INFO and VECTYPE are the same as for + TARGET_VECTORIZE_ADD_STMT_COST and they describe an operation in the + body of a vector loop. Record issue information relating to the vector + operation in OPS, where OPS is one of COSTS->scalar_ops, COSTS->advsimd_ops + or COSTS->sve_ops; see the comments above those variables for details. + In addition: + + - VEC_FLAGS is zero if OPS is COSTS->scalar_ops. + + - VEC_FLAGS & VEC_ADVSIMD is nonzero if OPS is COSTS->advsimd_ops. + + - VEC_FLAGS & VEC_ANY_SVE is nonzero if OPS is COSTS->sve_ops. + + ISSUE_INFO provides the scalar, Advanced SIMD or SVE issue information + associated with OPS and VEC_FLAGS. FACTOR says how many iterations of + the loop described by VEC_FLAGS would be needed to match one iteration + of the vector loop in VINFO. */ +static void +aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, + unsigned int count, enum vect_cost_for_stmt kind, + _stmt_vec_info *stmt_info, tree vectype, + unsigned int vec_flags, aarch64_vec_op_count *ops, + const aarch64_base_vec_issue_info *issue_info, + unsigned int factor) +{ + if (!issue_info) + return; + + const aarch64_simd_vec_issue_info *simd_issue = nullptr; + if (vec_flags) + simd_issue = static_cast (issue_info); + + const aarch64_sve_vec_issue_info *sve_issue = nullptr; + if (vec_flags & VEC_ANY_SVE) + sve_issue = static_cast (issue_info); + + /* Calculate the minimum cycles per iteration imposed by a reduction + operation. */ + if ((kind == vector_stmt || kind == vec_to_scalar) + && aarch64_is_reduction (stmt_info)) + { + unsigned int base + = aarch64_in_loop_reduction_latency (vinfo, stmt_info, vectype, + vec_flags); + if (aarch64_reduc_type (vinfo, stmt_info) == FOLD_LEFT_REDUCTION) + { + if (aarch64_sve_mode_p (TYPE_MODE (vectype))) + { + /* When costing an SVE FADDA, the vectorizer treats vec_to_scalar + as a single operation, whereas for Advanced SIMD it is a + per-element one. Increase the factor accordingly, both for + the reduction_latency calculation and for the op couting. */ + if (vec_flags & VEC_ADVSIMD) + factor = vect_nunits_for_cost (vectype); + } + else + /* An Advanced SIMD fold-left reduction is the same as a + scalar one and the vectorizer therefore treats vec_to_scalar + as a per-element cost. There is no extra factor to apply for + scalar code, either for reduction_latency or for the op + counting below. */ + factor = 1; + } + + /* ??? Ideally for vector code we'd do COUNT * FACTOR reductions in + parallel, but unfortunately that's not yet the case. 
*/ + ops->reduction_latency = MAX (ops->reduction_latency, + base * count * factor); + } + + /* Assume that multiply-adds will become a single operation. */ + if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info)) + return; + + /* When costing scalar statements in vector code, the count already + includes the number of scalar elements in the vector, so we don't + need to apply the factor as well. */ + if (kind == scalar_load || kind == scalar_store || kind == scalar_stmt) + factor = 1; + + /* This can go negative with the load handling below. */ + int num_copies = count * factor; + + /* Count the basic operation cost associated with KIND. */ + switch (kind) + { + case cond_branch_taken: + case cond_branch_not_taken: + case vector_gather_load: + case vector_scatter_store: + /* We currently don't expect these to be used in a loop body. */ + break; + + case vec_perm: + case vec_promote_demote: + case vec_construct: + case vec_to_scalar: + case scalar_to_vec: + /* Assume that these operations have no overhead in the original + scalar code. */ + if (!vec_flags) + break; + /* Fallthrough. */ + case vector_stmt: + case scalar_stmt: + ops->general_ops += num_copies; + break; + + case scalar_load: + case vector_load: + case unaligned_load: + /* When costing scalars, detect cases in which we are called twice for + the same load. This happens for LD[234] operations if only some of + the results are used. The first time represents the cost of loading + the unused vectors, while the second time represents the cost of + loading the useful parts. Only the latter should count towards the + scalar costs. */ + if (stmt_info && !vec_flags) + { + bool existed = false; + unsigned int &prev_count + = costs->seen_loads.get_or_insert (stmt_info, &existed); + if (existed) + num_copies -= prev_count; + else + prev_count = num_copies; + } + ops->loads += num_copies; + if (vec_flags || FLOAT_TYPE_P (vectype)) + ops->general_ops += issue_info->fp_simd_load_general_ops * num_copies; + break; + + case vector_store: + case unaligned_store: + case scalar_store: + ops->stores += num_copies; + if (vec_flags || FLOAT_TYPE_P (vectype)) + ops->general_ops += issue_info->fp_simd_store_general_ops * num_copies; + break; + } + + /* Add any embedded comparison operations. */ + if ((kind == scalar_stmt || kind == vector_stmt || kind == vec_to_scalar) + && aarch64_embedded_comparison_type (stmt_info)) + ops->general_ops += num_copies; + + /* Detect COND_REDUCTIONs and things that would need to become + COND_REDUCTIONs if they were implemented using Advanced SIMD. + There are then two sets of VEC_COND_EXPRs, whereas so far we + have only accounted for one. */ + if (vec_flags && (kind == vector_stmt || kind == vec_to_scalar)) + { + int reduc_type = aarch64_reduc_type (vinfo, stmt_info); + if ((reduc_type == EXTRACT_LAST_REDUCTION && (vec_flags & VEC_ADVSIMD)) + || reduc_type == COND_REDUCTION) + ops->general_ops += num_copies; + } + + /* Count the predicate operations needed by an SVE comparison. */ + if (sve_issue && (kind == vector_stmt || kind == vec_to_scalar)) + if (tree type = aarch64_comparison_type (stmt_info)) + { + unsigned int base = (FLOAT_TYPE_P (type) + ? sve_issue->fp_cmp_pred_ops + : sve_issue->int_cmp_pred_ops); + costs->sve_ops.pred_ops += base * num_copies; + } + + /* Add any extra overhead associated with LD[234] and ST[234] operations. 
*/ + if (simd_issue) + switch (aarch64_ld234_st234_vectors (kind, stmt_info)) + { + case 2: + ops->general_ops += simd_issue->ld2_st2_general_ops * num_copies; + break; - case vec_promote_demote: - return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost; + case 3: + ops->general_ops += simd_issue->ld3_st3_general_ops * num_copies; + break; - case vec_construct: - elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); - return elements / 2 + 1; + case 4: + ops->general_ops += simd_issue->ld4_st4_general_ops * num_copies; + break; + } - default: - gcc_unreachable (); + /* Add any overhead associated with gather loads and scatter stores. */ + if (sve_issue + && (kind == scalar_load || kind == scalar_store) + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER) + { + unsigned int pairs = CEIL (count, 2); + costs->sve_ops.pred_ops + += sve_issue->gather_scatter_pair_pred_ops * pairs; + ops->general_ops += sve_issue->gather_scatter_pair_general_ops * pairs; } } /* Implement targetm.vectorize.add_stmt_cost. */ static unsigned -aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, - struct _stmt_vec_info *stmt_info, int misalign, - enum vect_cost_model_location where) +aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count, + enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, tree vectype, + int misalign, enum vect_cost_model_location where) { - unsigned *cost = (unsigned *) data; + auto *costs = static_cast (data); unsigned retval = 0; if (flag_vect_cost_model) { - tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; - int stmt_cost = - aarch64_builtin_vectorization_cost (kind, vectype, misalign); + int stmt_cost + = aarch64_builtin_vectorization_cost (kind, vectype, misalign); + + /* Do one-time initialization based on the vinfo. */ + loop_vec_info loop_vinfo = dyn_cast (vinfo); + bb_vec_info bb_vinfo = dyn_cast (vinfo); + if (!costs->analyzed_vinfo && aarch64_use_new_vector_costs_p ()) + { + if (loop_vinfo) + aarch64_analyze_loop_vinfo (loop_vinfo, costs); + else + aarch64_analyze_bb_vinfo (bb_vinfo, costs); + costs->analyzed_vinfo = true; + } + + /* Try to get a more accurate cost by looking at STMT_INFO instead + of just looking at KIND. */ + if (stmt_info && aarch64_use_new_vector_costs_p ()) + { + if (vectype && aarch64_sve_only_stmt_p (stmt_info, vectype)) + costs->saw_sve_only_op = true; + + stmt_cost = aarch64_detect_scalar_stmt_subtype + (vinfo, kind, stmt_info, stmt_cost); + + if (vectype && costs->vec_flags) + stmt_cost = aarch64_detect_vector_stmt_subtype (vinfo, kind, + stmt_info, vectype, + where, stmt_cost); + } + + /* Do any SVE-specific adjustments to the cost. */ + if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype))) + stmt_cost = aarch64_sve_adjust_stmt_cost (vinfo, kind, stmt_info, + vectype, stmt_cost); + + if (stmt_info && aarch64_use_new_vector_costs_p ()) + { + /* Account for any extra "embedded" costs that apply additively + to the base cost calculated above. */ + stmt_cost = aarch64_adjust_stmt_cost (kind, stmt_info, vectype, + stmt_cost); + + /* If we're recording a nonzero vector loop body cost, also estimate + the operations that would need to be issued by all relevant + implementations of the loop. */ + auto *issue_info = aarch64_tune_params.vec_costs->issue_info; + if (loop_vinfo + && issue_info + && costs->vec_flags + && where == vect_body + && vectype + && stmt_cost != 0) + { + /* Record estimates for the scalar code. 
*/ + aarch64_count_ops (vinfo, costs, count, kind, stmt_info, vectype, + 0, &costs->scalar_ops, issue_info->scalar, + vect_nunits_for_cost (vectype)); + + if (aarch64_sve_mode_p (vinfo->vector_mode) && issue_info->sve) + { + /* Record estimates for a possible Advanced SIMD version + of the SVE code. */ + aarch64_count_ops (vinfo, costs, count, kind, stmt_info, + vectype, VEC_ADVSIMD, &costs->advsimd_ops, + issue_info->advsimd, + aarch64_estimated_sve_vq ()); + + /* Record estimates for the SVE code itself. */ + aarch64_count_ops (vinfo, costs, count, kind, stmt_info, + vectype, VEC_ANY_SVE, &costs->sve_ops, + issue_info->sve, 1); + } + else + /* Record estimates for the Advanced SIMD code. Treat SVE like + Advanced SIMD if the CPU has no specific SVE costs. */ + aarch64_count_ops (vinfo, costs, count, kind, stmt_info, + vectype, VEC_ADVSIMD, &costs->advsimd_ops, + issue_info->advsimd, 1); + } + + /* If we're applying the SVE vs. Advanced SIMD unrolling heuristic, + estimate the number of statements in the unrolled Advanced SIMD + loop. For simplicitly, we assume that one iteration of the + Advanced SIMD loop would need the same number of statements + as one iteration of the SVE loop. */ + if (where == vect_body && costs->unrolled_advsimd_niters) + costs->unrolled_advsimd_stmts + += count * costs->unrolled_advsimd_niters; + } /* Statements in an inner loop relative to the loop being vectorized are weighted more heavily. The value here is arbitrary and could potentially be improved with analysis. */ - if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + if (where == vect_body && stmt_info + && stmt_in_inner_loop_p (vinfo, stmt_info)) count *= 50; /* FIXME */ retval = (unsigned) (count * stmt_cost); - cost[where] += retval; + costs->region[where] += retval; } return retval; } +/* Dump information about the structure. */ +void +aarch64_vec_op_count::dump () const +{ + dump_printf_loc (MSG_NOTE, vect_location, + " load operations = %d\n", loads); + dump_printf_loc (MSG_NOTE, vect_location, + " store operations = %d\n", stores); + dump_printf_loc (MSG_NOTE, vect_location, + " general operations = %d\n", general_ops); + dump_printf_loc (MSG_NOTE, vect_location, + " reduction latency = %d\n", reduction_latency); +} + +/* Dump information about the structure. */ +void +aarch64_sve_op_count::dump () const +{ + aarch64_vec_op_count::dump (); + dump_printf_loc (MSG_NOTE, vect_location, + " predicate operations = %d\n", pred_ops); +} + +/* Use ISSUE_INFO to estimate the minimum number of cycles needed to issue + the operations described by OPS. This is a very simplistic model! */ +static unsigned int +aarch64_estimate_min_cycles_per_iter + (const aarch64_vec_op_count *ops, + const aarch64_base_vec_issue_info *issue_info) +{ + unsigned int cycles = MAX (ops->reduction_latency, 1); + cycles = MAX (cycles, CEIL (ops->stores, issue_info->stores_per_cycle)); + cycles = MAX (cycles, CEIL (ops->loads + ops->stores, + issue_info->loads_stores_per_cycle)); + cycles = MAX (cycles, CEIL (ops->general_ops, + issue_info->general_ops_per_cycle)); + return cycles; +} + +/* BODY_COST is the cost of a vector loop body recorded in COSTS. + Adjust the cost as necessary and return the new cost. 
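aarch64_estimate_min_cycles_per_iter, defined just above, converts the per-iteration operation counts into a throughput bound: the larger of the reduction latency and each operation class divided by its issue rate. The same arithmetic as a self-contained program (struct and field names mirror the patch but are simplified):

#include <algorithm>
#include <cassert>

static unsigned ceil_div (unsigned a, unsigned b) { return (a + b - 1) / b; }

// Per-iteration operation counts, mirroring aarch64_vec_op_count.
struct op_count
{
  unsigned general_ops, loads, stores, reduction_latency;
};

// Issue widths of the scalar, Advanced SIMD or SVE pipeline being modelled.
struct issue_rates
{
  unsigned loads_stores_per_cycle, stores_per_cycle, general_ops_per_cycle;
};

// Minimum cycles per iteration: the loop is limited by whichever of the
// reduction chain, the store pipes, the load/store pipes or the general
// pipes saturates first.
static unsigned min_cycles_per_iter (const op_count &ops, const issue_rates &r)
{
  unsigned cycles = std::max (ops.reduction_latency, 1u);
  cycles = std::max (cycles, ceil_div (ops.stores, r.stores_per_cycle));
  cycles = std::max (cycles, ceil_div (ops.loads + ops.stores,
                                       r.loads_stores_per_cycle));
  cycles = std::max (cycles, ceil_div (ops.general_ops,
                                       r.general_ops_per_cycle));
  return cycles;
}

int main ()
{
  // A body with 6 general ops, 3 loads, 1 store and no reduction, on a
  // machine issuing 3 loads/stores, 2 stores and 4 general ops per cycle,
  // is limited by the load/store pipes: 2 cycles per iteration.
  op_count ops = { 6, 3, 1, 0 };
  issue_rates r = { 3, 2, 4 };
  assert (min_cycles_per_iter (ops, r) == 2);
  return 0;
}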
*/ +static unsigned int +aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost) +{ + unsigned int orig_body_cost = body_cost; + bool should_disparage = false; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Original vector body cost = %d\n", body_cost); + + if (costs->unrolled_advsimd_stmts) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "Number of insns in" + " unrolled Advanced SIMD loop = %d\n", + costs->unrolled_advsimd_stmts); + + /* Apply the Advanced SIMD vs. SVE unrolling heuristic described above + aarch64_vector_costs::unrolled_advsimd_niters. + + The balance here is tricky. On the one hand, we can't be sure whether + the code is vectorizable with Advanced SIMD or not. However, even if + it isn't vectorizable with Advanced SIMD, there's a possibility that + the scalar code could also be unrolled. Some of the code might then + benefit from SLP, or from using LDP and STP. We therefore apply + the heuristic regardless of can_use_advsimd_p. */ + if (costs->unrolled_advsimd_stmts + && (costs->unrolled_advsimd_stmts + <= (unsigned int) param_max_completely_peeled_insns)) + { + unsigned int estimated_vq = aarch64_estimated_sve_vq (); + unsigned int min_cost = (orig_body_cost * estimated_vq) + 1; + if (body_cost < min_cost) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Increasing body cost to %d to account for" + " unrolling\n", min_cost); + body_cost = min_cost; + should_disparage = true; + } + } + } + + auto *issue_info = aarch64_tune_params.vec_costs->issue_info; + if (!issue_info) + return body_cost; + + unsigned int scalar_cycles_per_iter + = aarch64_estimate_min_cycles_per_iter (&costs->scalar_ops, + issue_info->scalar); + unsigned int advsimd_cycles_per_iter + = aarch64_estimate_min_cycles_per_iter (&costs->advsimd_ops, + issue_info->advsimd); + bool could_use_advsimd + = ((costs->vec_flags & VEC_ADVSIMD) + || (aarch64_autovec_preference != 2 + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT) + && !costs->saw_sve_only_op)); + + if (dump_enabled_p ()) + { + if (IN_RANGE (costs->num_vector_iterations, 0, 65536)) + dump_printf_loc (MSG_NOTE, vect_location, + "Vector loop iterates at most %wd times\n", + costs->num_vector_iterations); + dump_printf_loc (MSG_NOTE, vect_location, "Scalar issue estimate:\n"); + costs->scalar_ops.dump (); + dump_printf_loc (MSG_NOTE, vect_location, + " estimated cycles per iteration = %d\n", + scalar_cycles_per_iter); + if (could_use_advsimd) + { + dump_printf_loc (MSG_NOTE, vect_location, + "Advanced SIMD issue estimate:\n"); + costs->advsimd_ops.dump (); + dump_printf_loc (MSG_NOTE, vect_location, + " estimated cycles per iteration = %d\n", + advsimd_cycles_per_iter); + } + else + dump_printf_loc (MSG_NOTE, vect_location, + "Loop could not use Advanced SIMD\n"); + } + + uint64_t vector_cycles_per_iter = advsimd_cycles_per_iter; + unsigned int vector_reduction_latency = costs->advsimd_ops.reduction_latency; + if ((costs->vec_flags & VEC_ANY_SVE) && issue_info->sve) + { + /* Estimate the minimum number of cycles per iteration needed to issue + non-predicate operations. */ + unsigned int sve_cycles_per_iter + = aarch64_estimate_min_cycles_per_iter (&costs->sve_ops, + issue_info->sve); + + /* Separately estimate the minimum number of cycles per iteration needed + to issue the predicate operations. 
*/ + unsigned int pred_cycles_per_iter + = CEIL (costs->sve_ops.pred_ops, issue_info->sve->pred_ops_per_cycle); + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "SVE issue estimate:\n"); + costs->sve_ops.dump (); + dump_printf_loc (MSG_NOTE, vect_location, + " estimated cycles per iteration for non-predicate" + " operations = %d\n", sve_cycles_per_iter); + if (costs->sve_ops.pred_ops) + dump_printf_loc (MSG_NOTE, vect_location, " estimated cycles per" + " iteration for predicate operations = %d\n", + pred_cycles_per_iter); + } + + vector_cycles_per_iter = MAX (sve_cycles_per_iter, pred_cycles_per_iter); + vector_reduction_latency = costs->sve_ops.reduction_latency; + + /* If the scalar version of the loop could issue at least as + quickly as the predicate parts of the SVE loop, make the SVE loop + prohibitively expensive. In this case vectorization is adding an + overhead that the original scalar code didn't have. + + This is mostly intended to detect cases in which WHILELOs dominate + for very tight loops, which is something that normal latency-based + costs would not model. Adding this kind of cliffedge would be + too drastic for scalar_cycles_per_iter vs. sve_cycles_per_iter; + code later in the function handles that case in a more + conservative way. */ + uint64_t sve_estimate = pred_cycles_per_iter + 1; + if (scalar_cycles_per_iter < sve_estimate) + { + unsigned int min_cost + = orig_body_cost * estimated_poly_value (BYTES_PER_SVE_VECTOR); + if (body_cost < min_cost) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Increasing body cost to %d because the" + " scalar code could issue within the limit" + " imposed by predicate operations\n", + min_cost); + body_cost = min_cost; + should_disparage = true; + } + } + + /* If it appears that the Advanced SIMD version of a loop could issue + more quickly than the SVE one, increase the SVE cost in proportion + to the difference. The intention is to make Advanced SIMD preferable + in cases where an Advanced SIMD version exists, without increasing + the costs so much that SVE won't be used at all. + + The reasoning is similar to the scalar vs. predicate comparison above: + if the issue rate of the SVE code is limited by predicate operations + (i.e. if pred_cycles_per_iter > sve_cycles_per_iter), and if the + Advanced SIMD code could issue within the limit imposed by the + predicate operations, the predicate operations are adding an + overhead that the original code didn't have and so we should prefer + the Advanced SIMD version. However, if the predicate operations + do not dominate in this way, we should only increase the cost of + the SVE code if sve_cycles_per_iter is strictly greater than + advsimd_cycles_per_iter. Given rounding effects, this should mean + that Advanced SIMD is either better or at least no worse. */ + if (sve_cycles_per_iter >= pred_cycles_per_iter) + sve_estimate = sve_cycles_per_iter; + if (could_use_advsimd && advsimd_cycles_per_iter < sve_estimate) + { + /* This ensures that min_cost > orig_body_cost * 2. */ + unsigned int min_cost + = orig_body_cost * CEIL (sve_estimate, advsimd_cycles_per_iter) + 1; + if (body_cost < min_cost) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Increasing body cost to %d because Advanced" + " SIMD code could issue as quickly\n", + min_cost); + body_cost = min_cost; + should_disparage = true; + } + } + } + + /* Decide whether to stick to latency-based costs or whether to try to + take issue rates into account. 
*/ + unsigned int threshold = aarch64_loop_vect_issue_rate_niters; + if (costs->vec_flags & VEC_ANY_SVE) + threshold = CEIL (threshold, aarch64_estimated_sve_vq ()); + + if (costs->num_vector_iterations >= 1 + && costs->num_vector_iterations < threshold) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Low iteration count, so using pure latency" + " costs\n"); + } + /* Increase the cost of the vector code if it looks like the scalar code + could issue more quickly. These values are only rough estimates, + so minor differences should only result in minor changes. */ + else if (scalar_cycles_per_iter < vector_cycles_per_iter) + { + body_cost = CEIL (body_cost * vector_cycles_per_iter, + scalar_cycles_per_iter); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Increasing body cost to %d because scalar code" + " would issue more quickly\n", body_cost); + } + /* In general, it's expected that the proposed vector code would be able + to issue more quickly than the original scalar code. This should + already be reflected to some extent in the latency-based costs. + + However, the latency-based costs effectively assume that the scalar + code and the vector code execute serially, which tends to underplay + one important case: if the real (non-serialized) execution time of + a scalar iteration is dominated by loop-carried dependencies, + and if the vector code is able to reduce both the length of + the loop-carried dependencies *and* the number of cycles needed + to issue the code in general, we can be more confident that the + vector code is an improvement, even if adding the other (non-loop-carried) + latencies tends to hide this saving. We therefore reduce the cost of the + vector loop body in proportion to the saving. */ + else if (costs->scalar_ops.reduction_latency > vector_reduction_latency + && costs->scalar_ops.reduction_latency == scalar_cycles_per_iter + && scalar_cycles_per_iter > vector_cycles_per_iter + && !should_disparage) + { + body_cost = CEIL (body_cost * vector_cycles_per_iter, + scalar_cycles_per_iter); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Decreasing body cost to %d account for smaller" + " reduction latency\n", body_cost); + } + + return body_cost; +} + +/* Implement TARGET_VECTORIZE_FINISH_COST. */ +static void +aarch64_finish_cost (void *data, unsigned *prologue_cost, + unsigned *body_cost, unsigned *epilogue_cost) +{ + auto *costs = static_cast<aarch64_vector_costs *> (data); + *prologue_cost = costs->region[vect_prologue]; + *body_cost = costs->region[vect_body]; + *epilogue_cost = costs->region[vect_epilogue]; + + if (costs->is_loop + && costs->vec_flags + && aarch64_use_new_vector_costs_p ()) + *body_cost = aarch64_adjust_body_cost (costs, *body_cost); +} + +/* Implement TARGET_VECTORIZE_DESTROY_COST_DATA. */ +static void +aarch64_destroy_cost_data (void *data) +{ + delete static_cast<aarch64_vector_costs *> (data); +} + static void initialize_aarch64_code_model (struct gcc_options *); /* Parse the TO_PARSE string and put the architecture struct that it selects into RES and the architectural features into ISA_FLAGS. Return an aarch64_parse_opt_result describing the parse result. - If there is an error parsing, RES and ISA_FLAGS are left unchanged. */ + If there is an error parsing, RES and ISA_FLAGS are left unchanged. + When the TO_PARSE string contains an invalid extension, + a copy of the string is created and stored to INVALID_EXTENSION.
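As a worked example of the scaling in aarch64_adjust_body_cost above (illustrative numbers, not values from the patch): if the scalar loop is estimated at 2 cycles per iteration and the vector loop at 3, a latency-based body cost of 20 becomes CEIL (20 * 3, 2) = 30, penalising the vector body in proportion to its slower issue rate; the reduction-latency branch applies the same formula when vector_cycles_per_iter is the smaller value, so there the cost is scaled down instead.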
*/ static enum aarch64_parse_opt_result aarch64_parse_arch (const char *to_parse, const struct processor **res, - unsigned long *isa_flags) + uint64_t *isa_flags, std::string *invalid_extension) { - char *ext; + const char *ext; const struct processor *arch; - char *str = (char *) alloca (strlen (to_parse) + 1); size_t len; - strcpy (str, to_parse); - - ext = strchr (str, '+'); + ext = strchr (to_parse, '+'); if (ext != NULL) - len = ext - str; + len = ext - to_parse; else - len = strlen (str); + len = strlen (to_parse); if (len == 0) return AARCH64_PARSE_MISSING_ARG; @@ -10202,15 +15780,16 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, /* Loop through the list of supported ARCHes to find a match. */ for (arch = all_architectures; arch->name != NULL; arch++) { - if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0) + if (strlen (arch->name) == len + && strncmp (arch->name, to_parse, len) == 0) { - unsigned long isa_temp = arch->flags; + uint64_t isa_temp = arch->flags; if (ext != NULL) { /* TO_PARSE string contains at least one extension. */ enum aarch64_parse_opt_result ext_res - = aarch64_parse_extension (ext, &isa_temp); + = aarch64_parse_extension (ext, &isa_temp, invalid_extension); if (ext_res != AARCH64_PARSE_OK) return ext_res; @@ -10230,25 +15809,24 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, /* Parse the TO_PARSE string and put the result tuning in RES and the architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result describing the parse result. If there is an error parsing, RES and - ISA_FLAGS are left unchanged. */ + ISA_FLAGS are left unchanged. + When the TO_PARSE string contains an invalid extension, + a copy of the string is created and stored to INVALID_EXTENSION. */ static enum aarch64_parse_opt_result aarch64_parse_cpu (const char *to_parse, const struct processor **res, - unsigned long *isa_flags) + uint64_t *isa_flags, std::string *invalid_extension) { - char *ext; + const char *ext; const struct processor *cpu; - char *str = (char *) alloca (strlen (to_parse) + 1); size_t len; - strcpy (str, to_parse); - - ext = strchr (str, '+'); + ext = strchr (to_parse, '+'); if (ext != NULL) - len = ext - str; + len = ext - to_parse; else - len = strlen (str); + len = strlen (to_parse); if (len == 0) return AARCH64_PARSE_MISSING_ARG; @@ -10257,16 +15835,16 @@ aarch64_parse_cpu (const char *to_parse, const struct processor **res, /* Loop through the list of supported CPUs to find a match. */ for (cpu = all_cores; cpu->name != NULL; cpu++) { - if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) + if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0) { - unsigned long isa_temp = cpu->flags; + uint64_t isa_temp = cpu->flags; if (ext != NULL) { /* TO_PARSE string contains at least one extension. */ enum aarch64_parse_opt_result ext_res - = aarch64_parse_extension (ext, &isa_temp); + = aarch64_parse_extension (ext, &isa_temp, invalid_extension); if (ext_res != AARCH64_PARSE_OK) return ext_res; @@ -10291,14 +15869,11 @@ static enum aarch64_parse_opt_result aarch64_parse_tune (const char *to_parse, const struct processor **res) { const struct processor *cpu; - char *str = (char *) alloca (strlen (to_parse) + 1); - - strcpy (str, to_parse); /* Loop through the list of supported CPUs to find a match. 
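The reworked parsers no longer copy TO_PARSE into an alloca buffer; they measure the prefix up to the first '+' and length-compare it against each table entry. A minimal sketch of that prefix match, using an illustrative table in place of all_cores:

#include <cstdio>
#include <cstring>

/* Illustrative table; the real parsers walk all_cores/all_architectures.  */
static const char *const cpus[] = { "generic", "cortex-a76", "neoverse-n1", 0 };

/* Return the table entry named by the prefix of STR before any '+ext'
   modifiers, or null if nothing matches.  */
static const char *
match_cpu (const char *str)
{
  const char *ext = strchr (str, '+');
  size_t len = ext ? (size_t) (ext - str) : strlen (str);
  if (len == 0)
    return 0;
  for (const char *const *p = cpus; *p; ++p)
    if (strlen (*p) == len && strncmp (*p, str, len) == 0)
      return *p;
  return 0;
}

int
main ()
{
  printf ("%s\n", match_cpu ("cortex-a76+crypto"));  /* cortex-a76 */
  printf ("%d\n", match_cpu ("made-up+sve") == 0);   /* 1 */
}

In the patch itself, the text after the '+' is then handed to aarch64_parse_extension, which reports any unknown modifier through INVALID_EXTENSION.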
*/ for (cpu = all_cores; cpu->name != NULL; cpu++) { - if (strcmp (cpu->name, str) == 0) + if (strcmp (cpu->name, to_parse) == 0) { *res = cpu; return AARCH64_PARSE_OK; } @@ -10326,7 +15901,7 @@ aarch64_parse_one_option_token (const char *token, return flag->flag; } - error ("unknown flag passed in -moverride=%s (%s)", option_name, token); + error ("unknown flag passed in %<-moverride=%s%> (%s)", option_name, token); return 0; } @@ -10412,6 +15987,37 @@ aarch64_parse_tune_string (const char *tune_string, "tune="); } +/* Parse the sve_width tuning moverride string in TUNE_STRING. + Accept the valid SVE vector widths allowed by + aarch64_sve_vector_bits_enum and use it to override sve_width + in TUNE. */ + +static void +aarch64_parse_sve_width_string (const char *tune_string, + struct tune_params *tune) +{ + int width = -1; + + int n = sscanf (tune_string, "%d", &width); + if (n == EOF) + { + error ("invalid format for sve_width"); + return; + } + switch (width) + { + case SVE_128: + case SVE_256: + case SVE_512: + case SVE_1024: + case SVE_2048: + break; + default: + error ("invalid sve_width value: %d", width); + } + tune->sve_width = (enum aarch64_sve_vector_bits_enum) width; +} + /* Parse TOKEN, which has length LENGTH to see if it is a tuning option we understand. If it is, extract the option string and handoff to the appropriate function. */ @@ -10516,19 +16122,38 @@ aarch64_parse_override_string (const char* input_string, free (string_root); } +/* Adjust CURRENT_TUNE (a generic tuning struct) with settings that + are best for a generic target with the currently-enabled architecture + extensions. */ +static void +aarch64_adjust_generic_arch_tuning (struct tune_params &current_tune) +{ + /* Neoverse V1 is the only core that is known to benefit from + AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS. There is therefore no + point enabling it for SVE2 and above. */ + if (TARGET_SVE2) + current_tune.extra_tuning_flags + &= ~AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS; +} static void aarch64_override_options_after_change_1 (struct gcc_options *opts) { + if (accepted_branch_protection_string) + { + opts->x_aarch64_branch_protection_string + = xstrdup (accepted_branch_protection_string); + } + /* PR 70044: We have to be careful about being called multiple times for the same function. This means all changes should be repeatable. */ - /* If the frame pointer is enabled, set it to a special value that behaves - similar to frame pointer omission. If we don't do this all leaf functions - will get a frame pointer even if flag_omit_leaf_frame_pointer is set. - If flag_omit_frame_pointer has this special value, we must force the - frame pointer if not in a leaf function. We also need to force it in a - leaf function if flag_omit_frame_pointer is not set or if LR is used. */ + /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer. + Disable the frame pointer flag so the mid-end will not use a frame + pointer in leaf functions in order to support -fomit-leaf-frame-pointer. + Set x_flag_omit_frame_pointer to the special value 2 to differentiate + between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */ + aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1; if (opts->x_flag_omit_frame_pointer == 0) opts->x_flag_omit_frame_pointer = 2; @@ -10536,12 +16161,12 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts) alignment to what the target wants.
*/ if (!opts->x_optimize_size) { - if (opts->x_align_loops <= 0) - opts->x_align_loops = aarch64_tune_params.loop_align; - if (opts->x_align_jumps <= 0) - opts->x_align_jumps = aarch64_tune_params.jump_align; - if (opts->x_align_functions <= 0) - opts->x_align_functions = aarch64_tune_params.function_align; + if (opts->x_flag_align_loops && !opts->x_str_align_loops) + opts->x_str_align_loops = aarch64_tune_params.loop_align; + if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) + opts->x_str_align_jumps = aarch64_tune_params.jump_align; + if (opts->x_flag_align_functions && !opts->x_str_align_functions) + opts->x_str_align_functions = aarch64_tune_params.function_align; } /* We default to no pc-relative literal loads. */ @@ -10580,6 +16205,8 @@ aarch64_override_options_internal (struct gcc_options *opts) we may later overwrite. */ aarch64_tune_params = *(selected_tune->tune); aarch64_architecture_version = selected_arch->architecture_version; + if (selected_tune->tune == &generic_tunings) + aarch64_adjust_generic_arch_tuning (aarch64_tune_params); if (opts->x_aarch64_override_tune_string) aarch64_parse_override_string (opts->x_aarch64_override_tune_string, @@ -10589,6 +16216,41 @@ aarch64_override_options_internal (struct gcc_options *opts) if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) opts->x_flag_strict_volatile_bitfields = 1; + if (aarch64_stack_protector_guard == SSP_GLOBAL + && opts->x_aarch64_stack_protector_guard_offset_str) + { + error ("incompatible options %<-mstack-protector-guard=global%> and " + "%<-mstack-protector-guard-offset=%s%>", + aarch64_stack_protector_guard_offset_str); + } + + if (aarch64_stack_protector_guard == SSP_SYSREG + && !(opts->x_aarch64_stack_protector_guard_offset_str + && opts->x_aarch64_stack_protector_guard_reg_str)) + { + error ("both %<-mstack-protector-guard-offset%> and " + "%<-mstack-protector-guard-reg%> must be used " + "with %<-mstack-protector-guard=sysreg%>"); + } + + if (opts->x_aarch64_stack_protector_guard_reg_str) + { + if (strlen (opts->x_aarch64_stack_protector_guard_reg_str) > 100) + error ("specify a system register with a small string length."); + } + + if (opts->x_aarch64_stack_protector_guard_offset_str) + { + char *end; + const char *str = aarch64_stack_protector_guard_offset_str; + errno = 0; + long offs = strtol (aarch64_stack_protector_guard_offset_str, &end, 0); + if (!*str || *end || errno) + error ("%qs is not a valid offset in %qs", str, + "-mstack-protector-guard-offset="); + aarch64_stack_protector_guard_offset = offs; + } + initialize_aarch64_code_model (opts); initialize_aarch64_tls_size (opts); @@ -10610,39 +16272,69 @@ aarch64_override_options_internal (struct gcc_options *opts) /* We don't mind passing in global_options_set here as we don't use the *options_set structs anyway. */ - maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, - queue_depth, - opts->x_param_values, - global_options_set.x_param_values); + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_sched_autopref_queue_depth, queue_depth); + + /* If using Advanced SIMD only for autovectorization disable SVE vector costs + comparison. */ + if (aarch64_autovec_preference == 1) + SET_OPTION_IF_UNSET (opts, &global_options_set, + aarch64_sve_compare_costs, 0); /* Set up parameters to be used in prefetching algorithm. Do not override the defaults unless we are tuning for a core we have researched values for. 
*/ if (aarch64_tune_params.prefetch->num_slots > 0) - maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, - aarch64_tune_params.prefetch->num_slots, - opts->x_param_values, - global_options_set.x_param_values); + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_simultaneous_prefetches, + aarch64_tune_params.prefetch->num_slots); if (aarch64_tune_params.prefetch->l1_cache_size >= 0) - maybe_set_param_value (PARAM_L1_CACHE_SIZE, - aarch64_tune_params.prefetch->l1_cache_size, - opts->x_param_values, - global_options_set.x_param_values); + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_l1_cache_size, + aarch64_tune_params.prefetch->l1_cache_size); if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0) - maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, - aarch64_tune_params.prefetch->l1_cache_line_size, - opts->x_param_values, - global_options_set.x_param_values); + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_l1_cache_line_size, + aarch64_tune_params.prefetch->l1_cache_line_size); if (aarch64_tune_params.prefetch->l2_cache_size >= 0) - maybe_set_param_value (PARAM_L2_CACHE_SIZE, - aarch64_tune_params.prefetch->l2_cache_size, - opts->x_param_values, - global_options_set.x_param_values); + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_l2_cache_size, + aarch64_tune_params.prefetch->l2_cache_size); + if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides) + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_prefetch_dynamic_strides, 0); + if (aarch64_tune_params.prefetch->minimum_stride >= 0) + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_prefetch_minimum_stride, + aarch64_tune_params.prefetch->minimum_stride); /* Use the alternative scheduling-pressure algorithm by default. */ - maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, - opts->x_param_values, - global_options_set.x_param_values); + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_sched_pressure_algorithm, + SCHED_PRESSURE_MODEL); + + /* Validate the guard size. */ + int guard_size = param_stack_clash_protection_guard_size; + + if (guard_size != 12 && guard_size != 16) + error ("only values 12 (4 KB) and 16 (64 KB) are supported for guard " + "size. Given value %d (%llu KB) is out of range", + guard_size, (1ULL << guard_size) / 1024ULL); + + /* Enforce that interval is the same size as size so the mid-end does the + right thing. */ + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_stack_clash_protection_probe_interval, + guard_size); + + /* The maybe_set calls won't update the value if the user has explicitly set + one. Which means we need to validate that probing interval and guard size + are equal. */ + int probe_interval + = param_stack_clash_protection_probe_interval; + if (guard_size != probe_interval) + error ("stack clash guard size %<%d%> must be equal to probing interval " + "%<%d%>", guard_size, probe_interval); /* Enable sw prefetching at specified optimization level for CPUS that have prefetch. 
Lower optimization level threshold by 1 @@ -10653,6 +16345,13 @@ aarch64_override_options_internal (struct gcc_options *opts) && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) opts->x_flag_prefetch_loop_arrays = 1; + if (opts->x_aarch64_arch_string == NULL) + opts->x_aarch64_arch_string = selected_arch->name; + if (opts->x_aarch64_cpu_string == NULL) + opts->x_aarch64_cpu_string = selected_cpu->name; + if (opts->x_aarch64_tune_string == NULL) + opts->x_aarch64_tune_string = selected_tune->name; + aarch64_override_options_after_change_1 (opts); } @@ -10704,6 +16403,26 @@ aarch64_print_hint_for_arch (const char *str) aarch64_print_hint_for_core_or_arch (str, true); } + +/* Print a hint with a suggestion for an extension name + that most closely resembles what the user passed in STR. */ + +void +aarch64_print_hint_for_extensions (const std::string &str) +{ + auto_vec<const char *> candidates; + aarch64_get_all_extension_candidates (&candidates); + char *s; + const char *hint = candidates_list_and_hint (str.c_str (), s, candidates); + if (hint) + inform (input_location, "valid arguments are: %s;" + " did you mean %qs?", s, hint); + else + inform (input_location, "valid arguments are: %s;", s); + + XDELETEVEC (s); +} + /* Validate a command-line -mcpu option. Parse the cpu and extensions (if any) specified in STR and throw errors if appropriate. Put the results if they are valid in RES and ISA_FLAGS. Return whether the option is @@ -10711,10 +16430,11 @@ aarch64_print_hint_for_arch (const char *str) static bool aarch64_validate_mcpu (const char *str, const struct processor **res, - unsigned long *isa_flags) + uint64_t *isa_flags) { + std::string invalid_extension; enum aarch64_parse_opt_result parse_res - = aarch64_parse_cpu (str, res, isa_flags); + = aarch64_parse_cpu (str, res, isa_flags, &invalid_extension); if (parse_res == AARCH64_PARSE_OK) return true; @@ -10725,11 +16445,13 @@ aarch64_validate_mcpu (const char *str, const struct processor **res, error ("missing cpu name in %<-mcpu=%s%>", str); break; case AARCH64_PARSE_INVALID_ARG: - error ("unknown value %qs for -mcpu", str); + error ("unknown value %qs for %<-mcpu%>", str); aarch64_print_hint_for_core (str); break; case AARCH64_PARSE_INVALID_FEATURE: - error ("invalid feature modifier in %<-mcpu=%s%>", str); + error ("invalid feature modifier %qs in %<-mcpu=%s%>", + invalid_extension.c_str (), str); + aarch64_print_hint_for_extensions (invalid_extension); break; default: gcc_unreachable (); @@ -10738,6 +16460,183 @@ aarch64_validate_mcpu (const char *str, const struct processor **res, return false; } +/* Straight line speculation indicators. */ +enum aarch64_sls_hardening_type +{ + SLS_NONE = 0, + SLS_RETBR = 1, + SLS_BLR = 2, + SLS_ALL = 3, +}; +static enum aarch64_sls_hardening_type aarch64_sls_hardening; + +/* Return whether we should mitigate Straight Line Speculation for the RET + and BR instructions. */ +bool +aarch64_harden_sls_retbr_p (void) +{ + return aarch64_sls_hardening & SLS_RETBR; } + +/* Return whether we should mitigate Straight Line Speculation for the BLR + instruction. */ +bool +aarch64_harden_sls_blr_p (void) +{ + return aarch64_sls_hardening & SLS_BLR; +} + +/* As of yet we only allow setting these options globally, in the future we may + allow setting them per function.
*/ +static void +aarch64_validate_sls_mitigation (const char *const_str) +{ + char *token_save = NULL; + char *str = NULL; + + if (strcmp (const_str, "none") == 0) + { + aarch64_sls_hardening = SLS_NONE; + return; + } + if (strcmp (const_str, "all") == 0) + { + aarch64_sls_hardening = SLS_ALL; + return; + } + + char *str_root = xstrdup (const_str); + str = strtok_r (str_root, ",", &token_save); + if (!str) + error ("invalid argument given to %<-mharden-sls=%>"); + + int temp = SLS_NONE; + while (str) + { + if (strcmp (str, "blr") == 0) + temp |= SLS_BLR; + else if (strcmp (str, "retbr") == 0) + temp |= SLS_RETBR; + else if (strcmp (str, "none") == 0 || strcmp (str, "all") == 0) + { + error ("%<%s%> must be by itself for %<-mharden-sls=%>", str); + break; + } + else + { + error ("invalid argument %<%s%> for %<-mharden-sls=%>", str); + break; + } + str = strtok_r (NULL, ",", &token_save); + } + aarch64_sls_hardening = (aarch64_sls_hardening_type) temp; + free (str_root); +} + +/* Parses CONST_STR for branch protection features specified in + aarch64_branch_protect_types, and set any global variables required. Returns + the parsing result and assigns LAST_STR to the last processed token from + CONST_STR so that it can be used for error reporting. */ + +static enum +aarch64_parse_opt_result aarch64_parse_branch_protection (const char *const_str, + char** last_str) +{ + char *str_root = xstrdup (const_str); + char* token_save = NULL; + char *str = strtok_r (str_root, "+", &token_save); + enum aarch64_parse_opt_result res = AARCH64_PARSE_OK; + if (!str) + res = AARCH64_PARSE_MISSING_ARG; + else + { + char *next_str = strtok_r (NULL, "+", &token_save); + /* Reset the branch protection features to their defaults. */ + aarch64_handle_no_branch_protection (NULL, NULL); + + while (str && res == AARCH64_PARSE_OK) + { + const aarch64_branch_protect_type* type = aarch64_branch_protect_types; + bool found = false; + /* Search for this type. */ + while (type && type->name && !found && res == AARCH64_PARSE_OK) + { + if (strcmp (str, type->name) == 0) + { + found = true; + res = type->handler (str, next_str); + str = next_str; + next_str = strtok_r (NULL, "+", &token_save); + } + else + type++; + } + if (found && res == AARCH64_PARSE_OK) + { + bool found_subtype = true; + /* Loop through each token until we find one that isn't a + subtype. */ + while (found_subtype) + { + found_subtype = false; + const aarch64_branch_protect_type *subtype = type->subtypes; + /* Search for the subtype. */ + while (str && subtype && subtype->name && !found_subtype + && res == AARCH64_PARSE_OK) + { + if (strcmp (str, subtype->name) == 0) + { + found_subtype = true; + res = subtype->handler (str, next_str); + str = next_str; + next_str = strtok_r (NULL, "+", &token_save); + } + else + subtype++; + } + } + } + else if (!found) + res = AARCH64_PARSE_INVALID_ARG; + } + } + /* Copy the last processed token into the argument to pass it back. + Used by option and attribute validation to print the offending token. */ + if (last_str) + { + if (str) strcpy (*last_str, str); + else *last_str = NULL; + } + if (res == AARCH64_PARSE_OK) + { + /* If needed, alloc the accepted string then copy in const_str. + Used by override_option_after_change_1. */ + if (!accepted_branch_protection_string) + accepted_branch_protection_string = (char *) xmalloc ( + BRANCH_PROTECT_STR_MAX + + 1); + strncpy (accepted_branch_protection_string, const_str, + BRANCH_PROTECT_STR_MAX + 1); + /* Forcibly null-terminate. 
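Both option parsers above tokenise a writable copy of the argument with strtok_r (a POSIX function, as used by the patch itself) and accumulate flag bits or handler results token by token. A self-contained sketch of that pattern for a comma-separated list such as the -mharden-sls= argument (flag names illustrative, diagnostics elided):

#include <cstdio>
#include <cstdlib>
#include <cstring>

enum { HARDEN_NONE = 0, HARDEN_RETBR = 1, HARDEN_BLR = 2 };

/* Accumulate a bitmask from a comma-separated list such as "retbr,blr";
   return -1 for an unrecognised token.  strtok_r needs a writable buffer,
   which is why the option string is duplicated first.  */
static int
parse_sls_list (const char *arg)
{
  char *copy = (char *) malloc (strlen (arg) + 1);
  strcpy (copy, arg);
  char *save = 0;
  int mask = HARDEN_NONE;
  for (char *tok = strtok_r (copy, ",", &save); tok;
       tok = strtok_r (0, ",", &save))
    {
      if (strcmp (tok, "retbr") == 0)
        mask |= HARDEN_RETBR;
      else if (strcmp (tok, "blr") == 0)
        mask |= HARDEN_BLR;
      else
        {
          mask = -1;
          break;
        }
    }
  free (copy);
  return mask;
}

int
main ()
{
  printf ("%d\n", parse_sls_list ("retbr,blr"));  /* 3 */
  printf ("%d\n", parse_sls_list ("bogus"));      /* -1 */
}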
*/ + accepted_branch_protection_string[BRANCH_PROTECT_STR_MAX] = '\0'; + } + return res; +} + +static bool +aarch64_validate_mbranch_protection (const char *const_str) +{ + char *str = (char *) xmalloc (strlen (const_str)); + enum aarch64_parse_opt_result res = + aarch64_parse_branch_protection (const_str, &str); + if (res == AARCH64_PARSE_INVALID_ARG) + error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str); + else if (res == AARCH64_PARSE_MISSING_ARG) + error ("missing argument for %<-mbranch-protection=%>"); + free (str); + return res == AARCH64_PARSE_OK; +} + /* Validate a command-line -march option. Parse the arch and extensions (if any) specified in STR and throw errors if appropriate. Put the results, if they are valid, in RES and ISA_FLAGS. Return whether the @@ -10745,10 +16644,11 @@ aarch64_validate_mcpu (const char *str, const struct processor **res, static bool aarch64_validate_march (const char *str, const struct processor **res, - unsigned long *isa_flags) + uint64_t *isa_flags) { + std::string invalid_extension; enum aarch64_parse_opt_result parse_res - = aarch64_parse_arch (str, res, isa_flags); + = aarch64_parse_arch (str, res, isa_flags, &invalid_extension); if (parse_res == AARCH64_PARSE_OK) return true; @@ -10759,11 +16659,13 @@ aarch64_validate_march (const char *str, const struct processor **res, error ("missing arch name in %<-march=%s%>", str); break; case AARCH64_PARSE_INVALID_ARG: - error ("unknown value %qs for -march", str); + error ("unknown value %qs for %<-march%>", str); aarch64_print_hint_for_arch (str); break; case AARCH64_PARSE_INVALID_FEATURE: - error ("invalid feature modifier in %<-march=%s%>", str); + error ("invalid feature modifier %qs in %<-march=%s%>", + invalid_extension.c_str (), str); + aarch64_print_hint_for_extensions (invalid_extension); break; default: gcc_unreachable (); @@ -10792,7 +16694,7 @@ aarch64_validate_mtune (const char *str, const struct processor **res) error ("missing cpu name in %<-mtune=%s%>", str); break; case AARCH64_PARSE_INVALID_ARG: - error ("unknown value %qs for -mtune", str); + error ("unknown value %qs for %<-mtune%>", str); aarch64_print_hint_for_core (str); break; default: @@ -10837,11 +16739,17 @@ aarch64_get_arch (enum aarch64_arch arch) static poly_uint16 aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) { - /* For now generate vector-length agnostic code for -msve-vector-bits=128. - This ensures we can clearly distinguish SVE and Advanced SIMD modes when - deciding which .md file patterns to use and when deciding whether - something is a legitimate address or constant. */ - if (value == SVE_SCALABLE || value == SVE_128) + /* 128-bit SVE and Advanced SIMD modes use different register layouts + on big-endian targets, so we would need to forbid subregs that convert + from one to the other. By default a reinterpret sequence would then + involve a store to memory in one mode and a load back in the other. + Even if we optimize that sequence using reverse instructions, + it would still be a significant potential overhead. + + For now, it seems better to generate length-agnostic code for that + case instead. 
*/ + if (value == SVE_SCALABLE + || (value == SVE_128 && BYTES_BIG_ENDIAN)) return poly_uint16 (2, 2); else return (int) value / 64; @@ -10857,8 +16765,8 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) static void aarch64_override_options (void) { - unsigned long cpu_isa = 0; - unsigned long arch_isa = 0; + uint64_t cpu_isa = 0; + uint64_t arch_isa = 0; aarch64_isa_flags = 0; bool valid_cpu = true; @@ -10869,6 +16777,12 @@ aarch64_override_options (void) selected_arch = NULL; selected_tune = NULL; + if (aarch64_harden_sls_string) + aarch64_validate_sls_mitigation (aarch64_harden_sls_string); + + if (aarch64_branch_protection_string) + aarch64_validate_mbranch_protection (aarch64_branch_protection_string); + /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU. If either of -march or -mtune is given, they override their respective component of -mcpu. */ @@ -10883,6 +16797,10 @@ aarch64_override_options (void) if (aarch64_tune_string) valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune); +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + /* If the user did not specify a processor, choose the default one for them. This will be the CPU set during configuration using --with-cpu, otherwise it is "generic". */ @@ -10910,9 +16828,9 @@ aarch64_override_options (void) { if (selected_arch->arch != selected_cpu->arch) { - warning (0, "switch -mcpu=%s conflicts with -march=%s switch", - all_architectures[selected_cpu->arch].name, - selected_arch->name); + warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch", + aarch64_cpu_string, + aarch64_arch_string); } aarch64_isa_flags = arch_isa; explicit_arch = selected_arch->arch; @@ -10941,18 +16859,39 @@ aarch64_override_options (void) if (!selected_tune) selected_tune = selected_cpu; + if (aarch64_enable_bti == 2) + { +#ifdef TARGET_ENABLE_BTI + aarch64_enable_bti = 1; +#else + aarch64_enable_bti = 0; +#endif + } + + /* Return address signing is currently not supported for ILP32 targets. For + LP64 targets use the configured option in the absence of a command-line + option for -mbranch-protection. */ + if (!TARGET_ILP32 && accepted_branch_protection_string == NULL) + { +#ifdef TARGET_ENABLE_PAC_RET + aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF; +#else + aarch64_ra_sign_scope = AARCH64_FUNCTION_NONE; +#endif + } + #ifndef HAVE_AS_MABI_OPTION /* The compiler may have been configured with 2.23.* binutils, which does not have support for ILP32. */ if (TARGET_ILP32) - error ("assembler does not support -mabi=ilp32"); + error ("assembler does not support %<-mabi=ilp32%>"); #endif /* Convert -msve-vector-bits to a VG count. */ aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits); if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32) - sorry ("return address signing is only supported for -mabi=lp64"); + sorry ("return address signing is only supported for %<-mabi=lp64%>"); /* Make sure we properly set up the explicit options. */ if ((aarch64_cpu_string && valid_cpu) @@ -10963,12 +16902,18 @@ aarch64_override_options (void) || (aarch64_arch_string && valid_arch)) gcc_assert (explicit_arch != aarch64_no_arch); + /* The pass to insert speculation tracking runs before + shrink-wrapping and the latter does not know how to update the + tracking status. So disable it in this case. 
*/ + if (aarch64_track_speculation) + flag_shrink_wrap = 0; + aarch64_override_options_internal (&global_options); /* Save these options as the default ones in case we push and pop them later while processing functions with potential target attributes. */ target_option_default_node = target_option_current_node - = build_target_option_node (&global_options); + = build_target_option_node (&global_options, &global_options_set); } /* Implement targetm.override_options_after_change. */ @@ -10979,6 +16924,16 @@ aarch64_override_options_after_change (void) aarch64_override_options_after_change_1 (&global_options); } +/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ +static char * +aarch64_offload_options (void) +{ + if (TARGET_ILP32) + return xstrdup ("-foffload-abi=ilp32"); + else + return xstrdup ("-foffload-abi=lp64"); +} + static struct machine_function * aarch64_init_machine_status (void) { @@ -10997,53 +16952,74 @@ aarch64_init_expanders (void) static void initialize_aarch64_code_model (struct gcc_options *opts) { - if (opts->x_flag_pic) - { - switch (opts->x_aarch64_cmodel_var) - { - case AARCH64_CMODEL_TINY: - aarch64_cmodel = AARCH64_CMODEL_TINY_PIC; - break; - case AARCH64_CMODEL_SMALL: + aarch64_cmodel = opts->x_aarch64_cmodel_var; + switch (opts->x_aarch64_cmodel_var) + { + case AARCH64_CMODEL_TINY: + if (opts->x_flag_pic) + aarch64_cmodel = AARCH64_CMODEL_TINY_PIC; + break; + case AARCH64_CMODEL_SMALL: + if (opts->x_flag_pic) + { #ifdef HAVE_AS_SMALL_PIC_RELOCS - aarch64_cmodel = (flag_pic == 2 - ? AARCH64_CMODEL_SMALL_PIC - : AARCH64_CMODEL_SMALL_SPIC); + aarch64_cmodel = (flag_pic == 2 + ? AARCH64_CMODEL_SMALL_PIC + : AARCH64_CMODEL_SMALL_SPIC); #else - aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; + aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; #endif - break; - case AARCH64_CMODEL_LARGE: - sorry ("code model %qs with -f%s", "large", - opts->x_flag_pic > 1 ? "PIC" : "pic"); - break; - default: - gcc_unreachable (); - } - } - else - aarch64_cmodel = opts->x_aarch64_cmodel_var; + } + break; + case AARCH64_CMODEL_LARGE: + if (opts->x_flag_pic) + sorry ("code model %qs with %<-f%s%>", "large", + opts->x_flag_pic > 1 ? "PIC" : "pic"); + if (opts->x_aarch64_abi == AARCH64_ABI_ILP32) + sorry ("code model %qs not supported in ilp32 mode", "large"); + break; + case AARCH64_CMODEL_TINY_PIC: + case AARCH64_CMODEL_SMALL_PIC: + case AARCH64_CMODEL_SMALL_SPIC: + gcc_unreachable (); + } } /* Implement TARGET_OPTION_SAVE. */ static void -aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts) +aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts, + struct gcc_options */* opts_set */) { ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; + ptr->x_aarch64_branch_protection_string + = opts->x_aarch64_branch_protection_string; } /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions using the information saved in PTR. 
*/ static void -aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr) +aarch64_option_restore (struct gcc_options *opts, + struct gcc_options */* opts_set */, + struct cl_target_option *ptr) { - opts->x_explicit_tune_core = ptr->x_explicit_tune_core; - selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); opts->x_explicit_arch = ptr->x_explicit_arch; selected_arch = aarch64_get_arch (ptr->x_explicit_arch); + opts->x_explicit_tune_core = ptr->x_explicit_tune_core; + if (opts->x_explicit_tune_core == aarch64_none + && opts->x_explicit_arch != aarch64_no_arch) + selected_tune = &all_cores[selected_arch->ident]; + else + selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; + opts->x_aarch64_branch_protection_string + = ptr->x_aarch64_branch_protection_string; + if (opts->x_aarch64_branch_protection_string) + { + aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string, + NULL); + } aarch64_override_options_internal (opts); } @@ -11055,7 +17031,7 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) { const struct processor *cpu = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); - unsigned long isa_flags = ptr->x_aarch64_isa_flags; + uint64_t isa_flags = ptr->x_aarch64_isa_flags; const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch); std::string extension = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags); @@ -11120,7 +17096,8 @@ aarch64_set_current_function (tree fndecl) aarch64_previous_fndecl = fndecl; /* First set the target options. */ - cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (new_tree)); aarch64_save_restore_target_globals (new_tree); } @@ -11163,8 +17140,9 @@ static bool aarch64_handle_attr_arch (const char *str) { const struct processor *tmp_arch = NULL; + std::string invalid_extension; enum aarch64_parse_opt_result parse_res - = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags); + = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension); if (parse_res == AARCH64_PARSE_OK) { @@ -11184,7 +17162,9 @@ aarch64_handle_attr_arch (const char *str) aarch64_print_hint_for_arch (str); break; case AARCH64_PARSE_INVALID_FEATURE: - error ("invalid value (\"%s\") in % pragma or attribute", str); + error ("invalid feature modifier %s of value (\"%s\") in " + "% pragma or attribute", invalid_extension.c_str (), str); + aarch64_print_hint_for_extensions (invalid_extension); break; default: gcc_unreachable (); @@ -11199,8 +17179,9 @@ static bool aarch64_handle_attr_cpu (const char *str) { const struct processor *tmp_cpu = NULL; + std::string invalid_extension; enum aarch64_parse_opt_result parse_res - = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags); + = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension); if (parse_res == AARCH64_PARSE_OK) { @@ -11223,7 +17204,9 @@ aarch64_handle_attr_cpu (const char *str) aarch64_print_hint_for_core (str); break; case AARCH64_PARSE_INVALID_FEATURE: - error ("invalid value (\"%s\") in % pragma or attribute", str); + error ("invalid feature modifier %s of value (\"%s\") in " + "% pragma or attribute", invalid_extension.c_str (), str); + aarch64_print_hint_for_extensions (invalid_extension); break; default: gcc_unreachable (); @@ -11232,6 +17215,37 @@ aarch64_handle_attr_cpu (const char *str) 
return false; } +/* Handle the argument STR to the branch-protection= attribute. */ + + static bool + aarch64_handle_attr_branch_protection (const char* str) + { + char *err_str = (char *) xmalloc (strlen (str) + 1); + enum aarch64_parse_opt_result res = aarch64_parse_branch_protection (str, + &err_str); + bool success = false; + switch (res) + { + case AARCH64_PARSE_MISSING_ARG: + error ("missing argument to % pragma or" + " attribute"); + break; + case AARCH64_PARSE_INVALID_ARG: + error ("invalid protection type (\"%s\") in % pragma or attribute", err_str); + break; + case AARCH64_PARSE_OK: + success = true; + /* Fall through. */ + case AARCH64_PARSE_INVALID_FEATURE: + break; + default: + gcc_unreachable (); + } + free (err_str); + return success; + } + /* Handle the argument STR to the tune= target attribute. */ static bool @@ -11271,7 +17285,7 @@ static bool aarch64_handle_attr_isa_flags (char *str) { enum aarch64_parse_opt_result parse_res; - unsigned long isa_flags = aarch64_isa_flags; + uint64_t isa_flags = aarch64_isa_flags; /* We allow "+nothing" in the beginning to clear out all architectural features if the user wants to handpick specific features. */ @@ -11281,7 +17295,8 @@ aarch64_handle_attr_isa_flags (char *str) str += 8; } - parse_res = aarch64_parse_extension (str, &isa_flags); + std::string invalid_extension; + parse_res = aarch64_parse_extension (str, &isa_flags, &invalid_extension); if (parse_res == AARCH64_PARSE_OK) { @@ -11296,7 +17311,8 @@ aarch64_handle_attr_isa_flags (char *str) break; case AARCH64_PARSE_INVALID_FEATURE: - error ("invalid value (\"%s\") in % pragma or attribute", str); + error ("invalid feature modifier %s of value (\"%s\") in " + "% pragma or attribute", invalid_extension.c_str (), str); break; default: @@ -11319,7 +17335,7 @@ static const struct aarch64_attribute_info aarch64_attributes[] = { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL, OPT_mfix_cortex_a53_843419 }, { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ }, - { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align }, + { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align }, { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL, OPT_momit_leaf_frame_pointer }, { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ }, @@ -11328,8 +17344,12 @@ static const struct aarch64_attribute_info aarch64_attributes[] = { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ }, { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune, OPT_mtune_ }, + { "branch-protection", aarch64_attr_custom, false, + aarch64_handle_attr_branch_protection, OPT_mbranch_protection_ }, { "sign-return-address", aarch64_attr_enum, false, NULL, OPT_msign_return_address_ }, + { "outline-atomics", aarch64_attr_bool, true, NULL, + OPT_moutline_atomics}, { NULL, aarch64_attr_custom, false, NULL, OPT____ } }; @@ -11352,10 +17372,6 @@ aarch64_process_one_target_attr (char *arg_str) char *str_to_check = (char *) alloca (len + 1); strcpy (str_to_check, arg_str); - /* Skip leading whitespace. */ - while (*str_to_check == ' ' || *str_to_check == '\t') - str_to_check++; - /* We have something like __attribute__ ((target ("+fp+nosimd"))). 
It is easier to detect and handle it explicitly here rather than going through the machinery for the rest of the target attributes in this @@ -11531,7 +17547,7 @@ aarch64_process_target_attr (tree args) unsigned int num_commas = num_occurences_in_str (',', str_to_check); /* Handle multiple target attributes separated by ','. */ - char *token = strtok (str_to_check, ","); + char *token = strtok_r (str_to_check, ",", &str_to_check); unsigned int num_attrs = 0; while (token) @@ -11543,7 +17559,7 @@ aarch64_process_target_attr (tree args) return false; } - token = strtok (NULL, ","); + token = strtok_r (NULL, ",", &str_to_check); } if (num_attrs != num_commas + 1) @@ -11580,17 +17596,18 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) } tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); - old_optimize = build_optimization_node (&global_options); + old_optimize + = build_optimization_node (&global_options, &global_options_set); func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); /* If the function changed the optimization levels as well as setting target options, start with the optimizations specified. */ if (func_optimize && func_optimize != old_optimize) - cl_optimization_restore (&global_options, + cl_optimization_restore (&global_options, &global_options_set, TREE_OPTIMIZATION (func_optimize)); /* Save the current target options to restore at the end. */ - cl_target_option_save (&cur_target, &global_options); + cl_target_option_save (&cur_target, &global_options, &global_options_set); /* If fndecl already has some target attributes applied to it, unpack them so that we add this attribute on top of them, rather than @@ -11601,11 +17618,12 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) = TREE_TARGET_OPTION (existing_target); if (existing_options) - cl_target_option_restore (&global_options, existing_options); + cl_target_option_restore (&global_options, &global_options_set, + existing_options); } else - cl_target_option_restore (&global_options, - TREE_TARGET_OPTION (target_option_current_node)); + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (target_option_current_node)); ret = aarch64_process_target_attr (args); @@ -11625,12 +17643,14 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) aarch64_init_simd_builtins (); current_target_pragma = saved_current_target_pragma; } - new_target = build_target_option_node (&global_options); + new_target = build_target_option_node (&global_options, + &global_options_set); } else new_target = NULL; - new_optimize = build_optimization_node (&global_options); + new_optimize = build_optimization_node (&global_options, + &global_options_set); if (fndecl && ret) { @@ -11640,10 +17660,10 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; } - cl_target_option_restore (&global_options, &cur_target); + cl_target_option_restore (&global_options, &global_options_set, &cur_target); if (old_optimize != new_optimize) - cl_optimization_restore (&global_options, + cl_optimization_restore (&global_options, &global_options_set, TREE_OPTIMIZATION (old_optimize)); return ret; } @@ -11682,16 +17702,13 @@ aarch64_can_inline_p (tree caller, tree callee) tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); - /* If callee has no option attributes, then it is ok to inline. 
*/ - if (!callee_tree) - return true; - struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree ? caller_tree : target_option_default_node); - struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); - + struct cl_target_option *callee_opts + = TREE_TARGET_OPTION (callee_tree ? callee_tree + : target_option_default_node); /* Callee's ISA flags should be a subset of the caller's. */ if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags) @@ -11761,6 +17778,26 @@ aarch64_can_inline_p (tree caller, tree callee) return true; } +/* Return the ID of the TLDESC ABI, initializing the descriptor if hasn't + been already. */ + +unsigned int +aarch64_tlsdesc_abi_id () +{ + predefined_function_abi &tlsdesc_abi = function_abis[ARM_PCS_TLSDESC]; + if (!tlsdesc_abi.initialized_p ()) + { + HARD_REG_SET full_reg_clobbers; + CLEAR_HARD_REG_SET (full_reg_clobbers); + SET_HARD_REG_BIT (full_reg_clobbers, R0_REGNUM); + SET_HARD_REG_BIT (full_reg_clobbers, CC_REGNUM); + for (int regno = P0_REGNUM; regno <= P15_REGNUM; ++regno) + SET_HARD_REG_BIT (full_reg_clobbers, regno); + tlsdesc_abi.initialize (ARM_PCS_TLSDESC, full_reg_clobbers); + } + return tlsdesc_abi.id (); +} + /* Return true if SYMBOL_REF X binds locally. */ static bool @@ -11778,7 +17815,8 @@ aarch64_tls_symbol_p (rtx x) if (! TARGET_HAVE_TLS) return false; - if (GET_CODE (x) != SYMBOL_REF) + x = strip_salt (x); + if (!SYMBOL_REF_P (x)) return false; return SYMBOL_REF_TLS_MODEL (x) != 0; @@ -11833,7 +17871,9 @@ aarch64_classify_tls_symbol (rtx x) enum aarch64_symbol_type aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset) { - if (GET_CODE (x) == LABEL_REF) + x = strip_salt (x); + + if (LABEL_REF_P (x)) { switch (aarch64_cmodel) { @@ -11854,7 +17894,7 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset) } } - if (GET_CODE (x) == SYMBOL_REF) + if (SYMBOL_REF_P (x)) { if (aarch64_tls_symbol_p (x)) return aarch64_classify_tls_symbol (x); @@ -11866,26 +17906,31 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset) the offset does not cause overflow of the final address. But we have no way of knowing the address of symbol at compile time so we can't accurately say if the distance between the PC and - symbol + offset is outside the addressible range of +/-1M in the - TINY code model. So we rely on images not being greater than - 1M and cap the offset at 1M and anything beyond 1M will have to - be loaded using an alternative mechanism. Furthermore if the - symbol is a weak reference to something that isn't known to - resolve to a symbol in this module, then force to memory. */ - if ((SYMBOL_REF_WEAK (x) - && !aarch64_symbol_binds_local_p (x)) - || !IN_RANGE (offset, -1048575, 1048575)) + symbol + offset is outside the addressible range of +/-1MB in the + TINY code model. So we limit the maximum offset to +/-64KB and + assume the offset to the symbol is not larger than +/-(1MB - 64KB). + If offset_within_block_p is true we allow larger offsets. + Furthermore force to memory if the symbol is a weak reference to + something that doesn't resolve to a symbol in this module. */ + + if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x)) return SYMBOL_FORCE_TO_MEM; + if (!(IN_RANGE (offset, -0x10000, 0x10000) + || offset_within_block_p (x, offset))) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_TINY_ABSOLUTE; case AARCH64_CMODEL_SMALL: /* Same reasoning as the tiny code model, but the offset cap here is - 4G. 
*/ - if ((SYMBOL_REF_WEAK (x) - && !aarch64_symbol_binds_local_p (x)) - || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263), - HOST_WIDE_INT_C (4294967264))) + 1MB, allowing +/-3.9GB for the offset to the symbol. */ + + if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x)) + return SYMBOL_FORCE_TO_MEM; + if (!(IN_RANGE (offset, -0x100000, 0x100000) + || offset_within_block_p (x, offset))) return SYMBOL_FORCE_TO_MEM; + return SYMBOL_SMALL_ABSOLUTE; case AARCH64_CMODEL_TINY_PIC: @@ -11927,11 +17972,10 @@ aarch64_constant_address_p (rtx x) bool aarch64_legitimate_pic_operand_p (rtx x) { - if (GET_CODE (x) == SYMBOL_REF - || (GET_CODE (x) == CONST - && GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) - return false; + poly_int64 offset; + x = strip_offset_and_salt (x, &offset); + if (SYMBOL_REF_P (x)) + return false; return true; } @@ -11944,10 +17988,22 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) { /* Support CSE and rematerialization of common constants. */ if (CONST_INT_P (x) - || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT) - || GET_CODE (x) == CONST_VECTOR) + || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)) return true; + /* Only accept variable-length vector constants if they can be + handled directly. + + ??? It would be possible (but complex) to handle rematerialization + of other constants via secondary reloads. */ + if (!GET_MODE_SIZE (mode).is_constant ()) + return aarch64_simd_valid_immediate (x, NULL); + + /* Otherwise, accept any CONST_VECTOR that, if all else fails, can at + least be forced to memory and loaded from there. */ + if (GET_CODE (x) == CONST_VECTOR) + return !targetm.cannot_force_const_mem (mode, x); + /* Do not allow vector struct mode constants for Advanced SIMD. We could support 0 and -1 easily, but they need support in aarch64-simd.md. */ @@ -11955,14 +18011,6 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) return false; - /* Only accept variable-length vector constants if they can be - handled directly. - - ??? It would be possible to handle rematerialization of other - constants via secondary reloads. */ - if (vec_flags & VEC_ANY_SVE) - return aarch64_simd_valid_immediate (x, NULL); - if (GET_CODE (x) == HIGH) x = XEXP (x, 0); @@ -11977,7 +18025,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) /* If an offset is being added to something else, we need to allow the base to be moved into the destination register, meaning that there are no free temporaries for the offset. */ - x = strip_offset (x, &offset); + x = strip_offset_and_salt (x, &offset); if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0) return false; @@ -11991,7 +18039,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) return true; /* Label references are always constant. 
*/ - if (GET_CODE (x) == LABEL_REF) + if (LABEL_REF_P (x)) return true; return false; @@ -12194,7 +18242,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, HOST_WIDE_INT size, rsize, adjust, align; tree t, u, cond1, cond2; - indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); + indirect_p = pass_va_arg_by_reference (type); if (indirect_p) type = build_pointer_type (type); @@ -12209,15 +18257,15 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist), f_stack, NULL_TREE); size = int_size_in_bytes (type); - align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT; + + unsigned int abi_break; + align + = aarch64_function_arg_alignment (mode, type, &abi_break) / BITS_PER_UNIT; dw_align = false; adjust = 0; - if (aarch64_vfp_is_call_or_return_candidate (mode, - type, - &ag_mode, - &nregs, - &is_ha)) + if (aarch64_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &nregs, + &is_ha, false)) { /* No frontends can create types with variable-sized modes, so we shouldn't be asked to pass or return them. */ @@ -12225,7 +18273,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, /* TYPE passed in fp/simd registers. */ if (!TARGET_FLOAT) - aarch64_err_no_fpadvsimd (mode, "varargs"); + aarch64_err_no_fpadvsimd (mode); f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), unshare_expr (valist), f_vrtop, NULL_TREE); @@ -12256,7 +18304,12 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, nregs = rsize / UNITS_PER_WORD; if (align > 8) - dw_align = true; + { + if (abi_break && warn_psabi) + inform (input_location, "parameter passing for argument of type " + "%qT changed in GCC 9.1", type); + dw_align = true; + } if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD && size < UNITS_PER_WORD) @@ -12309,23 +18362,17 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, if (align > 8) { /* if (alignof(type) > 8) (arg = arg + 15) & -16; */ - t = fold_convert (intDI_type_node, arg); - t = build2 (PLUS_EXPR, TREE_TYPE (t), t, - build_int_cst (TREE_TYPE (t), 15)); + t = fold_build_pointer_plus_hwi (arg, 15); t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_cst (TREE_TYPE (t), -16)); - t = fold_convert (TREE_TYPE (arg), t); roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t); } else roundup = NULL; /* Advance ap.__stack */ - t = fold_convert (intDI_type_node, arg); - t = build2 (PLUS_EXPR, TREE_TYPE (t), t, - build_int_cst (TREE_TYPE (t), size + 7)); + t = fold_build_pointer_plus_hwi (arg, size + 7); t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_cst (TREE_TYPE (t), -8)); - t = fold_convert (TREE_TYPE (arg), t); t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t); /* String up roundup and advance. 
*/ if (roundup) @@ -12386,6 +18433,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, field_t = aarch64_fp16_type_node; field_ptr_t = aarch64_fp16_ptr_type_node; break; + case E_BFmode: + field_t = aarch64_bf16_type_node; + field_ptr_t = aarch64_bf16_ptr_type_node; + break; case E_V2SImode: case E_V4SImode: { @@ -12399,6 +18450,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, } /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */ + TREE_ADDRESSABLE (tmp_ha) = 1; tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha); addr = t; t = fold_convert (field_ptr_t, addr); @@ -12437,9 +18489,9 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, /* Implement TARGET_SETUP_INCOMING_VARARGS. */ static void -aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, - tree type, int *pretend_size ATTRIBUTE_UNUSED, - int no_rtl) +aarch64_setup_incoming_varargs (cumulative_args_t cum_v, + const function_arg_info &arg, + int *pretend_size ATTRIBUTE_UNUSED, int no_rtl) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); CUMULATIVE_ARGS local_cum; @@ -12450,7 +18502,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, argument. Advance a local copy of CUM past the last "real" named argument, to find out how many registers are left over. */ local_cum = *cum; - aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true); + aarch64_function_arg_advance (pack_cumulative_args(&local_cum), arg); /* Found out how many registers we need to save. Honor tree-stdvar analysis results. */ @@ -12536,19 +18588,92 @@ aarch64_conditional_register_usage (void) fixed_regs[i] = 1; call_used_regs[i] = 1; } + + /* Only allow the FFR and FFRT to be accessed via special patterns. */ + CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM); + CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM); + + /* When tracking speculation, we need a couple of call-clobbered registers + to track the speculation state. It would be nice to just use + IP0 and IP1, but currently there are numerous places that just + assume these registers are free for other uses (eg pointer + authentication). */ + if (aarch64_track_speculation) + { + fixed_regs[SPECULATION_TRACKER_REGNUM] = 1; + call_used_regs[SPECULATION_TRACKER_REGNUM] = 1; + fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1; + call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1; + } } +/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. */ + +bool +aarch64_member_type_forces_blk (const_tree field_or_array, machine_mode mode) +{ + /* For records we're passed a FIELD_DECL, for arrays we're passed + an ARRAY_TYPE. In both cases we're interested in the TREE_TYPE. */ + const_tree type = TREE_TYPE (field_or_array); + + /* Assign BLKmode to anything that contains multiple SVE predicates. + For structures, the "multiple" case is indicated by MODE being + VOIDmode. */ + unsigned int num_zr, num_pr; + if (aarch64_sve::builtin_type_p (type, &num_zr, &num_pr) && num_pr != 0) + { + if (TREE_CODE (field_or_array) == ARRAY_TYPE) + return !simple_cst_equal (TYPE_SIZE (field_or_array), + TYPE_SIZE (type)); + return mode == VOIDmode; + } + + return default_member_type_forces_blk (field_or_array, mode); +} + +/* Bitmasks that indicate whether earlier versions of GCC would have + taken a different path through the ABI logic. This should result in + a -Wpsabi warning if the earlier path led to a different ABI decision. 
+ + WARN_PSABI_EMPTY_CXX17_BASE + Indicates that the type includes an artificial empty C++17 base field + that, prior to GCC 10.1, would prevent the type from being treated as + a HFA or HVA. See PR94383 for details. + + WARN_PSABI_NO_UNIQUE_ADDRESS + Indicates that the type includes an empty [[no_unique_address]] field + that, prior to GCC 10.1, would prevent the type from being treated as + a HFA or HVA. */ +const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0; +const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1; + /* Walk down the type tree of TYPE counting consecutive base elements. If *MODEP is VOIDmode, then set it to the first valid floating point type. If a non-floating point type is found, or if a floating point type that doesn't match a non-VOIDmode *MODEP is found, then return -1, - otherwise return the count in the sub-tree. */ + otherwise return the count in the sub-tree. + + The WARN_PSABI_FLAGS argument allows the caller to check whether this + function has changed its behavior relative to earlier versions of GCC. + Normally the argument should be nonnull and point to a zero-initialized + variable. The function then records whether the ABI decision might + be affected by a known fix to the ABI logic, setting the associated + WARN_PSABI_* bits if so. + + When the argument is instead a null pointer, the function tries to + simulate the behavior of GCC before all such ABI fixes were made. + This is useful to check whether the function returns something + different after the ABI fixes. */ static int -aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) +aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep, + unsigned int *warn_psabi_flags) { machine_mode mode; HOST_WIDE_INT size; + if (aarch64_sve::builtin_type_p (type)) + return -1; + switch (TREE_CODE (type)) { case REAL_TYPE: @@ -12617,7 +18742,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) return -1; - count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep, + warn_psabi_flags); if (count == -1 || !index || !TYPE_MAX_VALUE (index) @@ -12655,7 +18781,30 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) if (TREE_CODE (field) != FIELD_DECL) continue; - sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (DECL_FIELD_ABI_IGNORED (field)) + { + /* See whether this is something that earlier versions of + GCC failed to ignore. */ + unsigned int flag; + if (lookup_attribute ("no_unique_address", + DECL_ATTRIBUTES (field))) + flag = WARN_PSABI_NO_UNIQUE_ADDRESS; + else if (cxx17_empty_base_field_p (field)) + flag = WARN_PSABI_EMPTY_CXX17_BASE; + else + /* No compatibility problem. */ + continue; + + /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */ + if (warn_psabi_flags) + { + *warn_psabi_flags |= flag; + continue; + } + } + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep, + warn_psabi_flags); if (sub_count < 0) return -1; count += sub_count; @@ -12688,7 +18837,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) if (TREE_CODE (field) != FIELD_DECL) continue; - sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep, + warn_psabi_flags); if (sub_count < 0) return -1; count = count > sub_count ? 
count : sub_count; @@ -12721,12 +18871,29 @@ aarch64_short_vector_p (const_tree type, poly_int64 size = -1; if (type && TREE_CODE (type) == VECTOR_TYPE) - size = int_size_in_bytes (type); + { + if (aarch64_sve::builtin_type_p (type)) + return false; + size = int_size_in_bytes (type); + } else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) - size = GET_MODE_SIZE (mode); - - return known_eq (size, 8) || known_eq (size, 16); + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + /* Rely only on the type, not the mode, when processing SVE types. */ + if (type && aarch64_some_values_include_pst_objects_p (type)) + /* Leave later code to report an error if SVE is disabled. */ + gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode)); + else + size = GET_MODE_SIZE (mode); + } + if (known_eq (size, 8) || known_eq (size, 16)) + { + /* 64-bit and 128-bit vectors should only acquire an SVE mode if + they are being treated as scalable AAPCS64 types. */ + gcc_assert (!aarch64_sve_mode_p (mode)); + return true; + } + return false; } /* Return TRUE if the type, as described by TYPE and MODE, is a composite @@ -12769,22 +18936,28 @@ aarch64_composite_type_p (const_tree type, parameter passing registers are available). Upon successful return, *COUNT returns the number of needed registers, - *BASE_MODE returns the mode of the individual register and when IS_HAF + *BASE_MODE returns the mode of the individual register and when IS_HA is not NULL, *IS_HA indicates whether or not the argument is a homogeneous - floating-point aggregate or a homogeneous short-vector aggregate. */ + floating-point aggregate or a homogeneous short-vector aggregate. + + SILENT_P is true if the function should refrain from reporting any + diagnostics. This should only be used if the caller is certain that + any ABI decisions would eventually come through this function with + SILENT_P set to false. */ static bool aarch64_vfp_is_call_or_return_candidate (machine_mode mode, const_tree type, machine_mode *base_mode, int *count, - bool *is_ha) + bool *is_ha, + bool silent_p) { + if (is_ha != NULL) *is_ha = false; + machine_mode new_mode = VOIDmode; bool composite_p = aarch64_composite_type_p (type, mode); - if (is_ha != NULL) *is_ha = false; - if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) || aarch64_short_vector_p (type, mode)) { @@ -12799,10 +18972,39 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode, } else if (type && composite_p) { - int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); - + unsigned int warn_psabi_flags = 0; + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode, + &warn_psabi_flags); if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS) { + static unsigned last_reported_type_uid; + unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type)); + int alt; + if (!silent_p + && warn_psabi + && warn_psabi_flags + && uid != last_reported_type_uid + && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL)) + != ag_count)) + { + const char *url + = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base"; + gcc_assert (alt == -1); + last_reported_type_uid = uid; + /* Use TYPE_MAIN_VARIANT to strip any redundant const + qualification. 
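For illustration, a hedged C++ sketch (type and function names invented) of an aggregate that this function now treats as a homogeneous floating-point aggregate once the empty [[no_unique_address]] member is ignored; with -Wpsabi, passing it by value is the situation the GCC 10.1 notes just below describe. It assumes a C++ level that accepts [[no_unique_address]].

/* Two doubles plus an empty [[no_unique_address]] member: ignored for
   the ABI, so the type is an HFA and is passed in FP registers.  */
struct empty {};

struct pair_with_tag
{
  [[no_unique_address]] empty tag;
  double a, b;
};

double
sum (pair_with_tag p)
{
  return p.a + p.b;
}

double
call_sum ()
{
  pair_with_tag p = { {}, 1.0, 2.0 };
  return sum (p);
}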
*/ + if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS) + inform (input_location, "parameter passing for argument of " + "type %qT with %<[[no_unique_address]]%> members " + "changed %{in GCC 10.1%}", + TYPE_MAIN_VARIANT (type), url); + else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE) + inform (input_location, "parameter passing for argument of " + "type %qT when C++17 is enabled changed to match " + "C++14 %{in GCC 10.1%}", + TYPE_MAIN_VARIANT (type), url); + } + if (is_ha != NULL) *is_ha = true; *count = ag_count; } @@ -12812,6 +19014,7 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode, else return false; + gcc_assert (!aarch64_sve_mode_p (new_mode)); *base_mode = new_mode; return true; } @@ -12833,55 +19036,77 @@ aarch64_vector_mode_supported_p (machine_mode mode) return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0; } +/* Return the full-width SVE vector mode for element mode MODE, if one + exists. */ +opt_machine_mode +aarch64_full_sve_mode (scalar_mode mode) +{ + switch (mode) + { + case E_DFmode: + return VNx2DFmode; + case E_SFmode: + return VNx4SFmode; + case E_HFmode: + return VNx8HFmode; + case E_BFmode: + return VNx8BFmode; + case E_DImode: + return VNx2DImode; + case E_SImode: + return VNx4SImode; + case E_HImode: + return VNx8HImode; + case E_QImode: + return VNx16QImode; + default: + return opt_machine_mode (); + } +} + +/* Return the 128-bit Advanced SIMD vector mode for element mode MODE, + if it exists. */ +opt_machine_mode +aarch64_vq_mode (scalar_mode mode) +{ + switch (mode) + { + case E_DFmode: + return V2DFmode; + case E_SFmode: + return V4SFmode; + case E_HFmode: + return V8HFmode; + case E_BFmode: + return V8BFmode; + case E_SImode: + return V4SImode; + case E_HImode: + return V8HImode; + case E_QImode: + return V16QImode; + case E_DImode: + return V2DImode; + default: + return opt_machine_mode (); + } +} + /* Return appropriate SIMD container for MODE within a vector of WIDTH bits. 
*/ static machine_mode aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) { - if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR)) - switch (mode) - { - case E_DFmode: - return VNx2DFmode; - case E_SFmode: - return VNx4SFmode; - case E_HFmode: - return VNx8HFmode; - case E_DImode: - return VNx2DImode; - case E_SImode: - return VNx4SImode; - case E_HImode: - return VNx8HImode; - case E_QImode: - return VNx16QImode; - default: - return word_mode; - } + if (TARGET_SVE + && maybe_ne (width, 128) + && known_eq (width, BITS_PER_SVE_VECTOR)) + return aarch64_full_sve_mode (mode).else_mode (word_mode); gcc_assert (known_eq (width, 64) || known_eq (width, 128)); if (TARGET_SIMD) { if (known_eq (width, 128)) - switch (mode) - { - case E_DFmode: - return V2DFmode; - case E_SFmode: - return V4SFmode; - case E_HFmode: - return V8HFmode; - case E_SImode: - return V4SImode; - case E_HImode: - return V8HImode; - case E_QImode: - return V16QImode; - case E_DImode: - return V2DImode; - default: - break; - } + return aarch64_vq_mode (mode).else_mode (word_mode); else switch (mode) { @@ -12889,6 +19114,8 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) return V2SFmode; case E_HFmode: return V4HFmode; + case E_BFmode: + return V4BFmode; case E_SImode: return V2SImode; case E_HImode: @@ -12902,23 +19129,153 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) return word_mode; } +/* Compare an SVE mode SVE_M and an Advanced SIMD mode ASIMD_M + and return whether the SVE mode should be preferred over the + Advanced SIMD one in aarch64_autovectorize_vector_modes. */ +static bool +aarch64_cmp_autovec_modes (machine_mode sve_m, machine_mode asimd_m) +{ + /* Take into account the aarch64-autovec-preference param if non-zero. */ + bool only_asimd_p = aarch64_autovec_preference == 1; + bool only_sve_p = aarch64_autovec_preference == 2; + + if (only_asimd_p) + return false; + if (only_sve_p) + return true; + + /* The preference in case of a tie in costs. */ + bool prefer_asimd = aarch64_autovec_preference == 3; + bool prefer_sve = aarch64_autovec_preference == 4; + + aarch64_sve_vector_bits_enum tune_width = aarch64_tune_params.sve_width; + + poly_int64 nunits_sve = GET_MODE_NUNITS (sve_m); + poly_int64 nunits_asimd = GET_MODE_NUNITS (asimd_m); + /* If the CPU information does not have an SVE width registered use the + generic poly_int comparison that prefers SVE. If a preference is + explicitly requested avoid this path. */ + if (tune_width == SVE_SCALABLE + && !prefer_asimd + && !prefer_sve) + return maybe_gt (nunits_sve, nunits_asimd); + + /* Otherwise estimate the runtime width of the modes involved. */ + HOST_WIDE_INT est_sve = estimated_poly_value (nunits_sve); + HOST_WIDE_INT est_asimd = estimated_poly_value (nunits_asimd); + + /* Preferring SVE means picking it first unless the Advanced SIMD mode + is clearly wider. */ + if (prefer_sve) + return est_sve >= est_asimd; + /* Conversely, preferring Advanced SIMD means picking SVE only if SVE + is clearly wider. */ + if (prefer_asimd) + return est_sve > est_asimd; + + /* In the default case prefer Advanced SIMD over SVE in case of a tie. */ + return est_sve > est_asimd; +} + /* Return 128-bit container as the preferred SIMD mode for MODE. */ static machine_mode aarch64_preferred_simd_mode (scalar_mode mode) { - poly_int64 bits = TARGET_SVE ? 
BITS_PER_SVE_VECTOR : 128; - return aarch64_simd_container_mode (mode, bits); + /* Take into account explicit auto-vectorization ISA preferences through + aarch64_cmp_autovec_modes. */ + if (TARGET_SVE && aarch64_cmp_autovec_modes (VNx16QImode, V16QImode)) + return aarch64_full_sve_mode (mode).else_mode (word_mode); + if (TARGET_SIMD) + return aarch64_vq_mode (mode).else_mode (word_mode); + return word_mode; } /* Return a list of possible vector sizes for the vectorizer to iterate over. */ -static void -aarch64_autovectorize_vector_sizes (vector_sizes *sizes) +static unsigned int +aarch64_autovectorize_vector_modes (vector_modes *modes, bool) { - if (TARGET_SVE) - sizes->safe_push (BYTES_PER_SVE_VECTOR); - sizes->safe_push (16); - sizes->safe_push (8); + static const machine_mode sve_modes[] = { + /* Try using full vectors for all element types. */ + VNx16QImode, + + /* Try using 16-bit containers for 8-bit elements and full vectors + for wider elements. */ + VNx8QImode, + + /* Try using 32-bit containers for 8-bit and 16-bit elements and + full vectors for wider elements. */ + VNx4QImode, + + /* Try using 64-bit containers for all element types. */ + VNx2QImode + }; + + static const machine_mode advsimd_modes[] = { + /* Try using 128-bit vectors for all element types. */ + V16QImode, + + /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors + for wider elements. */ + V8QImode, + + /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors + for wider elements. + + TODO: We could support a limited form of V4QImode too, so that + we use 32-bit vectors for 8-bit elements. */ + V4HImode, + + /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors + for 64-bit elements. + + TODO: We could similarly support limited forms of V2QImode and V2HImode + for this case. */ + V2SImode + }; + + /* Try using N-byte SVE modes only after trying N-byte Advanced SIMD mode. + This is because: + + - If we can't use N-byte Advanced SIMD vectors then the placement + doesn't matter; we'll just continue as though the Advanced SIMD + entry didn't exist. + + - If an SVE main loop with N bytes ends up being cheaper than an + Advanced SIMD main loop with N bytes then by default we'll replace + the Advanced SIMD version with the SVE one. + + - If an Advanced SIMD main loop with N bytes ends up being cheaper + than an SVE main loop with N bytes then by default we'll try to + use the SVE loop to vectorize the epilogue instead. */ + + bool only_asimd_p = aarch64_autovec_preference == 1; + bool only_sve_p = aarch64_autovec_preference == 2; + + unsigned int sve_i = (TARGET_SVE && !only_asimd_p) ? 0 : ARRAY_SIZE (sve_modes); + unsigned int advsimd_i = 0; + + while (!only_sve_p && advsimd_i < ARRAY_SIZE (advsimd_modes)) + { + if (sve_i < ARRAY_SIZE (sve_modes) + && aarch64_cmp_autovec_modes (sve_modes[sve_i], + advsimd_modes[advsimd_i])) + modes->safe_push (sve_modes[sve_i++]); + else + modes->safe_push (advsimd_modes[advsimd_i++]); + } + while (sve_i < ARRAY_SIZE (sve_modes)) + modes->safe_push (sve_modes[sve_i++]); + + unsigned int flags = 0; + /* Consider enabling VECT_COMPARE_COSTS for SVE, both so that we + can compare SVE against Advanced SIMD and so that we can compare + multiple SVE vectorization approaches against each other. There's + not really any point doing this for Advanced SIMD only, since the + first mode that works should always be the best. */ + if (TARGET_SVE && aarch64_sve_compare_costs) + flags |= VECT_COMPARE_COSTS; + return flags; } /* Implement TARGET_MANGLE_TYPE. 
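Backing up to aarch64_autovectorize_vector_modes above, a rough standalone sketch of its mode-ordering merge, with strings standing in for machine_modes; the names here are invented, prefer_sve plays the role of aarch64_cmp_autovec_modes, and the only_sve_p/only_asimd_p special cases are omitted.

#include <cstdio>
#include <string>
#include <vector>

/* Merge two candidate lists: push the SVE candidate whenever the
   comparator prefers it, otherwise the Advanced SIMD one, then append
   any leftover SVE candidates, mirroring the trailing loop above.  */
static std::vector<std::string>
order_modes (const std::vector<std::string> &sve,
	     const std::vector<std::string> &advsimd,
	     bool (*prefer_sve) (const std::string &, const std::string &))
{
  std::vector<std::string> out;
  size_t si = 0, ai = 0;
  while (ai < advsimd.size ())
    {
      if (si < sve.size () && prefer_sve (sve[si], advsimd[ai]))
	out.push_back (sve[si++]);
      else
	out.push_back (advsimd[ai++]);
    }
  while (si < sve.size ())
    out.push_back (sve[si++]);
  return out;
}

int
main ()
{
  /* With a comparator that never prefers SVE, the Advanced SIMD modes
     come first and the SVE modes are tried afterwards.  */
  auto never = [] (const std::string &, const std::string &) { return false; };
  for (const std::string &m
       : order_modes ({"VNx16QI", "VNx8QI"}, {"V16QI", "V8QI"}, never))
    std::printf ("%s\n", m.c_str ());
  return 0;
}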
*/ @@ -12927,23 +19284,42 @@ static const char * aarch64_mangle_type (const_tree type) { /* The AArch64 ABI documents say that "__va_list" has to be - managled as if it is in the "std" namespace. */ + mangled as if it is in the "std" namespace. */ if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) return "St9__va_list"; - /* Half-precision float. */ + /* Half-precision floating point types. */ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) - return "Dh"; + { + if (TYPE_MODE (type) == BFmode) + return "u6__bf16"; + else + return "Dh"; + } /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for builtin types. */ if (TYPE_NAME (type) != NULL) - return aarch64_mangle_builtin_type (type); + { + const char *res; + if ((res = aarch64_general_mangle_builtin_type (type)) + || (res = aarch64_sve::mangle_builtin_type (type))) + return res; + } /* Use the default mangling. */ return NULL; } +/* Implement TARGET_VERIFY_TYPE_CONTEXT. */ + +static bool +aarch64_verify_type_context (location_t loc, type_context_kind context, + const_tree type, bool silent_p) +{ + return aarch64_sve::verify_type_context (loc, context, type, silent_p); +} + /* Find the first rtx_insn before insn that will generate an assembly instruction. */ @@ -13074,28 +19450,43 @@ aarch64_sve_index_immediate_p (rtx base_or_step) && IN_RANGE (INTVAL (base_or_step), -16, 15)); } -/* Return true if X is a valid immediate for the SVE ADD and SUB - instructions. Negate X first if NEGATE_P is true. */ +/* Return true if X is a valid immediate for the SVE ADD and SUB instructions + when applied to mode MODE. Negate X first if NEGATE_P is true. */ bool -aarch64_sve_arith_immediate_p (rtx x, bool negate_p) +aarch64_sve_arith_immediate_p (machine_mode mode, rtx x, bool negate_p) { - rtx elt; - - if (!const_vec_duplicate_p (x, &elt) - || !CONST_INT_P (elt)) + rtx elt = unwrap_const_vec_duplicate (x); + if (!CONST_INT_P (elt)) return false; HOST_WIDE_INT val = INTVAL (elt); if (negate_p) val = -val; - val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x))); + val &= GET_MODE_MASK (GET_MODE_INNER (mode)); if (val & 0xff) return IN_RANGE (val, 0, 0xff); return IN_RANGE (val, 0, 0xff00); } +/* Return true if X is a valid immediate for the SVE SQADD and SQSUB + instructions when applied to mode MODE. Negate X first if NEGATE_P + is true. */ + +bool +aarch64_sve_sqadd_sqsub_immediate_p (machine_mode mode, rtx x, bool negate_p) +{ + if (!aarch64_sve_arith_immediate_p (mode, x, negate_p)) + return false; + + /* After the optional negation, the immediate must be nonnegative. + E.g. a saturating add of -127 must be done via SQSUB Zn.B, Zn.B, #127 + instead of SQADD Zn.B, Zn.B, #129. */ + rtx elt = unwrap_const_vec_duplicate (x); + return negate_p == (INTVAL (elt) < 0); +} + /* Return true if X is a valid immediate operand for an SVE logical instruction such as AND. 
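Stepping back to the TARGET_MANGLE_TYPE hunk above, a small hedged example of the user-visible effect, assuming an -march selection that provides both __fp16 and __bf16 (e.g. one including the bf16 extension); the function names are invented and the expected mangled symbols follow directly from the "Dh" and "u6__bf16" strings in the hunk.

void f (__fp16) {}   /* Expected to mangle as _Z1fDh.        */
void g (__bf16) {}   /* Expected to mangle as _Z1gu6__bf16.  */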
*/ @@ -13116,13 +19507,11 @@ aarch64_sve_bitmask_immediate_p (rtx x) bool aarch64_sve_dup_immediate_p (rtx x) { - rtx elt; - - if (!const_vec_duplicate_p (x, &elt) - || !CONST_INT_P (elt)) + x = aarch64_bit_representation (unwrap_const_vec_duplicate (x)); + if (!CONST_INT_P (x)) return false; - HOST_WIDE_INT val = INTVAL (elt); + HOST_WIDE_INT val = INTVAL (x); if (val & 0xff) return IN_RANGE (val, -0x80, 0x7f); return IN_RANGE (val, -0x8000, 0x7f00); @@ -13134,13 +19523,11 @@ aarch64_sve_dup_immediate_p (rtx x) bool aarch64_sve_cmp_immediate_p (rtx x, bool signed_p) { - rtx elt; - - return (const_vec_duplicate_p (x, &elt) - && CONST_INT_P (elt) + x = unwrap_const_vec_duplicate (x); + return (CONST_INT_P (x) && (signed_p - ? IN_RANGE (INTVAL (elt), -16, 15) - : IN_RANGE (INTVAL (elt), 0, 127))); + ? IN_RANGE (INTVAL (x), -16, 15) + : IN_RANGE (INTVAL (x), 0, 127))); } /* Return true if X is a valid immediate operand for an SVE FADD or FSUB @@ -13153,7 +19540,7 @@ aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p) REAL_VALUE_TYPE r; if (!const_vec_duplicate_p (x, &elt) - || GET_CODE (elt) != CONST_DOUBLE) + || !CONST_DOUBLE_P (elt)) return false; r = *CONST_DOUBLE_REAL_VALUE (elt); @@ -13176,11 +19563,10 @@ aarch64_sve_float_mul_immediate_p (rtx x) { rtx elt; - /* GCC will never generate a multiply with an immediate of 2, so there is no - point testing for it (even though it is a valid constant). */ return (const_vec_duplicate_p (x, &elt) - && GET_CODE (elt) == CONST_DOUBLE - && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf)); + && CONST_DOUBLE_P (elt) + && (real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf) + || real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconst2))); } /* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate @@ -13333,6 +19719,77 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64, return false; } +/* Return true if X is an UNSPEC_PTRUE constant of the form: + + (const (unspec [PATTERN ZERO] UNSPEC_PTRUE)) + + where PATTERN is the svpattern as a CONST_INT and where ZERO + is a zero constant of the required PTRUE mode (which can have + fewer elements than X's mode, if zero bits are significant). + + If so, and if INFO is nonnull, describe the immediate in INFO. */ +bool +aarch64_sve_ptrue_svpattern_p (rtx x, struct simd_immediate_info *info) +{ + if (GET_CODE (x) != CONST) + return false; + + x = XEXP (x, 0); + if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_PTRUE) + return false; + + if (info) + { + aarch64_svpattern pattern + = (aarch64_svpattern) INTVAL (XVECEXP (x, 0, 0)); + machine_mode pred_mode = GET_MODE (XVECEXP (x, 0, 1)); + scalar_int_mode int_mode = aarch64_sve_element_int_mode (pred_mode); + *info = simd_immediate_info (int_mode, pattern); + } + return true; +} + +/* Return true if X is a valid SVE predicate. If INFO is nonnull, use + it to describe valid immediates. */ + +static bool +aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info) +{ + if (aarch64_sve_ptrue_svpattern_p (x, info)) + return true; + + if (x == CONST0_RTX (GET_MODE (x))) + { + if (info) + *info = simd_immediate_info (DImode, 0); + return true; + } + + /* Analyze the value as a VNx16BImode. This should be relatively + efficient, since rtx_vector_builder has enough built-in capacity + to store all VLA predicate constants without needing the heap. 
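For reference, a hedged ACLE-level sketch (assuming <arm_sve.h> and an SVE-enabled -march) of source constructs whose constants are expected to reach the predicate-immediate handling here: an all-true predicate and a fixed-length svpattern.

#include <arm_sve.h>

svbool_t all_true (void) { return svptrue_b8 (); }
svbool_t first_four_words (void) { return svptrue_pat_b32 (SV_VL4); }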
*/ + rtx_vector_builder builder; + if (!aarch64_get_sve_pred_bits (builder, x)) + return false; + + unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder); + if (int vl = aarch64_partial_ptrue_length (builder, elt_size)) + { + machine_mode mode = aarch64_sve_pred_mode (elt_size).require (); + aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl); + if (pattern != AARCH64_NUM_SVPATTERNS) + { + if (info) + { + scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode); + *info = simd_immediate_info (int_mode, pattern); + } + return true; + } + } + return false; +} + /* Return true if OP is a valid SIMD immediate for the operation described by WHICH. If INFO is nonnull, use it to describe valid immediates. */ @@ -13345,6 +19802,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) return false; + if (vec_flags & VEC_SVE_PRED) + return aarch64_sve_pred_valid_immediate (op, info); + scalar_mode elt_mode = GET_MODE_INNER (mode); rtx base, step; unsigned int n_elts; @@ -13360,7 +19820,14 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, return false; if (info) - *info = simd_immediate_info (elt_mode, base, step); + { + /* Get the corresponding container mode. E.g. an INDEX on V2SI + should yield two integer values per 128-bit block, meaning + that we need to treat it in the same way as V2DI and then + ignore the upper 32 bits of each element. */ + elt_mode = aarch64_sve_container_int_mode (mode); + *info = simd_immediate_info (elt_mode, base, step); + } return true; } else if (GET_CODE (op) == CONST_VECTOR @@ -13369,11 +19836,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, else return false; - /* Handle PFALSE and PTRUE. */ - if (vec_flags & VEC_SVE_PRED) - return (op == CONST0_RTX (mode) - || op == CONSTM1_RTX (mode)); - scalar_float_mode elt_float_mode; if (n_elts == 1 && is_a (elt_mode, &elt_float_mode)) @@ -13388,12 +19850,28 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, } } - unsigned int elt_size = GET_MODE_SIZE (elt_mode); + /* If all elements in an SVE vector have the same value, we have a free + choice between using the element mode and using the container mode. + Using the element mode means that unused parts of the vector are + duplicates of the used elements, while using the container mode means + that the unused parts are an extension of the used elements. Using the + element mode is better for (say) VNx4HI 0x101, since 0x01010101 is valid + for its container mode VNx4SI while 0x00000101 isn't. + + If not all elements in an SVE vector have the same value, we need the + transition from one element to the next to occur at container boundaries. + E.g. a fixed-length VNx4HI containing { 1, 2, 3, 4 } should be treated + in the same way as a VNx4SI containing { 1, 2, 3, 4 }. */ + scalar_int_mode elt_int_mode; + if ((vec_flags & VEC_SVE_DATA) && n_elts > 1) + elt_int_mode = aarch64_sve_container_int_mode (mode); + else + elt_int_mode = int_mode_for_mode (elt_mode).require (); + + unsigned int elt_size = GET_MODE_SIZE (elt_int_mode); if (elt_size > 8) return false; - scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require (); - /* Expand the vector constant out into a byte vector, with the least significant byte of the register first. 
*/ auto_vec bytes; @@ -13457,11 +19935,14 @@ aarch64_check_zero_based_sve_index_immediate (rtx x) bool aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left) { + x = unwrap_const_vec_duplicate (x); + if (!CONST_INT_P (x)) + return false; int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; if (left) - return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1); + return IN_RANGE (INTVAL (x), 0, bit_width - 1); else - return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width); + return IN_RANGE (INTVAL (x), 1, bit_width); } /* Return the bitmask CONST_INT to select the bits required by a zero extract @@ -13489,12 +19970,23 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) return true; if (VECTOR_MODE_P (GET_MODE (x))) - return aarch64_simd_valid_immediate (x, NULL); + { + /* Require predicate constants to be VNx16BI before RA, so that we + force everything to have a canonical form. */ + if (!lra_in_progress + && !reload_completed + && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL + && GET_MODE (x) != VNx16BImode) + return false; + + return aarch64_simd_valid_immediate (x, NULL); + } - if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) + x = strip_salt (x); + if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x)) return true; - if (aarch64_sve_cnt_immediate_p (x)) + if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x)) return true; return aarch64_classify_symbolic_expression (x) @@ -13597,6 +20089,36 @@ aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, return true; } +/* Return a PARALLEL containing NELTS elements, with element I equal + to BASE + I * STEP. */ + +rtx +aarch64_gen_stepped_int_parallel (unsigned int nelts, int base, int step) +{ + rtvec vec = rtvec_alloc (nelts); + for (unsigned int i = 0; i < nelts; ++i) + RTVEC_ELT (vec, i) = gen_int_mode (base + i * step, DImode); + return gen_rtx_PARALLEL (VOIDmode, vec); +} + +/* Return true if OP is a PARALLEL of CONST_INTs that form a linear + series with step STEP. */ + +bool +aarch64_stepped_int_parallel_p (rtx op, int step) +{ + if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0))) + return false; + + unsigned HOST_WIDE_INT base = UINTVAL (XVECEXP (op, 0, 0)); + for (int i = 1; i < XVECLEN (op, 0); ++i) + if (!CONST_INT_P (XVECEXP (op, 0, i)) + || UINTVAL (XVECEXP (op, 0, i)) != base + i * step) + return false; + + return true; +} + /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). */ void @@ -13649,6 +20171,72 @@ aarch64_sve_ld1r_operand_p (rtx op) && offset_6bit_unsigned_scaled_p (mode, addr.const_offset)); } +/* Return true if OP is a valid MEM operand for an SVE LD1R{Q,O} instruction + where the size of the read data is specified by `mode` and the size of the + vector elements are specified by `elem_mode`. */ +bool +aarch64_sve_ld1rq_ld1ro_operand_p (rtx op, machine_mode mode, + scalar_mode elem_mode) +{ + struct aarch64_address_info addr; + if (!MEM_P (op) + || !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false)) + return false; + + if (addr.type == ADDRESS_REG_IMM) + return offset_4bit_signed_scaled_p (mode, addr.const_offset); + + if (addr.type == ADDRESS_REG_REG) + return (1U << addr.shift) == GET_MODE_SIZE (elem_mode); + + return false; +} + +/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. 
*/ +bool +aarch64_sve_ld1rq_operand_p (rtx op) +{ + return aarch64_sve_ld1rq_ld1ro_operand_p (op, TImode, + GET_MODE_INNER (GET_MODE (op))); +} + +/* Return true if OP is a valid MEM operand for an SVE LD1RO instruction for + accessing a vector where the element size is specified by `elem_mode`. */ +bool +aarch64_sve_ld1ro_operand_p (rtx op, scalar_mode elem_mode) +{ + return aarch64_sve_ld1rq_ld1ro_operand_p (op, OImode, elem_mode); +} + +/* Return true if OP is a valid MEM operand for an SVE LDFF1 instruction. */ +bool +aarch64_sve_ldff1_operand_p (rtx op) +{ + if (!MEM_P (op)) + return false; + + struct aarch64_address_info addr; + if (!aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op), false)) + return false; + + if (addr.type == ADDRESS_REG_IMM) + return known_eq (addr.const_offset, 0); + + return addr.type == ADDRESS_REG_REG; +} + +/* Return true if OP is a valid MEM operand for an SVE LDNF1 instruction. */ +bool +aarch64_sve_ldnf1_operand_p (rtx op) +{ + struct aarch64_address_info addr; + + return (MEM_P (op) + && aarch64_classify_address (&addr, XEXP (op, 0), + GET_MODE (op), false) + && addr.type == ADDRESS_REG_IMM); +} + /* Return true if OP is a valid MEM operand for an SVE LDR instruction. The conditions for STR are the same. */ bool @@ -13662,6 +20250,21 @@ aarch64_sve_ldr_operand_p (rtx op) && addr.type == ADDRESS_REG_IMM); } +/* Return true if OP is a valid address for an SVE PRF[BHWD] instruction, + addressing memory of mode MODE. */ +bool +aarch64_sve_prefetch_operand_p (rtx op, machine_mode mode) +{ + struct aarch64_address_info addr; + if (!aarch64_classify_address (&addr, op, mode, false, ADDR_QUERY_ANY)) + return false; + + if (addr.type == ADDRESS_REG_IMM) + return offset_6bit_signed_scaled_p (mode, addr.const_offset); + + return addr.type == ADDRESS_REG_REG; +} + /* Return true if OP is a valid MEM operand for an SVE_STRUCT mode. We need to be able to access the individual pieces, so the range is different from LD[234] and ST[234]. */ @@ -13723,25 +20326,27 @@ aarch64_simd_attr_length_rglist (machine_mode mode) static HOST_WIDE_INT aarch64_simd_vector_alignment (const_tree type) { - if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) - /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can - be set for non-predicate vectors of booleans. Modes are the most - direct way we have of identifying real SVE predicate types. */ - return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128; - HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); - return MIN (align, 128); + /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can + be set for non-predicate vectors of booleans. Modes are the most + direct way we have of identifying real SVE predicate types. */ + if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL) + return 16; + widest_int min_size + = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type))); + return wi::umin (min_size, 128).to_uhwi (); } /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */ -static HOST_WIDE_INT +static poly_uint64 aarch64_vectorize_preferred_vector_alignment (const_tree type) { if (aarch64_sve_data_mode_p (TYPE_MODE (type))) { - /* If the length of the vector is fixed, try to align to that length, - otherwise don't try to align at all. */ + /* If the length of the vector is a fixed power of 2, try to align + to that length, otherwise don't try to align at all. 
*/ HOST_WIDE_INT result; - if (!BITS_PER_SVE_VECTOR.is_constant (&result)) + if (!GET_MODE_BITSIZE (TYPE_MODE (type)).is_constant (&result) + || !pow2p_hwi (result)) result = TYPE_ALIGN (TREE_TYPE (type)); return result; } @@ -13758,9 +20363,11 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) /* For fixed-length vectors, check that the vectorizer will aim for full-vector alignment. This isn't true for generic GCC vectors that are wider than the ABI maximum of 128 bits. */ + poly_uint64 preferred_alignment = + aarch64_vectorize_preferred_vector_alignment (type); if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (wi::to_widest (TYPE_SIZE (type)) - != aarch64_vectorize_preferred_vector_alignment (type))) + && maybe_ne (wi::to_widest (TYPE_SIZE (type)), + preferred_alignment)) return false; /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */ @@ -13812,7 +20419,7 @@ aarch64_simd_dup_constant (rtx vals) /* Generate code to load VALS, which is a PARALLEL containing only constants (for vec_init) or CONST_VECTOR, efficiently into a register. Returns an RTX to copy into the register, or NULL_RTX - for a PARALLEL that can not be converted into a CONST_VECTOR. */ + for a PARALLEL that cannot be converted into a CONST_VECTOR. */ static rtx aarch64_simd_make_constant (rtx vals) { @@ -13850,12 +20457,12 @@ aarch64_simd_make_constant (rtx vals) /* Loaded using DUP. */ return const_dup; else if (const_vec != NULL_RTX) - /* Load from constant pool. We can not take advantage of single-cycle + /* Load from constant pool. We cannot take advantage of single-cycle LD1 because we need a PC-relative addressing mode. */ return const_vec; else /* A PARALLEL containing something not valid inside CONST_VECTOR. - We can not construct an initializer. */ + We cannot construct an initializer. */ return NULL_RTX; } @@ -13876,6 +20483,45 @@ aarch64_expand_vector_init (rtx target, rtx vals) rtx v0 = XVECEXP (vals, 0, 0); bool all_same = true; + /* This is a special vec_init where N is not an element mode but a + vector mode with half the elements of M. We expect to find two entries + of mode N in VALS and we must put their concatentation into TARGET. */ + if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0)))) + { + gcc_assert (known_eq (GET_MODE_SIZE (mode), + 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0))))); + rtx lo = XVECEXP (vals, 0, 0); + rtx hi = XVECEXP (vals, 0, 1); + machine_mode narrow_mode = GET_MODE (lo); + gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode); + gcc_assert (narrow_mode == GET_MODE (hi)); + + /* When we want to concatenate a half-width vector with zeroes we can + use the aarch64_combinez[_be] patterns. Just make sure that the + zeroes are in the right half. */ + if (BYTES_BIG_ENDIAN + && aarch64_simd_imm_zero (lo, narrow_mode) + && general_operand (hi, narrow_mode)) + emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo)); + else if (!BYTES_BIG_ENDIAN + && aarch64_simd_imm_zero (hi, narrow_mode) + && general_operand (lo, narrow_mode)) + emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi)); + else + { + /* Else create the two half-width registers and combine them. */ + if (!REG_P (lo)) + lo = force_reg (GET_MODE (lo), lo); + if (!REG_P (hi)) + hi = force_reg (GET_MODE (hi), hi); + + if (BYTES_BIG_ENDIAN) + std::swap (lo, hi); + emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi)); + } + return; + } + /* Count the number of variable elements to initialise. 
*/ for (int i = 0; i < n_elts; ++i) { @@ -13943,9 +20589,54 @@ aarch64_expand_vector_init (rtx target, rtx vals) maxv = matches[i][1]; } - /* Create a duplicate of the most common element. */ - rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement)); - aarch64_emit_move (target, gen_vec_duplicate (mode, x)); + /* Create a duplicate of the most common element, unless all elements + are equally useless to us, in which case just immediately set the + vector register using the first element. */ + + if (maxv == 1) + { + /* For vectors of two 64-bit elements, we can do even better. */ + if (n_elts == 2 + && (inner_mode == E_DImode + || inner_mode == E_DFmode)) + + { + rtx x0 = XVECEXP (vals, 0, 0); + rtx x1 = XVECEXP (vals, 0, 1); + /* Combine can pick up this case, but handling it directly + here leaves clearer RTL. + + This is load_pair_lanes, and also gives us a clean-up + for store_pair_lanes. */ + if (memory_operand (x0, inner_mode) + && memory_operand (x1, inner_mode) + && !STRICT_ALIGNMENT + && rtx_equal_p (XEXP (x1, 0), + plus_constant (Pmode, + XEXP (x0, 0), + GET_MODE_SIZE (inner_mode)))) + { + rtx t; + if (inner_mode == DFmode) + t = gen_load_pair_lanesdf (target, x0, x1); + else + t = gen_load_pair_lanesdi (target, x0, x1); + emit_insn (t); + return; + } + } + /* The subreg-move sequence below will move into lane zero of the + vector register. For big-endian we want that position to hold + the last element of VALS. */ + maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0; + rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement)); + aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode)); + } + else + { + rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement)); + aarch64_emit_move (target, gen_vec_duplicate (mode, x)); + } /* Insert the rest. */ for (int i = 0; i < n_elts; i++) @@ -14002,6 +20693,367 @@ aarch64_expand_vector_init (rtx target, rtx vals) } } +/* Emit RTL corresponding to: + insr TARGET, ELEM. */ + +static void +emit_insr (rtx target, rtx elem) +{ + machine_mode mode = GET_MODE (target); + scalar_mode elem_mode = GET_MODE_INNER (mode); + elem = force_reg (elem_mode, elem); + + insn_code icode = optab_handler (vec_shl_insert_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + emit_insn (GEN_FCN (icode) (target, target, elem)); +} + +/* Subroutine of aarch64_sve_expand_vector_init for handling + trailing constants. + This function works as follows: + (a) Create a new vector consisting of trailing constants. + (b) Initialize TARGET with the constant vector using emit_move_insn. + (c) Insert remaining elements in TARGET using insr. + NELTS is the total number of elements in original vector while + while NELTS_REQD is the number of elements that are actually + significant. + + ??? The heuristic used is to do above only if number of constants + is at least half the total number of elements. May need fine tuning. */ + +static bool +aarch64_sve_expand_vector_init_handle_trailing_constants + (rtx target, const rtx_vector_builder &builder, int nelts, int nelts_reqd) +{ + machine_mode mode = GET_MODE (target); + scalar_mode elem_mode = GET_MODE_INNER (mode); + int n_trailing_constants = 0; + + for (int i = nelts_reqd - 1; + i >= 0 && valid_for_const_vector_p (elem_mode, builder.elt (i)); + i--) + n_trailing_constants++; + + if (n_trailing_constants >= nelts_reqd / 2) + { + /* Try to use the natural pattern of BUILDER to extend the trailing + constant elements to a full vector. Replace any variables in the + extra elements with zeros. 
+ + ??? It would be better if the builders supported "don't care" + elements, with the builder filling in whichever elements + give the most compact encoding. */ + rtx_vector_builder v (mode, nelts, 1); + for (int i = 0; i < nelts; i++) + { + rtx x = builder.elt (i + nelts_reqd - n_trailing_constants); + if (!valid_for_const_vector_p (elem_mode, x)) + x = const0_rtx; + v.quick_push (x); + } + rtx const_vec = v.build (); + emit_move_insn (target, const_vec); + + for (int i = nelts_reqd - n_trailing_constants - 1; i >= 0; i--) + emit_insr (target, builder.elt (i)); + + return true; + } + + return false; +} + +/* Subroutine of aarch64_sve_expand_vector_init. + Works as follows: + (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER. + (b) Skip trailing elements from BUILDER, which are the same as + element NELTS_REQD - 1. + (c) Insert earlier elements in reverse order in TARGET using insr. */ + +static void +aarch64_sve_expand_vector_init_insert_elems (rtx target, + const rtx_vector_builder &builder, + int nelts_reqd) +{ + machine_mode mode = GET_MODE (target); + scalar_mode elem_mode = GET_MODE_INNER (mode); + + struct expand_operand ops[2]; + enum insn_code icode = optab_handler (vec_duplicate_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], builder.elt (nelts_reqd - 1), elem_mode); + expand_insn (icode, 2, ops); + + int ndups = builder.count_dups (nelts_reqd - 1, -1, -1); + for (int i = nelts_reqd - ndups - 1; i >= 0; i--) + emit_insr (target, builder.elt (i)); +} + +/* Subroutine of aarch64_sve_expand_vector_init to handle case + when all trailing elements of builder are same. + This works as follows: + (a) Use expand_insn interface to broadcast last vector element in TARGET. + (b) Insert remaining elements in TARGET using insr. + + ??? The heuristic used is to do above if number of same trailing elements + is at least 3/4 of total number of elements, loosely based on + heuristic from mostly_zeros_p. May need fine-tuning. */ + +static bool +aarch64_sve_expand_vector_init_handle_trailing_same_elem + (rtx target, const rtx_vector_builder &builder, int nelts_reqd) +{ + int ndups = builder.count_dups (nelts_reqd - 1, -1, -1); + if (ndups >= (3 * nelts_reqd) / 4) + { + aarch64_sve_expand_vector_init_insert_elems (target, builder, + nelts_reqd - ndups + 1); + return true; + } + + return false; +} + +/* Initialize register TARGET from BUILDER. NELTS is the constant number + of elements in BUILDER. + + The function tries to initialize TARGET from BUILDER if it fits one + of the special cases outlined below. + + Failing that, the function divides BUILDER into two sub-vectors: + v_even = even elements of BUILDER; + v_odd = odd elements of BUILDER; + + and recursively calls itself with v_even and v_odd. + + if (recursive call succeeded for v_even or v_odd) + TARGET = zip (v_even, v_odd) + + The function returns true if it managed to build TARGET from BUILDER + with one of the special cases, false otherwise. + + Example: {a, 1, b, 2, c, 3, d, 4} + + The vector gets divided into: + v_even = {a, b, c, d} + v_odd = {1, 2, 3, 4} + + aarch64_sve_expand_vector_init(v_odd) hits case 1 and + initialize tmp2 from constant vector v_odd using emit_move_insn. 
+ + aarch64_sve_expand_vector_init(v_even) fails since v_even contains + 4 elements, so we construct tmp1 from v_even using insr: + tmp1 = dup(d) + insr tmp1, c + insr tmp1, b + insr tmp1, a + + And finally: + TARGET = zip (tmp1, tmp2) + which sets TARGET to {a, 1, b, 2, c, 3, d, 4}. */ + +static bool +aarch64_sve_expand_vector_init (rtx target, const rtx_vector_builder &builder, + int nelts, int nelts_reqd) +{ + machine_mode mode = GET_MODE (target); + + /* Case 1: Vector contains trailing constants. */ + + if (aarch64_sve_expand_vector_init_handle_trailing_constants + (target, builder, nelts, nelts_reqd)) + return true; + + /* Case 2: Vector contains leading constants. */ + + rtx_vector_builder rev_builder (mode, nelts_reqd, 1); + for (int i = 0; i < nelts_reqd; i++) + rev_builder.quick_push (builder.elt (nelts_reqd - i - 1)); + rev_builder.finalize (); + + if (aarch64_sve_expand_vector_init_handle_trailing_constants + (target, rev_builder, nelts, nelts_reqd)) + { + emit_insn (gen_aarch64_sve_rev (mode, target, target)); + return true; + } + + /* Case 3: Vector contains trailing same element. */ + + if (aarch64_sve_expand_vector_init_handle_trailing_same_elem + (target, builder, nelts_reqd)) + return true; + + /* Case 4: Vector contains leading same element. */ + + if (aarch64_sve_expand_vector_init_handle_trailing_same_elem + (target, rev_builder, nelts_reqd) && nelts_reqd == nelts) + { + emit_insn (gen_aarch64_sve_rev (mode, target, target)); + return true; + } + + /* Avoid recursing below 4-elements. + ??? The threshold 4 may need fine-tuning. */ + + if (nelts_reqd <= 4) + return false; + + rtx_vector_builder v_even (mode, nelts, 1); + rtx_vector_builder v_odd (mode, nelts, 1); + + for (int i = 0; i < nelts * 2; i += 2) + { + v_even.quick_push (builder.elt (i)); + v_odd.quick_push (builder.elt (i + 1)); + } + + v_even.finalize (); + v_odd.finalize (); + + rtx tmp1 = gen_reg_rtx (mode); + bool did_even_p = aarch64_sve_expand_vector_init (tmp1, v_even, + nelts, nelts_reqd / 2); + + rtx tmp2 = gen_reg_rtx (mode); + bool did_odd_p = aarch64_sve_expand_vector_init (tmp2, v_odd, + nelts, nelts_reqd / 2); + + if (!did_even_p && !did_odd_p) + return false; + + /* Initialize v_even and v_odd using INSR if it didn't match any of the + special cases and zip v_even, v_odd. */ + + if (!did_even_p) + aarch64_sve_expand_vector_init_insert_elems (tmp1, v_even, nelts_reqd / 2); + + if (!did_odd_p) + aarch64_sve_expand_vector_init_insert_elems (tmp2, v_odd, nelts_reqd / 2); + + rtvec v = gen_rtvec (2, tmp1, tmp2); + emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1)); + return true; +} + +/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */ + +void +aarch64_sve_expand_vector_init (rtx target, rtx vals) +{ + machine_mode mode = GET_MODE (target); + int nelts = XVECLEN (vals, 0); + + rtx_vector_builder v (mode, nelts, 1); + for (int i = 0; i < nelts; i++) + v.quick_push (XVECEXP (vals, 0, i)); + v.finalize (); + + /* If neither sub-vectors of v could be initialized specially, + then use INSR to insert all elements from v into TARGET. + ??? This might not be optimal for vectors with large + initializers like 16-element or above. + For nelts < 4, it probably isn't useful to handle specially. */ + + if (nelts < 4 + || !aarch64_sve_expand_vector_init (target, v, nelts, nelts)) + aarch64_sve_expand_vector_init_insert_elems (target, v, nelts); +} + +/* Check whether VALUE is a vector constant in which every element + is either a power of 2 or a negated power of 2. 
If so, return + a constant vector of log2s, and flip CODE between PLUS and MINUS + if VALUE contains negated powers of 2. Return NULL_RTX otherwise. */ + +static rtx +aarch64_convert_mult_to_shift (rtx value, rtx_code &code) +{ + if (GET_CODE (value) != CONST_VECTOR) + return NULL_RTX; + + rtx_vector_builder builder; + if (!builder.new_unary_operation (GET_MODE (value), value, false)) + return NULL_RTX; + + scalar_mode int_mode = GET_MODE_INNER (GET_MODE (value)); + /* 1 if the result of the multiplication must be negated, + 0 if it mustn't, or -1 if we don't yet care. */ + int negate = -1; + unsigned int encoded_nelts = const_vector_encoded_nelts (value); + for (unsigned int i = 0; i < encoded_nelts; ++i) + { + rtx elt = CONST_VECTOR_ENCODED_ELT (value, i); + if (!CONST_SCALAR_INT_P (elt)) + return NULL_RTX; + rtx_mode_t val (elt, int_mode); + wide_int pow2 = wi::neg (val); + if (val != pow2) + { + /* It matters whether we negate or not. Make that choice, + and make sure that it's consistent with previous elements. */ + if (negate == !wi::neg_p (val)) + return NULL_RTX; + negate = wi::neg_p (val); + if (!negate) + pow2 = val; + } + /* POW2 is now the value that we want to be a power of 2. */ + int shift = wi::exact_log2 (pow2); + if (shift < 0) + return NULL_RTX; + builder.quick_push (gen_int_mode (shift, int_mode)); + } + if (negate == -1) + /* PLUS and MINUS are equivalent; canonicalize on PLUS. */ + code = PLUS; + else if (negate == 1) + code = code == PLUS ? MINUS : PLUS; + return builder.build (); +} + +/* Prepare for an integer SVE multiply-add or multiply-subtract pattern; + CODE is PLUS for the former and MINUS for the latter. OPERANDS is the + operands array, in the same order as for fma_optab. Return true if + the function emitted all the necessary instructions, false if the caller + should generate the pattern normally with the new OPERANDS array. */ + +bool +aarch64_prepare_sve_int_fma (rtx *operands, rtx_code code) +{ + machine_mode mode = GET_MODE (operands[0]); + if (rtx shifts = aarch64_convert_mult_to_shift (operands[2], code)) + { + rtx product = expand_binop (mode, vashl_optab, operands[1], shifts, + NULL_RTX, true, OPTAB_DIRECT); + force_expand_binop (mode, code == PLUS ? add_optab : sub_optab, + operands[3], product, operands[0], true, + OPTAB_DIRECT); + return true; + } + operands[2] = force_reg (mode, operands[2]); + return false; +} + +/* Likewise, but for a conditional pattern. */ + +bool +aarch64_prepare_sve_cond_int_fma (rtx *operands, rtx_code code) +{ + machine_mode mode = GET_MODE (operands[0]); + if (rtx shifts = aarch64_convert_mult_to_shift (operands[3], code)) + { + rtx product = expand_binop (mode, vashl_optab, operands[2], shifts, + NULL_RTX, true, OPTAB_DIRECT); + emit_insn (gen_cond (code, mode, operands[0], operands[1], + operands[4], product, operands[5])); + return true; + } + operands[3] = force_reg (mode, operands[3]); + return false; +} + static unsigned HOST_WIDE_INT aarch64_shift_truncation_mask (machine_mode mode) { @@ -14034,6 +21086,23 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; } +/* Output .variant_pcs for aarch64_vector_pcs function symbols. 
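A rough standalone sketch of the power-of-two test performed by aarch64_convert_mult_to_shift above; the names are invented, wide_int is replaced by int64_t, and the "don't care" case is simplified, but the checks follow the same shape: every element must be +2^k or -2^k with one consistent sign, and the caller learns whether the surrounding add/sub code must be flipped.

#include <cstdint>
#include <cstdio>
#include <vector>

static bool
mult_to_shifts (const std::vector<int64_t> &elts,
		std::vector<int> &shifts, bool &negate)
{
  int neg = -1;			/* -1 until the first element decides.  */
  shifts.clear ();
  for (int64_t v : elts)
    {
      uint64_t mag = v < 0 ? - (uint64_t) v : (uint64_t) v;
      if (mag == 0 || (mag & (mag - 1)) != 0)
	return false;		/* Not a power of two.  */
      int sign = v < 0;
      if (neg >= 0 && neg != sign)
	return false;		/* Mixed signs: no single code works.  */
      neg = sign;
      int shift = 0;
      while ((mag >> shift) != 1)
	shift++;
      shifts.push_back (shift);
    }
  negate = neg == 1;
  return true;
}

int
main ()
{
  std::vector<int> shifts;
  bool negate;
  if (mult_to_shifts ({4, 4, 8, 8}, shifts, negate))
    for (int s : shifts)
      std::printf ("shift %d%s\n", s, negate ? " (negated)" : "");
  return 0;
}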
*/ + +static void +aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name) +{ + if (TREE_CODE (decl) == FUNCTION_DECL) + { + arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id (); + if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE) + { + fprintf (stream, "\t.variant_pcs\t"); + assemble_name (stream, name); + fprintf (stream, "\n"); + } + } +} + /* The last .arch and .tune assembly strings that we printed. */ static std::string aarch64_last_printed_arch_string; static std::string aarch64_last_printed_tune_string; @@ -14057,7 +21126,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, const struct processor *this_arch = aarch64_get_arch (targ_options->x_explicit_arch); - unsigned long isa_flags = targ_options->x_aarch64_isa_flags; + uint64_t isa_flags = targ_options->x_aarch64_isa_flags; std::string extension = aarch64_get_extension_string_for_isa_flags (isa_flags, this_arch->flags); @@ -14083,9 +21152,72 @@ aarch64_declare_function_name (FILE *stream, const char* name, aarch64_last_printed_tune_string = this_tune->name; } + aarch64_asm_output_variant_pcs (stream, fndecl, name); + /* Don't forget the type directive for ELF. */ ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function"); ASM_OUTPUT_LABEL (stream, name); + + cfun->machine->label_is_assembled = true; +} + +/* Implement PRINT_PATCHABLE_FUNCTION_ENTRY. Check if the patch area is after + the function label and emit a BTI if necessary. */ + +void +aarch64_print_patchable_function_entry (FILE *file, + unsigned HOST_WIDE_INT patch_area_size, + bool record_p) +{ + if (cfun->machine->label_is_assembled + && aarch64_bti_enabled () + && !cgraph_node::get (cfun->decl)->only_called_directly_p ()) + { + /* Remove the BTI that follows the patch area and insert a new BTI + before the patch area right after the function label. */ + rtx_insn *insn = next_real_nondebug_insn (get_insns ()); + if (insn + && INSN_P (insn) + && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_BTI_C) + delete_insn (insn); + asm_fprintf (file, "\thint\t34 // bti c\n"); + } + + default_print_patchable_function_entry (file, patch_area_size, record_p); +} + +/* Implement ASM_OUTPUT_DEF_FROM_DECLS. Output .variant_pcs for aliases. */ + +void +aarch64_asm_output_alias (FILE *stream, const tree decl, const tree target) +{ + const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); + const char *value = IDENTIFIER_POINTER (target); + aarch64_asm_output_variant_pcs (stream, decl, name); + ASM_OUTPUT_DEF (stream, name, value); +} + +/* Implement ASM_OUTPUT_EXTERNAL. Output .variant_pcs for undefined + function symbol references. */ + +void +aarch64_asm_output_external (FILE *stream, tree decl, const char* name) +{ + default_elf_asm_output_external (stream, decl, name); + aarch64_asm_output_variant_pcs (stream, decl, name); +} + +/* Triggered after a .cfi_startproc directive is emitted into the assembly file. + Used to output the .cfi_b_key_frame directive when signing the current + function with the B key. */ + +void +aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED) +{ + if (cfun->machine->frame.laid_out && aarch64_return_address_signing_enabled () + && aarch64_ra_sign_key == AARCH64_KEY_B) + asm_fprintf (f, "\t.cfi_b_key_frame\n"); } /* Implements TARGET_ASM_FILE_START. Output the assembly header. 
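Returning to the .variant_pcs hook above, a hedged example of symbols expected to receive the directive; it assumes <arm_sve.h>, an SVE-enabled -march, and that externally referenced declarations reach the ASM_OUTPUT_EXTERNAL path added in this hunk. The function names are invented.

#include <arm_sve.h>

/* An explicit aarch64_vector_pcs function and a function whose SVE
   signature implies the SVE PCS.  */
__attribute__ ((aarch64_vector_pcs)) void simd_callee (void);

svint32_t sve_callee (svint32_t x);

svint32_t
caller (svint32_t x)
{
  simd_callee ();
  return sve_callee (x);
}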
*/ @@ -14098,7 +21230,7 @@ aarch64_start_file (void) const struct processor *default_arch = aarch64_get_arch (default_options->x_explicit_arch); - unsigned long default_isa_flags = default_options->x_aarch64_isa_flags; + uint64_t default_isa_flags = default_options->x_aarch64_isa_flags; std::string extension = aarch64_get_extension_string_for_isa_flags (default_isa_flags, default_arch->flags); @@ -14108,84 +21240,129 @@ aarch64_start_file (void) asm_fprintf (asm_out_file, "\t.arch %s\n", aarch64_last_printed_arch_string.c_str ()); - default_file_start (); + default_file_start (); +} + +/* Emit load exclusive. */ + +static void +aarch64_emit_load_exclusive (machine_mode mode, rtx rval, + rtx mem, rtx model_rtx) +{ + if (mode == TImode) + emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval), + gen_highpart (DImode, rval), + mem, model_rtx)); + else + emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); +} + +/* Emit store exclusive. */ + +static void +aarch64_emit_store_exclusive (machine_mode mode, rtx bval, + rtx mem, rtx rval, rtx model_rtx) +{ + if (mode == TImode) + emit_insn (gen_aarch64_store_exclusive_pair + (bval, mem, operand_subword (rval, 0, 0, TImode), + operand_subword (rval, 1, 0, TImode), model_rtx)); + else + emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, rval, model_rtx)); +} + +/* Mark the previous jump instruction as unlikely. */ + +static void +aarch64_emit_unlikely_jump (rtx insn) +{ + rtx_insn *jump = emit_jump_insn (insn); + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); } -/* Emit load exclusive. */ +/* We store the names of the various atomic helpers in a 5x4 array. + Return the libcall function given MODE, MODEL and NAMES. */ -static void -aarch64_emit_load_exclusive (machine_mode mode, rtx rval, - rtx mem, rtx model_rtx) +rtx +aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, + const atomic_ool_names *names) { - rtx (*gen) (rtx, rtx, rtx); + memmodel model = memmodel_base (INTVAL (model_rtx)); + int mode_idx, model_idx; switch (mode) { - case E_QImode: gen = gen_aarch64_load_exclusiveqi; break; - case E_HImode: gen = gen_aarch64_load_exclusivehi; break; - case E_SImode: gen = gen_aarch64_load_exclusivesi; break; - case E_DImode: gen = gen_aarch64_load_exclusivedi; break; + case E_QImode: + mode_idx = 0; + break; + case E_HImode: + mode_idx = 1; + break; + case E_SImode: + mode_idx = 2; + break; + case E_DImode: + mode_idx = 3; + break; + case E_TImode: + mode_idx = 4; + break; default: gcc_unreachable (); } - emit_insn (gen (rval, mem, model_rtx)); -} - -/* Emit store exclusive. */ - -static void -aarch64_emit_store_exclusive (machine_mode mode, rtx bval, - rtx rval, rtx mem, rtx model_rtx) -{ - rtx (*gen) (rtx, rtx, rtx, rtx); - - switch (mode) + switch (model) { - case E_QImode: gen = gen_aarch64_store_exclusiveqi; break; - case E_HImode: gen = gen_aarch64_store_exclusivehi; break; - case E_SImode: gen = gen_aarch64_store_exclusivesi; break; - case E_DImode: gen = gen_aarch64_store_exclusivedi; break; + case MEMMODEL_RELAXED: + model_idx = 0; + break; + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + model_idx = 1; + break; + case MEMMODEL_RELEASE: + model_idx = 2; + break; + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + model_idx = 3; + break; default: gcc_unreachable (); } - emit_insn (gen (bval, rval, mem, model_rtx)); + return init_one_libfunc_visibility (names->str[mode_idx][model_idx], + VISIBILITY_HIDDEN); } -/* Mark the previous jump instruction as unlikely. 
*/ +#define DEF0(B, N) \ + { "__aarch64_" #B #N "_relax", \ + "__aarch64_" #B #N "_acq", \ + "__aarch64_" #B #N "_rel", \ + "__aarch64_" #B #N "_acq_rel" } -static void -aarch64_emit_unlikely_jump (rtx insn) -{ - rtx_insn *jump = emit_jump_insn (insn); - add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); -} +#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ + { NULL, NULL, NULL, NULL } +#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) + +static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; +const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } }; +const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; +const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; +const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } }; +const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } }; + +#undef DEF0 +#undef DEF4 +#undef DEF5 /* Expand a compare and swap pattern. */ void aarch64_expand_compare_and_swap (rtx operands[]) { - rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; - machine_mode mode, cmp_mode; - typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); - int idx; - gen_cas_fn gen; - const gen_cas_fn split_cas[] = - { - gen_aarch64_compare_and_swapqi, - gen_aarch64_compare_and_swaphi, - gen_aarch64_compare_and_swapsi, - gen_aarch64_compare_and_swapdi - }; - const gen_cas_fn atomic_cas[] = - { - gen_aarch64_compare_and_swapqi_lse, - gen_aarch64_compare_and_swaphi_lse, - gen_aarch64_compare_and_swapsi_lse, - gen_aarch64_compare_and_swapdi_lse - }; + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg; + machine_mode mode, r_mode; bval = operands[0]; rval = operands[1]; @@ -14196,88 +21373,66 @@ aarch64_expand_compare_and_swap (rtx operands[]) mod_s = operands[6]; mod_f = operands[7]; mode = GET_MODE (mem); - cmp_mode = mode; /* Normally the succ memory model must be stronger than fail, but in the unlikely event of fail being ACQUIRE and succ being RELEASE we need to promote succ to ACQ_REL so that we don't lose the acquire semantics. */ - if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) mod_s = GEN_INT (MEMMODEL_ACQ_REL); - switch (mode) + r_mode = mode; + if (mode == QImode || mode == HImode) { - case E_QImode: - case E_HImode: - /* For short modes, we're going to perform the comparison in SImode, - so do the zero-extension now. */ - cmp_mode = SImode; - rval = gen_reg_rtx (SImode); - oldval = convert_modes (SImode, mode, oldval, true); - /* Fall through. */ + r_mode = SImode; + rval = gen_reg_rtx (r_mode); + } - case E_SImode: - case E_DImode: - /* Force the value into a register if needed. */ - if (!aarch64_plus_operand (oldval, mode)) - oldval = force_reg (cmp_mode, oldval); - break; + if (TARGET_LSE) + { + /* The CAS insn requires oldval and rval overlap, but we need to + have a copy of oldval saved across the operation to tell if + the operation is successful. 
*/ + if (reg_overlap_mentioned_p (rval, oldval)) + rval = copy_to_mode_reg (r_mode, oldval); + else + emit_move_insn (rval, gen_lowpart (r_mode, oldval)); - default: - gcc_unreachable (); + emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem, + newval, mod_s)); + cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); } - - switch (mode) + else if (TARGET_OUTLINE_ATOMICS) { - case E_QImode: idx = 0; break; - case E_HImode: idx = 1; break; - case E_SImode: idx = 2; break; - case E_DImode: idx = 3; break; - default: - gcc_unreachable (); + /* Oldval must satisfy compare afterward. */ + if (!aarch64_plus_operand (oldval, mode)) + oldval = force_reg (mode, oldval); + rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names); + rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode, + oldval, mode, newval, mode, + XEXP (mem, 0), Pmode); + cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); } - if (TARGET_LSE) - gen = atomic_cas[idx]; else - gen = split_cas[idx]; + { + /* The oldval predicate varies by mode. Test it and force to reg. */ + insn_code code = code_for_aarch64_compare_and_swap (mode); + if (!insn_data[code].operand[2].predicate (oldval, mode)) + oldval = force_reg (mode, oldval); - emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); + emit_insn (GEN_FCN (code) (rval, mem, oldval, newval, + is_weak, mod_s, mod_f)); + cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + } - if (mode == QImode || mode == HImode) - emit_move_insn (operands[1], gen_lowpart (mode, rval)); + if (r_mode != mode) + rval = gen_lowpart (mode, rval); + emit_move_insn (operands[1], rval); - x = gen_rtx_REG (CCmode, CC_REGNUM); - x = gen_rtx_EQ (SImode, x, const0_rtx); + x = gen_rtx_EQ (SImode, cc_reg, const0_rtx); emit_insn (gen_rtx_SET (bval, x)); } -/* Test whether the target supports using a atomic load-operate instruction. - CODE is the operation and AFTER is TRUE if the data in memory after the - operation should be returned and FALSE if the data before the operation - should be returned. Returns FALSE if the operation isn't supported by the - architecture. */ - -bool -aarch64_atomic_ldop_supported_p (enum rtx_code code) -{ - if (!TARGET_LSE) - return false; - - switch (code) - { - case SET: - case AND: - case IOR: - case XOR: - case MINUS: - case PLUS: - return true; - default: - return false; - } -} - /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a sequence implementing an atomic operation. */ @@ -14295,42 +21450,6 @@ aarch64_emit_post_barrier (enum memmodel model) } } -/* Emit an atomic compare-and-swap operation. RVAL is the destination register - for the data in memory. EXPECTED is the value expected to be in memory. - DESIRED is the value to store to memory. MEM is the memory location. MODEL - is the memory ordering to use. */ - -void -aarch64_gen_atomic_cas (rtx rval, rtx mem, - rtx expected, rtx desired, - rtx model) -{ - rtx (*gen) (rtx, rtx, rtx, rtx); - machine_mode mode; - - mode = GET_MODE (mem); - - switch (mode) - { - case E_QImode: gen = gen_aarch64_atomic_casqi; break; - case E_HImode: gen = gen_aarch64_atomic_cashi; break; - case E_SImode: gen = gen_aarch64_atomic_cassi; break; - case E_DImode: gen = gen_aarch64_atomic_casdi; break; - default: - gcc_unreachable (); - } - - /* Move the expected value into the CAS destination register. */ - emit_insn (gen_rtx_SET (rval, expected)); - - /* Emit the CAS. 
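For orientation, this is the kind of source that reaches aarch64_expand_compare_and_swap; which of the three strategies above is taken depends on whether LSE is enabled at compile time and on the outline-atomics option. The helper name follows from the cas tables defined earlier in the patch; the instruction sequences and the run-time dispatch inside the helper are expectations about the surrounding toolchain, not something shown in this hunk.

#include <stdint.h>
#include <stdbool.h>

/* One source form, three possible expansions:
   - compiled with +lse:            a single CAS instruction;
   - with outline atomics enabled:  a call to __aarch64_cas4_acq_rel, which
                                    is expected to pick an LSE or LL/SC body
                                    at run time;
   - otherwise:                     an inline LDAXR/STLXR retry loop.  */
bool
try_update (uint32_t *p, uint32_t expected, uint32_t desired)
{
  return __atomic_compare_exchange_n (p, &expected, desired, /*weak=*/false,
				      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}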
*/ - emit_insn (gen (rval, mem, desired, model)); - - /* Compare the expected value with the value loaded by the CAS, to establish - whether the swap was made. */ - aarch64_gen_compare_reg (EQ, rval, expected); -} - /* Split a compare and swap pattern. */ void @@ -14339,13 +21458,11 @@ aarch64_split_compare_and_swap (rtx operands[]) /* Split after prolog/epilog to avoid interactions with shrinkwrapping. */ gcc_assert (epilogue_completed); - rtx rval, mem, oldval, newval, scratch; + rtx rval, mem, oldval, newval, scratch, x, model_rtx; machine_mode mode; bool is_weak; rtx_code_label *label1, *label2; - rtx x, cond; enum memmodel model; - rtx model_rtx; rval = operands[0]; mem = operands[1]; @@ -14366,7 +21483,8 @@ aarch64_split_compare_and_swap (rtx operands[]) CBNZ scratch, .label1 .label2: CMP rval, 0. */ - bool strong_zero_p = !is_weak && oldval == const0_rtx; + bool strong_zero_p = (!is_weak && !aarch64_track_speculation && + oldval == const0_rtx && mode != TImode); label1 = NULL; if (!is_weak) @@ -14379,304 +21497,55 @@ aarch64_split_compare_and_swap (rtx operands[]) /* The initial load can be relaxed for a __sync operation since a final barrier will be emitted to stop code hoisting. */ if (is_mm_sync (model)) - aarch64_emit_load_exclusive (mode, rval, mem, - GEN_INT (MEMMODEL_RELAXED)); + aarch64_emit_load_exclusive (mode, rval, mem, GEN_INT (MEMMODEL_RELAXED)); else aarch64_emit_load_exclusive (mode, rval, mem, model_rtx); if (strong_zero_p) - { - x = gen_rtx_NE (VOIDmode, rval, const0_rtx); - x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, - gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); - aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); - } + x = gen_rtx_NE (VOIDmode, rval, const0_rtx); else { - cond = aarch64_gen_compare_reg (NE, rval, oldval); - x = gen_rtx_NE (VOIDmode, cond, const0_rtx); - x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, - gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); - aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); + x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); } + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx); if (!is_weak) { - x = gen_rtx_NE (VOIDmode, scratch, const0_rtx); + if (aarch64_track_speculation) + { + /* Emit an explicit compare instruction, so that we can correctly + track the condition codes. */ + rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx); + x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx); + } + else + x = gen_rtx_NE (VOIDmode, scratch, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); } else - { - cond = gen_rtx_REG (CCmode, CC_REGNUM); - x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); - emit_insn (gen_rtx_SET (cond, x)); - } + aarch64_gen_compare_reg (NE, scratch, const0_rtx); emit_label (label2); + /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL to set the condition flags. If this is not used it will be removed by later passes. */ if (strong_zero_p) - { - cond = gen_rtx_REG (CCmode, CC_REGNUM); - x = gen_rtx_COMPARE (CCmode, rval, const0_rtx); - emit_insn (gen_rtx_SET (cond, x)); - } + aarch64_gen_compare_reg (NE, rval, const0_rtx); + /* Emit any final barrier needed for a __sync operation. 
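The strong_zero_p special case above fires for a strong compare-and-swap whose expected value is literal zero, and only in the LL/SC split with speculation tracking off; a try-lock is the textbook source shape for it, shown here purely as an illustration.

#include <stdbool.h>

/* Strong CAS against the constant 0: in the split LL/SC form the loaded
   value can feed a CBNZ directly instead of a separate compare, which is
   what the strong_zero_p path arranges.  */
bool
try_lock (int *lock)
{
  int expected = 0;
  return __atomic_compare_exchange_n (lock, &expected, 1, /*weak=*/false,
				      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}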
*/ if (is_mm_sync (model)) aarch64_emit_post_barrier (model); } -/* Emit a BIC instruction. */ - -static void -aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift) -{ - rtx shift_rtx = GEN_INT (shift); - rtx (*gen) (rtx, rtx, rtx, rtx); - - switch (mode) - { - case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break; - case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break; - default: - gcc_unreachable (); - } - - emit_insn (gen (dst, s2, shift_rtx, s1)); -} - -/* Emit an atomic swap. */ - -static void -aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value, - rtx mem, rtx model) -{ - rtx (*gen) (rtx, rtx, rtx, rtx); - - switch (mode) - { - case E_QImode: gen = gen_aarch64_atomic_swpqi; break; - case E_HImode: gen = gen_aarch64_atomic_swphi; break; - case E_SImode: gen = gen_aarch64_atomic_swpsi; break; - case E_DImode: gen = gen_aarch64_atomic_swpdi; break; - default: - gcc_unreachable (); - } - - emit_insn (gen (dst, mem, value, model)); -} - -/* Operations supported by aarch64_emit_atomic_load_op. */ - -enum aarch64_atomic_load_op_code -{ - AARCH64_LDOP_PLUS, /* A + B */ - AARCH64_LDOP_XOR, /* A ^ B */ - AARCH64_LDOP_OR, /* A | B */ - AARCH64_LDOP_BIC /* A & ~B */ -}; - -/* Emit an atomic load-operate. */ - -static void -aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code, - machine_mode mode, rtx dst, rtx src, - rtx mem, rtx model) -{ - typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx); - const aarch64_atomic_load_op_fn plus[] = - { - gen_aarch64_atomic_loadaddqi, - gen_aarch64_atomic_loadaddhi, - gen_aarch64_atomic_loadaddsi, - gen_aarch64_atomic_loadadddi - }; - const aarch64_atomic_load_op_fn eor[] = - { - gen_aarch64_atomic_loadeorqi, - gen_aarch64_atomic_loadeorhi, - gen_aarch64_atomic_loadeorsi, - gen_aarch64_atomic_loadeordi - }; - const aarch64_atomic_load_op_fn ior[] = - { - gen_aarch64_atomic_loadsetqi, - gen_aarch64_atomic_loadsethi, - gen_aarch64_atomic_loadsetsi, - gen_aarch64_atomic_loadsetdi - }; - const aarch64_atomic_load_op_fn bic[] = - { - gen_aarch64_atomic_loadclrqi, - gen_aarch64_atomic_loadclrhi, - gen_aarch64_atomic_loadclrsi, - gen_aarch64_atomic_loadclrdi - }; - aarch64_atomic_load_op_fn gen; - int idx = 0; - - switch (mode) - { - case E_QImode: idx = 0; break; - case E_HImode: idx = 1; break; - case E_SImode: idx = 2; break; - case E_DImode: idx = 3; break; - default: - gcc_unreachable (); - } - - switch (code) - { - case AARCH64_LDOP_PLUS: gen = plus[idx]; break; - case AARCH64_LDOP_XOR: gen = eor[idx]; break; - case AARCH64_LDOP_OR: gen = ior[idx]; break; - case AARCH64_LDOP_BIC: gen = bic[idx]; break; - default: - gcc_unreachable (); - } - - emit_insn (gen (dst, mem, src, model)); -} - -/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the - location to store the data read from memory. OUT_RESULT is the location to - store the result of the operation. MEM is the memory location to read and - modify. MODEL_RTX is the memory ordering to use. VALUE is the second - operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can - be NULL. */ - -void -aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result, - rtx mem, rtx value, rtx model_rtx) -{ - machine_mode mode = GET_MODE (mem); - machine_mode wmode = (mode == DImode ? 
DImode : SImode); - const bool short_mode = (mode < SImode); - aarch64_atomic_load_op_code ldop_code; - rtx src; - rtx x; - - if (out_data) - out_data = gen_lowpart (mode, out_data); - - if (out_result) - out_result = gen_lowpart (mode, out_result); - - /* Make sure the value is in a register, putting it into a destination - register if it needs to be manipulated. */ - if (!register_operand (value, mode) - || code == AND || code == MINUS) - { - src = out_result ? out_result : out_data; - emit_move_insn (src, gen_lowpart (mode, value)); - } - else - src = value; - gcc_assert (register_operand (src, mode)); - - /* Preprocess the data for the operation as necessary. If the operation is - a SET then emit a swap instruction and finish. */ - switch (code) - { - case SET: - aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx); - return; - - case MINUS: - /* Negate the value and treat it as a PLUS. */ - { - rtx neg_src; - - /* Resize the value if necessary. */ - if (short_mode) - src = gen_lowpart (wmode, src); - - neg_src = gen_rtx_NEG (wmode, src); - emit_insn (gen_rtx_SET (src, neg_src)); - - if (short_mode) - src = gen_lowpart (mode, src); - } - /* Fall-through. */ - case PLUS: - ldop_code = AARCH64_LDOP_PLUS; - break; - - case IOR: - ldop_code = AARCH64_LDOP_OR; - break; - - case XOR: - ldop_code = AARCH64_LDOP_XOR; - break; - - case AND: - { - rtx not_src; - - /* Resize the value if necessary. */ - if (short_mode) - src = gen_lowpart (wmode, src); - - not_src = gen_rtx_NOT (wmode, src); - emit_insn (gen_rtx_SET (src, not_src)); - - if (short_mode) - src = gen_lowpart (mode, src); - } - ldop_code = AARCH64_LDOP_BIC; - break; - - default: - /* The operation can't be done with atomic instructions. */ - gcc_unreachable (); - } - - aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx); - - /* If necessary, calculate the data in memory after the update by redoing the - operation from values in registers. */ - if (!out_result) - return; - - if (short_mode) - { - src = gen_lowpart (wmode, src); - out_data = gen_lowpart (wmode, out_data); - out_result = gen_lowpart (wmode, out_result); - } - - x = NULL_RTX; - - switch (code) - { - case MINUS: - case PLUS: - x = gen_rtx_PLUS (wmode, out_data, src); - break; - case IOR: - x = gen_rtx_IOR (wmode, out_data, src); - break; - case XOR: - x = gen_rtx_XOR (wmode, out_data, src); - break; - case AND: - aarch64_emit_bic (wmode, out_result, out_data, src, 0); - return; - default: - gcc_unreachable (); - } - - emit_set_insn (out_result, x); - - return; -} - /* Split an atomic operation. */ void @@ -14729,7 +21598,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, case MINUS: if (CONST_INT_P (value)) { - value = GEN_INT (-INTVAL (value)); + value = GEN_INT (-UINTVAL (value)); code = PLUS; } /* Fall through. */ @@ -14743,7 +21612,16 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, aarch64_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out), model_rtx); - x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + if (aarch64_track_speculation) + { + /* Emit an explicit compare instruction, so that we can correctly + track the condition codes. 
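The -INTVAL to -UINTVAL change in aarch64_split_atomic_op above is the usual way to negate a constant without tripping signed-overflow undefined behaviour when the constant happens to be the most negative HOST_WIDE_INT; the same idea in plain C, assuming a 64-bit HOST_WIDE_INT:

#include <stdint.h>

/* -x is undefined when x == INT64_MIN; negating the value as unsigned wraps
   modulo 2^64 instead, and converting back (implementation-defined, but
   modular in GCC) yields the bit pattern the PLUS form of the operation
   needs.  */
static int64_t
negate_for_plus (int64_t x)
{
  return (int64_t) - (uint64_t) x;
}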
*/ + rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx); + x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx); + } + else + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, gen_rtx_LABEL_REF (Pmode, label), pc_rtx); aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); @@ -14815,11 +21693,12 @@ aarch64_float_const_representable_p (rtx x) REAL_VALUE_TYPE r, m; bool fail; + x = unwrap_const_vec_duplicate (x); if (!CONST_DOUBLE_P (x)) return false; - /* We don't support HFmode constants yet. */ - if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode) + if (GET_MODE (x) == VOIDmode + || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST)) return false; r = *CONST_DOUBLE_REAL_VALUE (x); @@ -14910,17 +21789,18 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT) { - gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV); + gcc_assert (info.insn == simd_immediate_info::MOV + && info.u.mov.shift == 0); /* For FP zero change it to a CONST_INT 0 and use the integer SIMD move immediate path. */ - if (aarch64_float_const_zero_rtx_p (info.value)) - info.value = GEN_INT (0); + if (aarch64_float_const_zero_rtx_p (info.u.mov.value)) + info.u.mov.value = GEN_INT (0); else { const unsigned int buf_size = 20; char float_buf[buf_size] = {'\0'}; real_to_decimal_for_mode (float_buf, - CONST_DOUBLE_REAL_VALUE (info.value), + CONST_DOUBLE_REAL_VALUE (info.u.mov.value), buf_size, buf_size, 1, info.elt_mode); if (lane_count == 1) @@ -14932,36 +21812,39 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, } } - gcc_assert (CONST_INT_P (info.value)); + gcc_assert (CONST_INT_P (info.u.mov.value)); if (which == AARCH64_CHECK_MOV) { mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi"; - shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl"; + shift_op = (info.u.mov.modifier == simd_immediate_info::MSL + ? "msl" : "lsl"); if (lane_count == 1) snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, - mnemonic, UINTVAL (info.value)); - else if (info.shift) + mnemonic, UINTVAL (info.u.mov.value)); + else if (info.u.mov.shift) snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count, - element_char, UINTVAL (info.value), shift_op, info.shift); + element_char, UINTVAL (info.u.mov.value), shift_op, + info.u.mov.shift); else snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count, - element_char, UINTVAL (info.value)); + element_char, UINTVAL (info.u.mov.value)); } else { /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */ mnemonic = info.insn == simd_immediate_info::MVN ? 
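One visible effect of relaxing the HFmode check above: with an FP16-capable -march (+fp16) a simple half-precision constant can now be considered for an FMOV-style immediate instead of being rejected outright. The example and the expected code generation are illustrative only.

/* With TARGET_FP_F16INST the constant below is a candidate for an
   immediate-form FMOV; without it, aarch64_float_const_representable_p
   still returns false for HFmode and a literal load is used instead.  */
__fp16
hf_one (void)
{
  return (__fp16) 1.0;
}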
"bic" : "orr"; - if (info.shift) + if (info.u.mov.shift) snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count, - element_char, UINTVAL (info.value), "lsl", info.shift); + element_char, UINTVAL (info.u.mov.value), "lsl", + info.u.mov.shift); else snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count, - element_char, UINTVAL (info.value)); + element_char, UINTVAL (info.u.mov.value)); } return templ; } @@ -15005,24 +21888,49 @@ aarch64_output_sve_mov_immediate (rtx const_vector) element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode)); - if (info.step) + machine_mode vec_mode = GET_MODE (const_vector); + if (aarch64_sve_pred_mode_p (vec_mode)) + { + static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")]; + if (info.insn == simd_immediate_info::MOV) + { + gcc_assert (info.u.mov.value == const0_rtx); + snprintf (buf, sizeof (buf), "pfalse\t%%0.b"); + } + else + { + gcc_assert (info.insn == simd_immediate_info::PTRUE); + unsigned int total_bytes; + if (info.u.pattern == AARCH64_SV_ALL + && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes)) + snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char, + total_bytes / GET_MODE_SIZE (info.elt_mode)); + else + snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char, + svpattern_token (info.u.pattern)); + } + return buf; + } + + if (info.insn == simd_immediate_info::INDEX) { snprintf (templ, sizeof (templ), "index\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC, - element_char, INTVAL (info.value), INTVAL (info.step)); + element_char, INTVAL (info.u.index.base), + INTVAL (info.u.index.step)); return templ; } if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT) { - if (aarch64_float_const_zero_rtx_p (info.value)) - info.value = GEN_INT (0); + if (aarch64_float_const_zero_rtx_p (info.u.mov.value)) + info.u.mov.value = GEN_INT (0); else { const int buf_size = 20; char float_buf[buf_size] = {}; real_to_decimal_for_mode (float_buf, - CONST_DOUBLE_REAL_VALUE (info.value), + CONST_DOUBLE_REAL_VALUE (info.u.mov.value), buf_size, buf_size, 1, info.elt_mode); snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s", @@ -15032,23 +21940,27 @@ aarch64_output_sve_mov_immediate (rtx const_vector) } snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC, - element_char, INTVAL (info.value)); + element_char, INTVAL (info.u.mov.value)); return templ; } -/* Return the asm format for a PTRUE instruction whose destination has - mode MODE. SUFFIX is the element size suffix. */ +/* Return the asm template for a PTRUES. CONST_UNSPEC is the + aarch64_sve_ptrue_svpattern_immediate that describes the predicate + pattern. 
*/ char * -aarch64_output_ptrue (machine_mode mode, char suffix) +aarch64_output_sve_ptrues (rtx const_unspec) { - unsigned int nunits; - static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")]; - if (GET_MODE_NUNITS (mode).is_constant (&nunits)) - snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits); - else - snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix); - return buf; + static char templ[40]; + + struct simd_immediate_info info; + bool is_valid = aarch64_simd_valid_immediate (const_unspec, &info); + gcc_assert (is_valid && info.insn == simd_immediate_info::PTRUE); + + char element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode)); + snprintf (templ, sizeof (templ), "ptrues\t%%0.%c, %s", element_char, + svpattern_token (info.u.pattern)); + return templ; } /* Split operands into moves from op[1] + op[2] into op[0]. */ @@ -15115,6 +22027,8 @@ struct expand_vec_perm_d bool testing_p; }; +static bool aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d); + /* Generate a variable permutation. */ static void @@ -15300,6 +22214,59 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) return true; } +/* Try to re-encode the PERM constant so it combines odd and even elements. + This rewrites constants such as {0, 1, 4, 5}/V4SF to {0, 2}/V2DI. + We retry with this new constant with the full suite of patterns. */ +static bool +aarch64_evpc_reencode (struct expand_vec_perm_d *d) +{ + expand_vec_perm_d newd; + unsigned HOST_WIDE_INT nelt; + + if (d->vec_flags != VEC_ADVSIMD) + return false; + + /* Get the new mode. Always twice the size of the inner + and half the elements. */ + poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode); + unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2; + auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require (); + machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits); + + if (new_mode == word_mode) + return false; + + /* to_constant is safe since this routine is specific to Advanced SIMD + vectors. */ + nelt = d->perm.length ().to_constant (); + + vec_perm_builder newpermconst; + newpermconst.new_vector (nelt / 2, nelt / 2, 1); + + /* Convert the perm constant if we can. Require even, odd as the pairs. */ + for (unsigned int i = 0; i < nelt; i += 2) + { + poly_int64 elt0 = d->perm[i]; + poly_int64 elt1 = d->perm[i + 1]; + poly_int64 newelt; + if (!multiple_p (elt0, 2, &newelt) || maybe_ne (elt0 + 1, elt1)) + return false; + newpermconst.quick_push (newelt.to_constant ()); + } + newpermconst.finalize (); + + newd.vmode = new_mode; + newd.vec_flags = VEC_ADVSIMD; + newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL; + newd.op0 = d->op0 ? gen_lowpart (new_mode, d->op0) : NULL; + newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL; + newd.testing_p = d->testing_p; + newd.one_vector_p = d->one_vector_p; + + newd.perm.new_vector (newpermconst, newd.one_vector_p ? 1 : 2, nelt / 2); + return aarch64_expand_vec_perm_const_1 (&newd); +} + /* Recognize patterns suitable for the UZP instructions. 
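The index transformation performed by aarch64_evpc_reencode above can be modelled on plain arrays: each aligned even/odd pair of narrow-element indexes (2k, 2k+1) collapses to the single index k over elements of twice the width. A stand-alone sketch of that check (not GCC code):

#include <stdbool.h>

/* Collapse a permute selector over N narrow elements into one over N/2
   elements of twice the width.  Succeeds only if the selector moves the
   narrow elements around in aligned even/odd pairs.  */
static bool
reencode_sel (const int *sel, int n, int *out)
{
  for (int i = 0; i < n; i += 2)
    {
      if ((sel[i] & 1) != 0 || sel[i + 1] != sel[i] + 1)
	return false;
      out[i / 2] = sel[i] / 2;
    }
  return true;
}

For example {0, 1, 4, 5} over four floats becomes {0, 2} over two doublewords, which is the case quoted in the comment above.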
*/ static bool aarch64_evpc_uzp (struct expand_vec_perm_d *d) @@ -15435,21 +22402,25 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d) if (d->vec_flags == VEC_SVE_PRED || !d->one_vector_p - || !d->perm[0].is_constant (&diff)) + || !d->perm[0].is_constant (&diff) + || !diff) return false; - size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode); - if (size == 8) + if (d->vec_flags & VEC_SVE_DATA) + size = (diff + 1) * aarch64_sve_container_bits (d->vmode); + else + size = (diff + 1) * GET_MODE_UNIT_BITSIZE (d->vmode); + if (size == 64) { unspec = UNSPEC_REV64; pred_mode = VNx2BImode; } - else if (size == 4) + else if (size == 32) { unspec = UNSPEC_REV32; pred_mode = VNx4BImode; } - else if (size == 2) + else if (size == 16) { unspec = UNSPEC_REV16; pred_mode = VNx8BImode; @@ -15466,13 +22437,14 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d) if (d->testing_p) return true; - rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec); - if (d->vec_flags == VEC_SVE_DATA) + if (d->vec_flags & VEC_SVE_DATA) { - rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); - src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src), - UNSPEC_MERGE_PTRUE); + rtx pred = aarch64_ptrue_reg (pred_mode); + emit_insn (gen_aarch64_sve_revbhw (d->vmode, pred_mode, + d->target, pred, d->op0)); + return true; } + rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec); emit_set_insn (d->target, src); return true; } @@ -15485,7 +22457,7 @@ aarch64_evpc_rev_global (struct expand_vec_perm_d *d) { poly_uint64 nelt = d->perm.length (); - if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA) + if (!d->one_vector_p || d->vec_flags == VEC_ADVSIMD) return false; if (!d->perm.series_p (0, 1, nelt - 1, -1)) @@ -15514,7 +22486,8 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d) || !d->perm[0].is_constant (&elt)) return false; - if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode)) + if ((d->vec_flags & VEC_SVE_DATA) + && elt * (aarch64_sve_container_bits (vmode) / 8) >= 64) return false; /* Success! */ @@ -15589,9 +22562,133 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d) if (d->testing_p) return true; - machine_mode sel_mode = mode_for_int_vector (d->vmode).require (); + machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); - aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel); + if (d->one_vector_p) + emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel)); + else + aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel); + return true; +} + +/* Try to implement D using SVE SEL instruction. */ + +static bool +aarch64_evpc_sel (struct expand_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + int unit_size = GET_MODE_UNIT_SIZE (vmode); + + if (d->vec_flags != VEC_SVE_DATA + || unit_size > 8) + return false; + + int n_patterns = d->perm.encoding ().npatterns (); + poly_int64 vec_len = d->perm.length (); + + for (int i = 0; i < n_patterns; ++i) + if (!known_eq (d->perm[i], i) + && !known_eq (d->perm[i], vec_len + i)) + return false; + + for (int i = n_patterns; i < n_patterns * 2; i++) + if (!d->perm.series_p (i, n_patterns, i, n_patterns) + && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns)) + return false; + + if (d->testing_p) + return true; + + machine_mode pred_mode = aarch64_sve_pred_mode (vmode); + + /* Build a predicate that is true when op0 elements should be used. 
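A selector is a candidate for the SVE SEL route when every lane either keeps op0's element (index i) or takes op1's element in the same position (index len + i); the governing predicate, built just after this point, simply records which side each lane chose. An array-level model of that test (ignoring the variable-length encoding details):

#include <stdbool.h>

/* For a permute over two LEN-element inputs, check the "blend" shape used by
   SVE SEL and record per lane whether op0 (true) or op1 (false) is taken.
   E.g. for LEN=4, sel = {0, 5, 2, 7} gives pred = {1, 0, 1, 0}.  */
static bool
sel_blend_p (const int *sel, int len, bool *pred)
{
  for (int i = 0; i < len; i++)
    {
      if (sel[i] == i)
	pred[i] = true;
      else if (sel[i] == len + i)
	pred[i] = false;
      else
	return false;
    }
  return true;
}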
*/ + rtx_vector_builder builder (pred_mode, n_patterns, 2); + for (int i = 0; i < n_patterns * 2; i++) + { + rtx elem = known_eq (d->perm[i], i) ? CONST1_RTX (BImode) + : CONST0_RTX (BImode); + builder.quick_push (elem); + } + + rtx const_vec = builder.build (); + rtx pred = force_reg (pred_mode, const_vec); + /* TARGET = PRED ? OP0 : OP1. */ + emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op0, d->op1, pred)); + return true; +} + +/* Recognize patterns suitable for the INS instructions. */ +static bool +aarch64_evpc_ins (struct expand_vec_perm_d *d) +{ + machine_mode mode = d->vmode; + unsigned HOST_WIDE_INT nelt; + + if (d->vec_flags != VEC_ADVSIMD) + return false; + + /* to_constant is safe since this routine is specific to Advanced SIMD + vectors. */ + nelt = d->perm.length ().to_constant (); + rtx insv = d->op0; + + HOST_WIDE_INT idx = -1; + + for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++) + { + HOST_WIDE_INT elt; + if (!d->perm[i].is_constant (&elt)) + return false; + if (elt == (HOST_WIDE_INT) i) + continue; + if (idx != -1) + { + idx = -1; + break; + } + idx = i; + } + + if (idx == -1) + { + insv = d->op1; + for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++) + { + if (d->perm[i].to_constant () == (HOST_WIDE_INT) (i + nelt)) + continue; + if (idx != -1) + return false; + idx = i; + } + + if (idx == -1) + return false; + } + + if (d->testing_p) + return true; + + gcc_assert (idx != -1); + + unsigned extractindex = d->perm[idx].to_constant (); + rtx extractv = d->op0; + if (extractindex >= nelt) + { + extractv = d->op1; + extractindex -= nelt; + } + gcc_assert (extractindex < nelt); + + emit_move_insn (d->target, insv); + insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode); + expand_operand ops[5]; + create_output_operand (&ops[0], d->target, mode); + create_input_operand (&ops[1], d->target, mode); + create_integer_operand (&ops[2], 1 << idx); + create_input_operand (&ops[3], extractv, mode); + create_integer_operand (&ops[4], extractindex); + expand_insn (icode, 5, ops); + return true; } @@ -15610,6 +22707,7 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if ((d->vec_flags == VEC_ADVSIMD || d->vec_flags == VEC_SVE_DATA + || d->vec_flags == (VEC_SVE_DATA | VEC_PARTIAL) || d->vec_flags == VEC_SVE_PRED) && known_gt (nelt, 1)) { @@ -15627,6 +22725,12 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; else if (aarch64_evpc_trn (d)) return true; + else if (aarch64_evpc_sel (d)) + return true; + else if (aarch64_evpc_ins (d)) + return true; + else if (aarch64_evpc_reencode (d)) + return true; if (d->vec_flags == VEC_SVE_DATA) return aarch64_evpc_sve_tbl (d); else if (d->vec_flags == VEC_ADVSIMD) @@ -15644,7 +22748,8 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, struct expand_vec_perm_d d; /* Check whether the mask can be applied to a single vector. */ - if (op0 && rtx_equal_p (op0, op1)) + if (sel.ninputs () == 1 + || (op0 && rtx_equal_p (op0, op1))) d.one_vector_p = true; else if (sel.all_from_input_p (0)) { @@ -15664,8 +22769,11 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, d.vmode = vmode; d.vec_flags = aarch64_classify_vector_mode (d.vmode); d.target = target; - d.op0 = op0; - d.op1 = op1; + d.op0 = op0 ? force_reg (vmode, op0) : NULL_RTX; + if (op0 == op1) + d.op1 = d.op0; + else + d.op1 = op1 ? 
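aarch64_evpc_ins, by contrast, matches permutes where every lane but one comes straight from a single input. GCC's generic __builtin_shuffle is a convenient way to write such a permute at source level; with this patch one would hope for a single INS here rather than a table lookup (the instruction choice is an expectation, not shown in this hunk).

/* Every result lane is a's, except lane 2, which takes element 2 of b
   (selector value 6 = 4 + 2) -- the single-insert shape recognised by
   aarch64_evpc_ins.  */
typedef int v4si __attribute__ ((vector_size (16)));

v4si
insert_lane2 (v4si a, v4si b)
{
  v4si sel = { 0, 1, 6, 3 };
  return __builtin_shuffle (a, b, sel);
}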
force_reg (vmode, op1) : NULL_RTX; d.testing_p = !target; if (!d.testing_p) @@ -15701,32 +22809,19 @@ aarch64_reverse_mask (machine_mode mode, unsigned int nunits) return force_reg (V16QImode, mask); } -/* Return true if X is a valid second operand for the SVE instruction - that implements integer comparison OP_CODE. */ +/* Expand an SVE integer comparison using the SVE equivalent of: -static bool -aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x) -{ - if (register_operand (x, VOIDmode)) - return true; + (set TARGET (CODE OP0 OP1)). */ - switch (op_code) - { - case LTU: - case LEU: - case GEU: - case GTU: - return aarch64_sve_cmp_immediate_p (x, false); - case LT: - case LE: - case GE: - case GT: - case NE: - case EQ: - return aarch64_sve_cmp_immediate_p (x, true); - default: - gcc_unreachable (); - } +void +aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) +{ + machine_mode pred_mode = GET_MODE (target); + machine_mode data_mode = GET_MODE (op0); + rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode, + op0, op1); + if (!rtx_equal_p (target, res)) + emit_move_insn (target, res); } /* Return the UNSPEC_COND_* code for comparison CODE. */ @@ -15737,129 +22832,84 @@ aarch64_unspec_cond_code (rtx_code code) switch (code) { case NE: - return UNSPEC_COND_NE; + return UNSPEC_COND_FCMNE; case EQ: - return UNSPEC_COND_EQ; + return UNSPEC_COND_FCMEQ; case LT: - return UNSPEC_COND_LT; + return UNSPEC_COND_FCMLT; case GT: - return UNSPEC_COND_GT; + return UNSPEC_COND_FCMGT; case LE: - return UNSPEC_COND_LE; + return UNSPEC_COND_FCMLE; case GE: - return UNSPEC_COND_GE; - case LTU: - return UNSPEC_COND_LO; - case GTU: - return UNSPEC_COND_HI; - case LEU: - return UNSPEC_COND_LS; - case GEU: - return UNSPEC_COND_HS; + return UNSPEC_COND_FCMGE; case UNORDERED: - return UNSPEC_COND_UO; + return UNSPEC_COND_FCMUO; default: gcc_unreachable (); } } -/* Return an (unspec:PRED_MODE [PRED OP0 OP1] UNSPEC_COND_) expression, - where is the operation associated with comparison CODE. */ - -static rtx -aarch64_gen_unspec_cond (rtx_code code, machine_mode pred_mode, - rtx pred, rtx op0, rtx op1) -{ - rtvec vec = gen_rtvec (3, pred, op0, op1); - return gen_rtx_UNSPEC (pred_mode, vec, aarch64_unspec_cond_code (code)); -} - -/* Expand an SVE integer comparison: - - TARGET = CODE (OP0, OP1). */ - -void -aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) -{ - machine_mode pred_mode = GET_MODE (target); - machine_mode data_mode = GET_MODE (op0); - - if (!aarch64_sve_cmp_operand_p (code, op1)) - op1 = force_reg (data_mode, op1); - - rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); - rtx unspec = aarch64_gen_unspec_cond (code, pred_mode, ptrue, op0, op1); - emit_insn (gen_set_clobber_cc (target, unspec)); -} - -/* Emit an instruction: +/* Emit: - (set TARGET (unspec:PRED_MODE [PRED OP0 OP1] UNSPEC_COND_)) + (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) - where is the operation associated with comparison CODE. */ + where is the operation associated with comparison CODE. + KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. 
*/ static void -aarch64_emit_unspec_cond (rtx target, rtx_code code, machine_mode pred_mode, - rtx pred, rtx op0, rtx op1) +aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred, + bool known_ptrue_p, rtx op0, rtx op1) { - rtx unspec = aarch64_gen_unspec_cond (code, pred_mode, pred, op0, op1); + rtx flag = gen_int_mode (known_ptrue_p, SImode); + rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred), + gen_rtvec (4, pred, flag, op0, op1), + aarch64_unspec_cond_code (code)); emit_set_insn (target, unspec); } -/* Emit: +/* Emit the SVE equivalent of: - (set TMP1 (unspec:PRED_MODE [PTRUE OP0 OP1] UNSPEC_COND_)) - (set TMP2 (unspec:PRED_MODE [PTRUE OP0 OP1] UNSPEC_COND_)) - (set TARGET (and:PRED_MODE (ior:PRED_MODE TMP1 TMP2) PTRUE)) + (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) + (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) + (set TARGET (ior:PRED_MODE TMP1 TMP2)) - where is the operation associated with comparison CODEi. */ + where is the operation associated with comparison CODEi. + KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */ static void -aarch64_emit_unspec_cond_or (rtx target, rtx_code code1, rtx_code code2, - machine_mode pred_mode, rtx ptrue, - rtx op0, rtx op1) +aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2, + rtx pred, bool known_ptrue_p, rtx op0, rtx op1) { + machine_mode pred_mode = GET_MODE (pred); rtx tmp1 = gen_reg_rtx (pred_mode); - aarch64_emit_unspec_cond (tmp1, code1, pred_mode, ptrue, op0, op1); + aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1); rtx tmp2 = gen_reg_rtx (pred_mode); - aarch64_emit_unspec_cond (tmp2, code2, pred_mode, ptrue, op0, op1); - emit_set_insn (target, gen_rtx_AND (pred_mode, - gen_rtx_IOR (pred_mode, tmp1, tmp2), - ptrue)); + aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1); + aarch64_emit_binop (target, ior_optab, tmp1, tmp2); } -/* If CAN_INVERT_P, emit an instruction: +/* Emit the SVE equivalent of: - (set TARGET (unspec:PRED_MODE [PRED OP0 OP1] UNSPEC_COND_)) + (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) + (set TARGET (not TMP)) - where is the operation associated with comparison CODE. Otherwise - emit: - - (set TMP (unspec:PRED_MODE [PRED OP0 OP1] UNSPEC_COND_)) - (set TARGET (and:PRED_MODE (not:PRED_MODE TMP) PTRUE)) - - where the second instructions sets TARGET to the inverse of TMP. */ + where is the operation associated with comparison CODE. + KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. 
*/ static void -aarch64_emit_inverted_unspec_cond (rtx target, rtx_code code, - machine_mode pred_mode, rtx ptrue, rtx pred, - rtx op0, rtx op1, bool can_invert_p) +aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred, + bool known_ptrue_p, rtx op0, rtx op1) { - if (can_invert_p) - aarch64_emit_unspec_cond (target, code, pred_mode, pred, op0, op1); - else - { - rtx tmp = gen_reg_rtx (pred_mode); - aarch64_emit_unspec_cond (tmp, code, pred_mode, pred, op0, op1); - emit_set_insn (target, gen_rtx_AND (pred_mode, - gen_rtx_NOT (pred_mode, tmp), - ptrue)); - } + machine_mode pred_mode = GET_MODE (pred); + rtx tmp = gen_reg_rtx (pred_mode); + aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1); + aarch64_emit_unop (target, one_cmpl_optab, tmp); } -/* Expand an SVE floating-point comparison: +/* Expand an SVE floating-point comparison using the SVE equivalent of: - TARGET = CODE (OP0, OP1) + (set TARGET (CODE OP0 OP1)) If CAN_INVERT_P is true, the caller can also handle inverted results; return true if the result is in fact inverted. */ @@ -15871,36 +22921,28 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, machine_mode pred_mode = GET_MODE (target); machine_mode data_mode = GET_MODE (op0); - rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); + rtx ptrue = aarch64_ptrue_reg (pred_mode); switch (code) { case UNORDERED: /* UNORDERED has no immediate form. */ op1 = force_reg (data_mode, op1); - aarch64_emit_unspec_cond (target, code, pred_mode, ptrue, op0, op1); - return false; - + /* fall through */ case LT: case LE: case GT: case GE: case EQ: case NE: - /* There is native support for the comparison. */ - aarch64_emit_unspec_cond (target, code, pred_mode, ptrue, op0, op1); - return false; - - case ORDERED: - /* There is native support for the inverse comparison. */ - op1 = force_reg (data_mode, op1); - aarch64_emit_inverted_unspec_cond (target, UNORDERED, - pred_mode, ptrue, ptrue, op0, op1, - can_invert_p); - return can_invert_p; + { + /* There is native support for the comparison. */ + aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); + return false; + } case LTGT: /* This is a trapping operation (LT or GT). */ - aarch64_emit_unspec_cond_or (target, LT, GT, pred_mode, ptrue, op0, op1); + aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1); return false; case UNEQ: @@ -15908,38 +22950,59 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, { /* This would trap for signaling NaNs. */ op1 = force_reg (data_mode, op1); - aarch64_emit_unspec_cond_or (target, UNORDERED, EQ, - pred_mode, ptrue, op0, op1); + aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ, + ptrue, true, op0, op1); return false; } /* fall through */ - case UNLT: case UNLE: case UNGT: case UNGE: - { - rtx ordered = ptrue; - if (flag_trapping_math) - { - /* Only compare the elements that are known to be ordered. */ - ordered = gen_reg_rtx (pred_mode); - op1 = force_reg (data_mode, op1); - aarch64_emit_inverted_unspec_cond (ordered, UNORDERED, pred_mode, - ptrue, ptrue, op0, op1, false); - } - if (code == UNEQ) - code = NE; - else - code = reverse_condition_maybe_unordered (code); - aarch64_emit_inverted_unspec_cond (target, code, pred_mode, ptrue, - ordered, op0, op1, can_invert_p); - return can_invert_p; - } + if (flag_trapping_math) + { + /* Work out which elements are ordered. 
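As a semantic cross-check for the unordered comparisons handled just below: under -ftrapping-math an UNGE, for instance, is computed by finding the ordered lanes, testing the reversed condition on those lanes, and inverting, which is the predicate form of the scalar identity sketched here (a model of the semantics only, not of the emitted SVE code):

#include <math.h>
#include <stdbool.h>

/* UNGE (a, b) is "unordered, or a >= b", i.e. the negation of
   "ordered and a < b".  isunordered() does not raise exceptions, and the
   short-circuit keeps the potentially trapping a < b away from NaNs.  */
static bool
unge (double a, double b)
{
  bool ordered = !isunordered (a, b);
  return !(ordered && a < b);
}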
*/ + rtx ordered = gen_reg_rtx (pred_mode); + op1 = force_reg (data_mode, op1); + aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED, + ptrue, true, op0, op1); + + /* Test the opposite condition for the ordered elements, + then invert the result. */ + if (code == UNEQ) + code = NE; + else + code = reverse_condition_maybe_unordered (code); + if (can_invert_p) + { + aarch64_emit_sve_fp_cond (target, code, + ordered, false, op0, op1); + return true; + } + aarch64_emit_sve_invert_fp_cond (target, code, + ordered, false, op0, op1); + return false; + } + break; + + case ORDERED: + /* ORDERED has no immediate form. */ + op1 = force_reg (data_mode, op1); + break; default: gcc_unreachable (); } + + /* There is native support for the inverse comparison. */ + code = reverse_condition_maybe_unordered (code); + if (can_invert_p) + { + aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); + return true; + } + aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1); + return false; } /* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode @@ -15950,9 +23013,7 @@ void aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode, rtx *ops) { - machine_mode pred_mode - = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode), - GET_MODE_SIZE (cmp_mode)).require (); + machine_mode pred_mode = aarch64_get_mask_mode (cmp_mode).require (); rtx pred = gen_reg_rtx (pred_mode); if (FLOAT_MODE_P (cmp_mode)) { @@ -15963,6 +23024,13 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode, else aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]); + if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode)) + ops[1] = force_reg (data_mode, ops[1]); + /* The "false" value can only be zero if the "true" value is a constant. */ + if (register_operand (ops[1], data_mode) + || !aarch64_simd_reg_or_zero (ops[2], data_mode)) + ops[2] = force_reg (data_mode, ops[2]); + rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]); emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL)); } @@ -16011,150 +23079,271 @@ aarch64_move_pointer (rtx pointer, poly_int64 amount) next, amount); } -/* Return a new RTX holding the result of moving POINTER forward by the - size of the mode it points to. */ +/* Return a new RTX holding the result of moving POINTER forward by the + size of the mode it points to. */ + +static rtx +aarch64_progress_pointer (rtx pointer) +{ + return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer))); +} + +/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by + MODE bytes. */ + +static void +aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, + machine_mode mode) +{ + /* Handle 256-bit memcpy separately. We do this by making 2 adjacent memory + address copies using V4SImode so that we can use Q registers. */ + if (known_eq (GET_MODE_BITSIZE (mode), 256)) + { + mode = V4SImode; + rtx reg1 = gen_reg_rtx (mode); + rtx reg2 = gen_reg_rtx (mode); + /* "Cast" the pointers to the correct mode. */ + *src = adjust_address (*src, mode, 0); + *dst = adjust_address (*dst, mode, 0); + /* Emit the memcpy. */ + emit_insn (aarch64_gen_load_pair (mode, reg1, *src, reg2, + aarch64_progress_pointer (*src))); + emit_insn (aarch64_gen_store_pair (mode, *dst, reg1, + aarch64_progress_pointer (*dst), reg2)); + /* Move the pointers forward. 
*/ + *src = aarch64_move_pointer (*src, 32); + *dst = aarch64_move_pointer (*dst, 32); + return; + } + + rtx reg = gen_reg_rtx (mode); + + /* "Cast" the pointers to the correct mode. */ + *src = adjust_address (*src, mode, 0); + *dst = adjust_address (*dst, mode, 0); + /* Emit the memcpy. */ + emit_move_insn (reg, *src); + emit_move_insn (*dst, reg); + /* Move the pointers forward. */ + *src = aarch64_progress_pointer (*src); + *dst = aarch64_progress_pointer (*dst); +} + +/* Expand cpymem, as if from a __builtin_memcpy. Return true if + we succeed, otherwise return false. */ + +bool +aarch64_expand_cpymem (rtx *operands) +{ + int mode_bits; + rtx dst = operands[0]; + rtx src = operands[1]; + rtx base; + machine_mode cur_mode = BLKmode; + + /* Only expand fixed-size copies. */ + if (!CONST_INT_P (operands[2])) + return false; + + unsigned HOST_WIDE_INT size = INTVAL (operands[2]); + + /* Inline up to 256 bytes when optimizing for speed. */ + unsigned HOST_WIDE_INT max_copy_size = 256; + + if (optimize_function_for_size_p (cfun)) + max_copy_size = 128; + + int copy_bits = 256; + + /* Default to 256-bit LDP/STP on large copies, however small copies, no SIMD + support or slow 256-bit LDP/STP fall back to 128-bit chunks. */ + if (size <= 24 + || !TARGET_SIMD + || (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)) + { + copy_bits = 128; + max_copy_size = max_copy_size / 2; + } + + if (size > max_copy_size) + return false; + + base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + dst = adjust_automodify_address (dst, VOIDmode, base, 0); + + base = copy_to_mode_reg (Pmode, XEXP (src, 0)); + src = adjust_automodify_address (src, VOIDmode, base, 0); + + /* Convert size to bits to make the rest of the code simpler. */ + int n = size * BITS_PER_UNIT; + + while (n > 0) + { + /* Find the largest mode in which to do the copy in without over reading + or writing. */ + opt_scalar_int_mode mode_iter; + FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) + if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_bits)) + cur_mode = mode_iter.require (); + + gcc_assert (cur_mode != BLKmode); + + mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant (); + + /* Prefer Q-register accesses for the last bytes. */ + if (mode_bits == 128 && copy_bits == 256) + cur_mode = V4SImode; + + aarch64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode); + + n -= mode_bits; -static rtx -aarch64_progress_pointer (rtx pointer) -{ - return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer))); -} + /* Emit trailing copies using overlapping unaligned accesses - this is + smaller and faster. */ + if (n > 0 && n < copy_bits / 2) + { + machine_mode next_mode = smallest_mode_for_size (n, MODE_INT); + int n_bits = GET_MODE_BITSIZE (next_mode).to_constant (); + gcc_assert (n_bits <= mode_bits); + src = aarch64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT); + dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT); + n = n_bits; + } + } -/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by - MODE bytes. */ + return true; +} +/* Like aarch64_copy_one_block_and_progress_pointers, except for memset where + SRC is a register we have created with the duplicated value to be set. 
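The trailing-copy logic above is what turns a fixed 15-byte copy into one 8-byte load/store for bytes 0-7 followed by a second, overlapping 8-byte load/store for bytes 7-14, instead of an 8+4+2+1 ladder. A source-level example that should reach this expander (the exact lowering depends on the size and tuning checks above):

#include <string.h>

/* Small fixed-size copy: expected to become two 8-byte moves, the second
   overlapping the first by one byte, rather than four progressively smaller
   moves or a call to memcpy.  */
void
copy15 (void *dst, const void *src)
{
  memcpy (dst, src, 15);
}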
*/ static void -aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, - machine_mode mode) -{ - rtx reg = gen_reg_rtx (mode); +aarch64_set_one_block_and_progress_pointer (rtx src, rtx *dst, + machine_mode mode) +{ + /* If we are copying 128bits or 256bits, we can do that straight from + the SIMD register we prepared. */ + if (known_eq (GET_MODE_BITSIZE (mode), 256)) + { + mode = GET_MODE (src); + /* "Cast" the *dst to the correct mode. */ + *dst = adjust_address (*dst, mode, 0); + /* Emit the memset. */ + emit_insn (aarch64_gen_store_pair (mode, *dst, src, + aarch64_progress_pointer (*dst), src)); + + /* Move the pointers forward. */ + *dst = aarch64_move_pointer (*dst, 32); + return; + } + if (known_eq (GET_MODE_BITSIZE (mode), 128)) + { + /* "Cast" the *dst to the correct mode. */ + *dst = adjust_address (*dst, GET_MODE (src), 0); + /* Emit the memset. */ + emit_move_insn (*dst, src); + /* Move the pointers forward. */ + *dst = aarch64_move_pointer (*dst, 16); + return; + } + /* For copying less, we have to extract the right amount from src. */ + rtx reg = lowpart_subreg (mode, src, GET_MODE (src)); - /* "Cast" the pointers to the correct mode. */ - *src = adjust_address (*src, mode, 0); + /* "Cast" the *dst to the correct mode. */ *dst = adjust_address (*dst, mode, 0); - /* Emit the memcpy. */ - emit_move_insn (reg, *src); + /* Emit the memset. */ emit_move_insn (*dst, reg); - /* Move the pointers forward. */ - *src = aarch64_progress_pointer (*src); + /* Move the pointer forward. */ *dst = aarch64_progress_pointer (*dst); } -/* Expand movmem, as if from a __builtin_memcpy. Return true if +/* Expand setmem, as if from a __builtin_memset. Return true if we succeed, otherwise return false. */ bool -aarch64_expand_movmem (rtx *operands) +aarch64_expand_setmem (rtx *operands) { - unsigned int n; + int n, mode_bits; + unsigned HOST_WIDE_INT len; rtx dst = operands[0]; - rtx src = operands[1]; + rtx val = operands[2], src; rtx base; - bool speed_p = !optimize_function_for_size_p (cfun); - - /* When optimizing for size, give a better estimate of the length of a - memcpy call, but use the default otherwise. */ - unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2; + machine_mode cur_mode = BLKmode, next_mode; /* We can't do anything smart if the amount to copy is not constant. */ - if (!CONST_INT_P (operands[2])) + if (!CONST_INT_P (operands[1])) return false; - n = UINTVAL (operands[2]); + bool speed_p = !optimize_function_for_size_p (cfun); + + /* Default the maximum to 256-bytes. */ + unsigned max_set_size = 256; + + /* In case we are optimizing for size or if the core does not + want to use STP Q regs, lower the max_set_size. */ + max_set_size = (!speed_p + || (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)) + ? max_set_size / 2 : max_set_size; - /* Try to keep the number of instructions low. For cases below 16 bytes we - need to make at most two moves. For cases above 16 bytes it will be one - move for each 16 byte chunk, then at most two additional moves. */ - if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions) + len = INTVAL (operands[1]); + + /* Upper bound check. */ + if (len > max_set_size) return false; base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); dst = adjust_automodify_address (dst, VOIDmode, base, 0); - base = copy_to_mode_reg (Pmode, XEXP (src, 0)); - src = adjust_automodify_address (src, VOIDmode, base, 0); - - /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a - 1-byte chunk. 
*/ - if (n < 4) - { - if (n >= 2) - { - aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); - n -= 2; - } + /* Prepare the val using a DUP/MOVI v0.16B, val. */ + src = expand_vector_broadcast (V16QImode, val); + src = force_reg (V16QImode, src); - if (n == 1) - aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); + /* Convert len to bits to make the rest of the code simpler. */ + n = len * BITS_PER_UNIT; - return true; - } + /* Maximum amount to copy in one go. We allow 256-bit chunks based on the + AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS tuning parameter. setmem expand + pattern is only turned on for TARGET_SIMD. */ + const int copy_limit = (speed_p + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)) + ? GET_MODE_BITSIZE (TImode) : 256; - /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second - 4-byte chunk, partially overlapping with the previously copied chunk. */ - if (n < 8) + while (n > 0) { - aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); - n -= 4; - if (n > 0) - { - int move = n - 4; + /* Find the largest mode in which to do the copy without + over writing. */ + opt_scalar_int_mode mode_iter; + FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) + if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit)) + cur_mode = mode_iter.require (); - src = aarch64_move_pointer (src, move); - dst = aarch64_move_pointer (dst, move); - aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); - } - return true; - } + gcc_assert (cur_mode != BLKmode); - /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of - them, then (if applicable) an 8-byte chunk. */ - while (n >= 8) - { - if (n / 16) - { - aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode); - n -= 16; - } - else - { - aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); - n -= 8; - } - } + mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant (); + aarch64_set_one_block_and_progress_pointer (src, &dst, cur_mode); - /* Finish the final bytes of the copy. We can always do this in one - instruction. We either copy the exact amount we need, or partially - overlap with the previous chunk we copied and copy 8-bytes. */ - if (n == 0) - return true; - else if (n == 1) - aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); - else if (n == 2) - aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); - else if (n == 4) - aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); - else - { - if (n == 3) - { - src = aarch64_move_pointer (src, -1); - dst = aarch64_move_pointer (dst, -1); - aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); - } - else - { - int move = n - 8; + n -= mode_bits; - src = aarch64_move_pointer (src, move); - dst = aarch64_move_pointer (dst, move); - aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); + /* Do certain trailing copies as overlapping if it's going to be + cheaper. i.e. less instructions to do so. For instance doing a 15 + byte copy it's more efficient to do two overlapping 8 byte copies than + 8 + 4 + 2 + 1. */ + if (n > 0 && n < copy_limit / 2) + { + next_mode = smallest_mode_for_size (n, MODE_INT); + int n_bits = GET_MODE_BITSIZE (next_mode).to_constant (); + gcc_assert (n_bits <= mode_bits); + dst = aarch64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT); + n = n_bits; } } return true; } + /* Split a DImode store of a CONST_INT SRC to MEM DST as two SImode stores. 
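aarch64_expand_setmem follows the same pattern for memset: the byte is broadcast into a vector register once and then stored in the largest chunks allowed by the size and tuning checks. A fixed-size call under the 256-byte limit is enough to exercise it; the described code generation is an expectation, not part of this hunk.

#include <string.h>

/* Expected to expand to a DUP of the byte into a vector register followed
   by a handful of vector stores, instead of a library call.  */
void
fill64 (void *p, int byte)
{
  memset (p, byte, 64);
}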
Handle the case when the constant has identical bottom and top halves. This is beneficial when the two stores can be @@ -16212,12 +23401,147 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) return true; } +/* Generate RTL for a conditional branch with rtx comparison CODE in + mode CC_MODE. The destination of the unlikely conditional branch + is LABEL_REF. */ + +void +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, + rtx label_ref) +{ + rtx x; + x = gen_rtx_fmt_ee (code, VOIDmode, + gen_rtx_REG (cc_mode, CC_REGNUM), + const0_rtx); + + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, label_ref), + pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +} + +/* Generate DImode scratch registers for 128-bit (TImode) addition. + + OP1 represents the TImode destination operand 1 + OP2 represents the TImode destination operand 2 + LOW_DEST represents the low half (DImode) of TImode operand 0 + LOW_IN1 represents the low half (DImode) of TImode operand 1 + LOW_IN2 represents the low half (DImode) of TImode operand 2 + HIGH_DEST represents the high half (DImode) of TImode operand 0 + HIGH_IN1 represents the high half (DImode) of TImode operand 1 + HIGH_IN2 represents the high half (DImode) of TImode operand 2. */ + +void +aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = gen_lowpart (DImode, op1); + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + *high_in1 = gen_highpart (DImode, op1); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_highpart_offset (DImode, TImode)); +} + +/* Generate DImode scratch registers for 128-bit (TImode) subtraction. + + This function differs from 'arch64_addti_scratch_regs' in that + OP1 can be an immediate constant (zero). We must call + subreg_highpart_offset with DImode and TImode arguments, otherwise + VOIDmode will be used for the const_int which generates an internal + error from subreg_size_highpart_offset which does not expect a size of zero. + + OP1 represents the TImode destination operand 1 + OP2 represents the TImode destination operand 2 + LOW_DEST represents the low half (DImode) of TImode operand 0 + LOW_IN1 represents the low half (DImode) of TImode operand 1 + LOW_IN2 represents the low half (DImode) of TImode operand 2 + HIGH_DEST represents the high half (DImode) of TImode operand 0 + HIGH_IN1 represents the high half (DImode) of TImode operand 1 + HIGH_IN2 represents the high half (DImode) of TImode operand 2. */ + + +void +aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_lowpart_offset (DImode, TImode)); + + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_highpart_offset (DImode, TImode)); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_highpart_offset (DImode, TImode)); +} + +/* Generate RTL for 128-bit (TImode) subtraction with overflow. 
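The subvti helpers split a 128-bit subtraction into a low-half subtract that produces the borrow and a high-half subtract that consumes it; the same data flow on two 64-bit limbs in portable C, as a model of the arithmetic that aarch64_expand_subvti (just below) wires up with SUBS/SBCS:

#include <stdint.h>

/* 128-bit a - b on 64-bit limbs: subtract the low halves, record whether a
   borrow occurred, then subtract the high halves minus the borrow.  */
static void
sub128 (uint64_t a_lo, uint64_t a_hi, uint64_t b_lo, uint64_t b_hi,
	uint64_t *r_lo, uint64_t *r_hi)
{
  *r_lo = a_lo - b_lo;
  *r_hi = a_hi - b_hi - (a_lo < b_lo);
}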
+ + OP0 represents the TImode destination operand 0 + LOW_DEST represents the low half (DImode) of TImode operand 0 + LOW_IN1 represents the low half (DImode) of TImode operand 1 + LOW_IN2 represents the low half (DImode) of TImode operand 2 + HIGH_DEST represents the high half (DImode) of TImode operand 0 + HIGH_IN1 represents the high half (DImode) of TImode operand 1 + HIGH_IN2 represents the high half (DImode) of TImode operand 2 + UNSIGNED_P is true if the operation is being performed on unsigned + values. */ +void +aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, + rtx low_in2, rtx high_dest, rtx high_in1, + rtx high_in2, bool unsigned_p) +{ + if (low_in2 == const0_rtx) + { + low_dest = low_in1; + high_in2 = force_reg (DImode, high_in2); + if (unsigned_p) + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, high_in2)); + else + emit_insn (gen_subvdi_insn (high_dest, high_in1, high_in2)); + } + else + { + if (aarch64_plus_immediate (low_in2, DImode)) + emit_insn (gen_subdi3_compare1_imm (low_dest, low_in1, low_in2, + GEN_INT (-UINTVAL (low_in2)))); + else + { + low_in2 = force_reg (DImode, low_in2); + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); + } + high_in2 = force_reg (DImode, high_in2); + + if (unsigned_p) + emit_insn (gen_usubdi3_carryinC (high_dest, high_in1, high_in2)); + else + emit_insn (gen_subdi3_carryinV (high_dest, high_in1, high_in2)); + } + + emit_move_insn (gen_lowpart (DImode, op0), low_dest); + emit_move_insn (gen_highpart (DImode, op0), high_dest); + +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ static unsigned HOST_WIDE_INT aarch64_asan_shadow_offset (void) { - return (HOST_WIDE_INT_1 << 36); + if (TARGET_ILP32) + return (HOST_WIDE_INT_1 << 29); + else + return (HOST_WIDE_INT_1 << 36); } static rtx @@ -16318,24 +23642,20 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, case E_HImode: case E_SImode: cmp_mode = SImode; - icode = CODE_FOR_ccmpsi; break; case E_DImode: cmp_mode = DImode; - icode = CODE_FOR_ccmpdi; break; case E_SFmode: cmp_mode = SFmode; cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1); - icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf; break; case E_DFmode: cmp_mode = DFmode; cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1); - icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf; break; default: @@ -16343,6 +23663,8 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, return NULL_RTX; } + icode = code_for_ccmp (cc_mode, cmp_mode); + op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp); op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp); if (!op0 || !op1) @@ -16358,9 +23680,21 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, if (bit_code != AND) { - prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev), - GET_MODE (XEXP (prev, 0))), - VOIDmode, XEXP (prev, 0), const0_rtx); + /* Treat the ccmp patterns as canonical and use them where possible, + but fall back to ccmp_rev patterns if there's no other option. 
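The ccmp changes here are what allow a short-circuit condition to become a chain of conditional compares rather than two branches; a function of the usual shape, with the commonly produced sequence noted as a comment (illustrative, register and NZCV details elided):

/* Typically lowered to something like
       cmp   w0, 3
       ccmp  w1, 7, <nzcv>, eq
       cset  w0, gt
   i.e. one compare feeding a conditional compare, with no branches.  */
int
both_conditions (int a, int b)
{
  return a == 3 && b > 7;
}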
*/ + rtx_code prev_code = GET_CODE (prev); + machine_mode prev_mode = GET_MODE (XEXP (prev, 0)); + if ((prev_mode == CCFPmode || prev_mode == CCFPEmode) + && !(prev_code == EQ + || prev_code == NE + || prev_code == ORDERED + || prev_code == UNORDERED)) + icode = code_for_ccmp_rev (cc_mode, cmp_mode); + else + { + rtx_code code = reverse_condition (prev_code); + prev = gen_rtx_fmt_ee (code, VOIDmode, XEXP (prev, 0), const0_rtx); + } aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond); } @@ -16523,11 +23857,16 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } - if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC) - && aarch_crypto_can_dual_issue (prev, curr)) + /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */ + if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH) + && prev_set && curr_set && any_condjump_p (curr) + && GET_CODE (SET_SRC (prev_set)) == COMPARE + && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0))) + && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr))) return true; - if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH) + /* Fuse flag-setting ALU instructions and conditional branch. */ + if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) && any_condjump_p (curr)) { unsigned int condreg1, condreg2; @@ -16551,9 +23890,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } + /* Fuse ALU instructions and CBZ/CBNZ. */ if (prev_set && curr_set - && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) + && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ) && any_condjump_p (curr)) { /* We're trying to match: @@ -16679,20 +24019,20 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset) { fusion = SCHED_FUSION_LD_SIGN_EXTEND; src = XEXP (src, 0); - if (GET_CODE (src) != MEM || GET_MODE (src) != SImode) + if (!MEM_P (src) || GET_MODE (src) != SImode) return SCHED_FUSION_NONE; } else if (GET_CODE (src) == ZERO_EXTEND) { fusion = SCHED_FUSION_LD_ZERO_EXTEND; src = XEXP (src, 0); - if (GET_CODE (src) != MEM || GET_MODE (src) != SImode) + if (!MEM_P (src) || GET_MODE (src) != SImode) return SCHED_FUSION_NONE; } - if (GET_CODE (src) == MEM && REG_P (dest)) + if (MEM_P (src) && REG_P (dest)) extract_base_offset_in_addr (src, base, offset); - else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx)) + else if (MEM_P (dest) && (REG_P (src) || src == const0_rtx)) { fusion = SCHED_FUSION_ST; extract_base_offset_in_addr (dest, base, offset); @@ -16826,6 +24166,10 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, if (!rtx_equal_p (base_1, base_2)) return false; + /* The operands must be of the same size. */ + gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), + GET_MODE_SIZE (GET_MODE (mem_2)))); + offval_1 = INTVAL (offset_1); offval_2 = INTVAL (offset_2); /* We should only be trying this for fixed-sized modes. There is no @@ -16843,9 +24187,16 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, /* In increasing order, the last load can clobber the address. */ if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2)) - return false; + return false; } + /* One of the memory accesses must be a mempair operand. + If it is not the first one, they need to be swapped by the + peephole. 
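/* Source-level illustration of the access pattern the ldp/stp operand
   checks in aarch64_operands_ok_for_ldpstp are looking for: two loads with
   the same base register, offsets differing by the access size, and
   destinations in the same register class.  Whether an actual LDP is formed
   still depends on register allocation and scheduling, so the assembly in
   the comment is only a typical outcome, not a guarantee.  */
#include <stdint.h>

struct pair { uint64_t first; uint64_t second; };

uint64_t
sum_pair (const struct pair *p)
{
  /* Typically becomes something like:
       ldp  x1, x0, [x0]
       add  x0, x1, x0
       ret                                      */
  return p->first + p->second;
}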
*/ + if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) + && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) + return false; + if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) rclass_1 = FP_REGS; else @@ -16863,6 +24214,84 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, return true; } +/* Given OPERANDS of consecutive load/store that can be merged, + swap them if they are not in ascending order. */ +void +aarch64_swap_ldrstr_operands (rtx* operands, bool load) +{ + rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2; + HOST_WIDE_INT offval_1, offval_2; + + if (load) + { + mem_1 = operands[1]; + mem_2 = operands[3]; + } + else + { + mem_1 = operands[0]; + mem_2 = operands[2]; + } + + extract_base_offset_in_addr (mem_1, &base_1, &offset_1); + extract_base_offset_in_addr (mem_2, &base_2, &offset_2); + + offval_1 = INTVAL (offset_1); + offval_2 = INTVAL (offset_2); + + if (offval_1 > offval_2) + { + /* Irrespective of whether this is a load or a store, + we do the same swap. */ + std::swap (operands[0], operands[2]); + std::swap (operands[1], operands[3]); + } +} + +/* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a + comparison between the two. */ +int +aarch64_host_wide_int_compare (const void *x, const void *y) +{ + return wi::cmps (* ((const HOST_WIDE_INT *) x), + * ((const HOST_WIDE_INT *) y)); +} + +/* Taking X and Y to be pairs of RTX, one pointing to a MEM rtx and the + other pointing to a REG rtx containing an offset, compare the offsets + of the two pairs. + + Return: + + 1 iff offset (X) > offset (Y) + 0 iff offset (X) == offset (Y) + -1 iff offset (X) < offset (Y) */ +int +aarch64_ldrstr_offset_compare (const void *x, const void *y) +{ + const rtx * operands_1 = (const rtx *) x; + const rtx * operands_2 = (const rtx *) y; + rtx mem_1, mem_2, base, offset_1, offset_2; + + if (MEM_P (operands_1[0])) + mem_1 = operands_1[0]; + else + mem_1 = operands_1[1]; + + if (MEM_P (operands_2[0])) + mem_2 = operands_2[0]; + else + mem_2 = operands_2[1]; + + /* Extract the offsets. */ + extract_base_offset_in_addr (mem_1, &base, &offset_1); + extract_base_offset_in_addr (mem_2, &base, &offset_2); + + gcc_assert (offset_1 != NULL_RTX && offset_2 != NULL_RTX); + + return wi::cmps (INTVAL (offset_1), INTVAL (offset_2)); +} + /* Given OPERANDS of consecutive load/store, check if we can merge them into ldp/stp by adjusting the offset. LOAD is true if they are load instructions. MODE is the mode of memory operands. 
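/* Standalone analogue of aarch64_host_wide_int_compare above: a qsort
   comparator over 64-bit signed offsets, used so that loads/stores can be
   reordered into ascending-offset order before pairing.  Plain C sketch,
   not GCC code.  */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int
offset_compare (const void *x, const void *y)
{
  int64_t a = *(const int64_t *) x;
  int64_t b = *(const int64_t *) y;
  return (a > b) - (a < b);
}

int
main (void)
{
  int64_t offs[4] = { 24, 8, 0, 16 };
  qsort (offs, 4, sizeof offs[0], offset_compare);
  for (int i = 0; i < 4; i++)
    printf ("%lld ", (long long) offs[i]);   /* prints: 0 8 16 24 */
  printf ("\n");
  return 0;
}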
@@ -16886,211 +24315,230 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, bool aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load, - scalar_mode mode) + machine_mode mode) { - enum reg_class rclass_1, rclass_2, rclass_3, rclass_4; - HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize; - rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4; - rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4; + const int num_insns = 4; + enum reg_class rclass; + HOST_WIDE_INT offvals[num_insns], msize; + rtx mem[num_insns], reg[num_insns], base[num_insns], offset[num_insns]; if (load) { - reg_1 = operands[0]; - mem_1 = operands[1]; - reg_2 = operands[2]; - mem_2 = operands[3]; - reg_3 = operands[4]; - mem_3 = operands[5]; - reg_4 = operands[6]; - mem_4 = operands[7]; - gcc_assert (REG_P (reg_1) && REG_P (reg_2) - && REG_P (reg_3) && REG_P (reg_4)); - if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4)) - return false; + for (int i = 0; i < num_insns; i++) + { + reg[i] = operands[2 * i]; + mem[i] = operands[2 * i + 1]; + + gcc_assert (REG_P (reg[i])); + } + + /* Do not attempt to merge the loads if the loads clobber each other. */ + for (int i = 0; i < 8; i += 2) + for (int j = i + 2; j < 8; j += 2) + if (reg_overlap_mentioned_p (operands[i], operands[j])) + return false; } else + for (int i = 0; i < num_insns; i++) + { + mem[i] = operands[2 * i]; + reg[i] = operands[2 * i + 1]; + } + + /* Skip if memory operand is by itself valid for ldp/stp. */ + if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode)) + return false; + + for (int i = 0; i < num_insns; i++) { - mem_1 = operands[0]; - reg_1 = operands[1]; - mem_2 = operands[2]; - reg_2 = operands[3]; - mem_3 = operands[4]; - reg_3 = operands[5]; - mem_4 = operands[6]; - reg_4 = operands[7]; + /* The mems cannot be volatile. */ + if (MEM_VOLATILE_P (mem[i])) + return false; + + /* Check if the addresses are in the form of [base+offset]. */ + extract_base_offset_in_addr (mem[i], base + i, offset + i); + if (base[i] == NULL_RTX || offset[i] == NULL_RTX) + return false; } - /* Skip if memory operand is by itslef valid for ldp/stp. */ - if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode)) - return false; - /* The mems cannot be volatile. */ - if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2) - || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4)) - return false; + /* Check if the registers are of same class. */ + rclass = REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0])) + ? FP_REGS : GENERAL_REGS; - /* Check if the addresses are in the form of [base+offset]. */ - extract_base_offset_in_addr (mem_1, &base_1, &offset_1); - if (base_1 == NULL_RTX || offset_1 == NULL_RTX) - return false; - extract_base_offset_in_addr (mem_2, &base_2, &offset_2); - if (base_2 == NULL_RTX || offset_2 == NULL_RTX) - return false; - extract_base_offset_in_addr (mem_3, &base_3, &offset_3); - if (base_3 == NULL_RTX || offset_3 == NULL_RTX) - return false; - extract_base_offset_in_addr (mem_4, &base_4, &offset_4); - if (base_4 == NULL_RTX || offset_4 == NULL_RTX) - return false; + for (int i = 1; i < num_insns; i++) + if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i]))) + { + if (rclass != FP_REGS) + return false; + } + else + { + if (rclass != GENERAL_REGS) + return false; + } + + /* Only the last register in the order in which they occur + may be clobbered by the load. 
*/ + if (rclass == GENERAL_REGS && load) + for (int i = 0; i < num_insns - 1; i++) + if (reg_mentioned_p (reg[i], mem[i])) + return false; /* Check if the bases are same. */ - if (!rtx_equal_p (base_1, base_2) - || !rtx_equal_p (base_2, base_3) - || !rtx_equal_p (base_3, base_4)) - return false; + for (int i = 0; i < num_insns - 1; i++) + if (!rtx_equal_p (base[i], base[i + 1])) + return false; - offval_1 = INTVAL (offset_1); - offval_2 = INTVAL (offset_2); - offval_3 = INTVAL (offset_3); - offval_4 = INTVAL (offset_4); - msize = GET_MODE_SIZE (mode); - /* Check if the offsets are consecutive. */ - if ((offval_1 != (offval_2 + msize) - || offval_1 != (offval_3 + msize * 2) - || offval_1 != (offval_4 + msize * 3)) - && (offval_4 != (offval_3 + msize) - || offval_4 != (offval_2 + msize * 2) - || offval_4 != (offval_1 + msize * 3))) + for (int i = 0; i < num_insns; i++) + offvals[i] = INTVAL (offset[i]); + + msize = GET_MODE_SIZE (mode).to_constant (); + + /* Check if the offsets can be put in the right order to do a ldp/stp. */ + qsort (offvals, num_insns, sizeof (HOST_WIDE_INT), + aarch64_host_wide_int_compare); + + if (!(offvals[1] == offvals[0] + msize + && offvals[3] == offvals[2] + msize)) return false; - /* Check if the addresses are clobbered by load. */ - if (load) - { - if (reg_mentioned_p (reg_1, mem_1) - || reg_mentioned_p (reg_2, mem_2) - || reg_mentioned_p (reg_3, mem_3)) - return false; + /* Check that offsets are within range of each other. The ldp/stp + instructions have 7 bit immediate offsets, so use 0x80. */ + if (offvals[2] - offvals[0] >= msize * 0x80) + return false; - /* In increasing order, the last load can clobber the address. */ - if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4)) - return false; - } + /* The offsets must be aligned with respect to each other. */ + if (offvals[0] % msize != offvals[2] % msize) + return false; /* If we have SImode and slow unaligned ldp, check the alignment to be at least 8 byte. */ if (mode == SImode && (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) + & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) && !optimize_size - && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT) - return false; - - if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) - rclass_1 = FP_REGS; - else - rclass_1 = GENERAL_REGS; - - if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2))) - rclass_2 = FP_REGS; - else - rclass_2 = GENERAL_REGS; - - if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3))) - rclass_3 = FP_REGS; - else - rclass_3 = GENERAL_REGS; - - if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4))) - rclass_4 = FP_REGS; - else - rclass_4 = GENERAL_REGS; - - /* Check if the registers are of same class. */ - if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4) + && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT) return false; return true; } /* Given OPERANDS of consecutive load/store, this function pairs them - into ldp/stp after adjusting the offset. It depends on the fact - that addresses of load/store instructions are in increasing order. + into LDP/STP after adjusting the offset. It depends on the fact + that the operands can be sorted so the offsets are correct for STP. MODE is the mode of memory operands. CODE is the rtl operator which should be applied to all memory operands, it's SIGN_EXTEND, ZERO_EXTEND or UNKNOWN. 
*/ bool aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, - scalar_mode mode, RTX_CODE code) + machine_mode mode, RTX_CODE code) { - rtx base, offset, t1, t2; + rtx base, offset_1, offset_3, t1, t2; rtx mem_1, mem_2, mem_3, mem_4; - HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize; + rtx temp_operands[8]; + HOST_WIDE_INT off_val_1, off_val_3, base_off, new_off_1, new_off_3, + stp_off_upper_limit, stp_off_lower_limit, msize; + + /* We make changes on a copy as we may still bail out. */ + for (int i = 0; i < 8; i ++) + temp_operands[i] = operands[i]; + /* Sort the operands. */ + qsort (temp_operands, 4, 2 * sizeof (rtx *), aarch64_ldrstr_offset_compare); + + /* Copy the memory operands so that if we have to bail for some + reason the original addresses are unchanged. */ if (load) { - mem_1 = operands[1]; - mem_2 = operands[3]; - mem_3 = operands[5]; - mem_4 = operands[7]; + mem_1 = copy_rtx (temp_operands[1]); + mem_2 = copy_rtx (temp_operands[3]); + mem_3 = copy_rtx (temp_operands[5]); + mem_4 = copy_rtx (temp_operands[7]); } else { - mem_1 = operands[0]; - mem_2 = operands[2]; - mem_3 = operands[4]; - mem_4 = operands[6]; + mem_1 = copy_rtx (temp_operands[0]); + mem_2 = copy_rtx (temp_operands[2]); + mem_3 = copy_rtx (temp_operands[4]); + mem_4 = copy_rtx (temp_operands[6]); gcc_assert (code == UNKNOWN); } - extract_base_offset_in_addr (mem_1, &base, &offset); - gcc_assert (base != NULL_RTX && offset != NULL_RTX); + extract_base_offset_in_addr (mem_1, &base, &offset_1); + extract_base_offset_in_addr (mem_3, &base, &offset_3); + gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX + && offset_3 != NULL_RTX); + + /* Adjust offset so it can fit in LDP/STP instruction. */ + msize = GET_MODE_SIZE (mode).to_constant(); + stp_off_upper_limit = msize * (0x40 - 1); + stp_off_lower_limit = - msize * 0x40; + + off_val_1 = INTVAL (offset_1); + off_val_3 = INTVAL (offset_3); + + /* The base offset is optimally half way between the two STP/LDP offsets. */ + if (msize <= 4) + base_off = (off_val_1 + off_val_3) / 2; + else + /* However, due to issues with negative LDP/STP offset generation for + larger modes, for DF, DI and vector modes. we must not use negative + addresses smaller than 9 signed unadjusted bits can store. This + provides the most range in this case. */ + base_off = off_val_1; - /* Adjust offset thus it can fit in ldp/stp instruction. */ - msize = GET_MODE_SIZE (mode); - stp_off_limit = msize * 0x40; - off_val = INTVAL (offset); - abs_off = (off_val < 0) ? -off_val : off_val; - new_off = abs_off % stp_off_limit; - adj_off = abs_off - new_off; + /* Adjust the base so that it is aligned with the addresses but still + optimal. */ + if (base_off % msize != off_val_1 % msize) + /* Fix the offset, bearing in mind we want to make it bigger not + smaller. */ + base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize; + else if (msize <= 4) + /* The negative range of LDP/STP is one larger than the positive range. */ + base_off += msize; - /* Further adjust to make sure all offsets are OK. */ - if ((new_off + msize * 2) >= stp_off_limit) + /* Check if base offset is too big or too small. We can attempt to resolve + this issue by setting it to the maximum value and seeing if the offsets + still fit. */ + if (base_off >= 0x1000) { - adj_off += stp_off_limit; - new_off -= stp_off_limit; + base_off = 0x1000 - 1; + /* We must still make sure that the base offset is aligned with respect + to the address. But it may not be made any bigger. 
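/* Simplified model (illustration only) of the offset arithmetic in
   aarch64_gen_adjusted_ldpstp: choose an anchor value for the new base
   register, keep it congruent to the first offset modulo the access size,
   clamp it to the 12-bit ADD/SUB immediate range, and check that both pair
   offsets fit the scaled signed 7-bit LDP/STP immediate.  The function name
   and interface are invented; the real code also rewrites the MEMs and
   emits the instructions.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
plan_adjusted_ldp (int64_t off1, int64_t off3, int64_t msize,
                   int64_t *base_off, int64_t *new_off1, int64_t *new_off3)
{
  int64_t upper = msize * (0x40 - 1);
  int64_t lower = -msize * 0x40;

  /* Small modes can anchor halfway between the two pairs; larger modes
     anchor on the first offset to avoid negative-offset issues.  */
  int64_t anchor = msize <= 4 ? (off1 + off3) / 2 : off1;

  /* Keep the anchor aligned with the first access.  */
  if (anchor % msize != off1 % msize)
    anchor += (((anchor % msize) - (off1 % msize)) + msize) % msize;

  /* Clamp to what a 12-bit unsigned ADD/SUB immediate can build.  */
  if (anchor >= 0x1000)
    {
      anchor = 0x1000 - 1;
      anchor -= (((anchor % msize) - (off1 % msize)) + msize) % msize;
    }
  else if (anchor <= -0x1000)
    {
      anchor = -0x1000 + 1;
      anchor += (((anchor % msize) - (off1 % msize)) + msize) % msize;
    }

  *base_off = anchor;
  *new_off1 = off1 - anchor;
  *new_off3 = off3 - anchor;
  return *new_off1 >= lower && *new_off1 <= upper
         && *new_off3 >= lower && *new_off3 <= upper;
}

int
main (void)
{
  int64_t base, n1, n3;
  if (plan_adjusted_ldp (4096, 4112, 8, &base, &n1, &n3))
    printf ("anchor %lld, pair offsets %lld and %lld\n",
            (long long) base, (long long) n1, (long long) n3);
  return 0;
}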
*/ + base_off -= (((base_off % msize) - (off_val_1 % msize)) + msize) % msize; } - /* Make sure the adjustment can be done with ADD/SUB instructions. */ - if (adj_off >= 0x1000) - return false; - - if (off_val < 0) + /* Likewise for the case where the base is too small. */ + if (base_off <= -0x1000) { - adj_off = -adj_off; - new_off = -new_off; + base_off = -0x1000 + 1; + base_off += (((base_off % msize) - (off_val_1 % msize)) + msize) % msize; } - /* Create new memory references. */ - mem_1 = change_address (mem_1, VOIDmode, - plus_constant (DImode, operands[8], new_off)); + /* Offset of the first STP/LDP. */ + new_off_1 = off_val_1 - base_off; + + /* Offset of the second STP/LDP. */ + new_off_3 = off_val_3 - base_off; - /* Check if the adjusted address is OK for ldp/stp. */ - if (!aarch64_mem_pair_operand (mem_1, mode)) + /* The offsets must be within the range of the LDP/STP instructions. */ + if (new_off_1 > stp_off_upper_limit || new_off_1 < stp_off_lower_limit + || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit) return false; - msize = GET_MODE_SIZE (mode); - mem_2 = change_address (mem_2, VOIDmode, - plus_constant (DImode, - operands[8], - new_off + msize)); - mem_3 = change_address (mem_3, VOIDmode, - plus_constant (DImode, - operands[8], - new_off + msize * 2)); - mem_4 = change_address (mem_4, VOIDmode, - plus_constant (DImode, - operands[8], - new_off + msize * 3)); + replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8], + new_off_1), true); + replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8], + new_off_1 + msize), true); + replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8], + new_off_3), true); + replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8], + new_off_3 + msize), true); + + if (!aarch64_mem_pair_operand (mem_1, mode) + || !aarch64_mem_pair_operand (mem_3, mode)) + return false; if (code == ZERO_EXTEND) { @@ -17109,21 +24557,29 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, if (load) { + operands[0] = temp_operands[0]; operands[1] = mem_1; + operands[2] = temp_operands[2]; operands[3] = mem_2; + operands[4] = temp_operands[4]; operands[5] = mem_3; + operands[6] = temp_operands[6]; operands[7] = mem_4; } else { operands[0] = mem_1; + operands[1] = temp_operands[1]; operands[2] = mem_2; + operands[3] = temp_operands[3]; operands[4] = mem_3; + operands[5] = temp_operands[5]; operands[6] = mem_4; + operands[7] = temp_operands[7]; } /* Emit adjusting instruction. */ - emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off))); + emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off))); /* Emit ldp/stp instructions. */ t1 = gen_rtx_SET (operands[0], operands[1]); t2 = gen_rtx_SET (operands[2], operands[3]); @@ -17185,13 +24641,36 @@ aarch64_fpconst_pow_of_2 (rtx x) r = CONST_DOUBLE_REAL_VALUE (x); - if (REAL_VALUE_NEGATIVE (*r) - || REAL_VALUE_ISNAN (*r) - || REAL_VALUE_ISINF (*r) - || !real_isinteger (r, DFmode)) + if (REAL_VALUE_NEGATIVE (*r) + || REAL_VALUE_ISNAN (*r) + || REAL_VALUE_ISINF (*r) + || !real_isinteger (r, DFmode)) + return -1; + + return exact_log2 (real_to_integer (r)); +} + +/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a + power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n) + return n. Otherwise return -1. 
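/* Standalone analogue of aarch64_fpconst_pow2_recip, using frexp instead of
   GCC's REAL_VALUE machinery: return n when X is exactly 1/2^n with
   1 <= n <= 32, otherwise -1.  Illustration only.  */
#include <math.h>
#include <stdio.h>

static int
recip_pow2_exponent (double x)
{
  if (!(x > 0.0) || isinf (x))
    return -1;
  int e;
  double m = frexp (x, &e);     /* x == m * 2^e with 0.5 <= m < 1 */
  if (m != 0.5)
    return -1;                  /* not an exact power of two */
  int n = 1 - e;                /* x == 2^-n */
  return n >= 1 && n <= 32 ? n : -1;
}

int
main (void)
{
  printf ("%d %d %d\n",
          recip_pow2_exponent (0.25),   /* 2 */
          recip_pow2_exponent (1.0),    /* -1: n would be 0 */
          recip_pow2_exponent (0.3));   /* -1: not a power of two */
  return 0;
}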
*/ + +int +aarch64_fpconst_pow2_recip (rtx x) +{ + REAL_VALUE_TYPE r0; + + if (!CONST_DOUBLE_P (x)) return -1; - return exact_log2 (real_to_integer (r)); + r0 = *CONST_DOUBLE_REAL_VALUE (x); + if (exact_real_inverse (DFmode, &r0) + && !REAL_VALUE_NEGATIVE (r0)) + { + int ret = exact_log2 (real_to_integer (&r0)); + if (ret >= 1 && ret <= 32) + return ret; + } + return -1; } /* If X is a vector of equal CONST_DOUBLE values and that value is @@ -17382,11 +24861,52 @@ static bool aarch64_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t) { - if (BYTES_BIG_ENDIAN) + unsigned int from_flags = aarch64_classify_vector_mode (from); + unsigned int to_flags = aarch64_classify_vector_mode (to); + + bool from_sve_p = (from_flags & VEC_ANY_SVE); + bool to_sve_p = (to_flags & VEC_ANY_SVE); + + bool from_partial_sve_p = from_sve_p && (from_flags & VEC_PARTIAL); + bool to_partial_sve_p = to_sve_p && (to_flags & VEC_PARTIAL); + + bool from_pred_p = (from_flags & VEC_SVE_PRED); + bool to_pred_p = (to_flags & VEC_SVE_PRED); + + /* Don't allow changes between predicate modes and other modes. + Only predicate registers can hold predicate modes and only + non-predicate registers can hold non-predicate modes, so any + attempt to mix them would require a round trip through memory. */ + if (from_pred_p != to_pred_p) + return false; + + /* Don't allow changes between partial SVE modes and other modes. + The contents of partial SVE modes are distributed evenly across + the register, whereas GCC expects them to be clustered together. */ + if (from_partial_sve_p != to_partial_sve_p) + return false; + + /* Similarly reject changes between partial SVE modes that have + different patterns of significant and insignificant bits. */ + if (from_partial_sve_p + && (aarch64_sve_container_bits (from) != aarch64_sve_container_bits (to) + || GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))) + return false; + + if (maybe_ne (BITS_PER_SVE_VECTOR, 128u)) { - bool from_sve_p = aarch64_sve_data_mode_p (from); - bool to_sve_p = aarch64_sve_data_mode_p (to); + /* Don't allow changes between SVE modes and other modes that might + be bigger than 128 bits. In particular, OImode, CImode and XImode + divide into 128-bit quantities while SVE modes divide into + BITS_PER_SVE_VECTOR quantities. */ + if (from_sve_p && !to_sve_p && maybe_gt (GET_MODE_BITSIZE (to), 128)) + return false; + if (to_sve_p && !from_sve_p && maybe_gt (GET_MODE_BITSIZE (from), 128)) + return false; + } + if (BYTES_BIG_ENDIAN) + { /* Don't allow changes between SVE data modes and non-SVE modes. See the comment at the head of aarch64-sve.md for details. */ if (from_sve_p != to_sve_p) @@ -17414,12 +24934,615 @@ aarch64_select_early_remat_modes (sbitmap modes) /* SVE values are not normally live across a call, so it should be worth doing early rematerialization even in VL-specific mode. */ for (int i = 0; i < NUM_MACHINE_MODES; ++i) + if (aarch64_sve_mode_p ((machine_mode) i)) + bitmap_set_bit (modes, i); +} + +/* Override the default target speculation_safe_value. */ +static rtx +aarch64_speculation_safe_value (machine_mode mode, + rtx result, rtx val, rtx failval) +{ + /* Maybe we should warn if falling back to hard barriers. They are + likely to be noticably more expensive than the alternative below. 
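/* Example use of __builtin_speculation_safe_value, the builtin this target
   hook implements: on paths that are only reachable under misspeculation,
   IDX is forced to the failsafe value so the dependent load cannot leak
   out-of-bounds data.  When compiled with -mtrack-speculation the backend
   can use the cheaper conditional-select + CSDB sequence; otherwise the
   generic hard barrier is used.  The array name and bound are made up for
   the example.  */
#include <stddef.h>

extern char table[256];

char
load_element (size_t idx, size_t limit)
{
  if (idx < limit)
    {
      idx = __builtin_speculation_safe_value (idx, (size_t) 0);
      return table[idx];
    }
  return 0;
}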
*/ + if (!aarch64_track_speculation) + return default_speculation_safe_value (mode, result, val, failval); + + if (!REG_P (val)) + val = copy_to_mode_reg (mode, val); + + if (!aarch64_reg_or_zero (failval, mode)) + failval = copy_to_mode_reg (mode, failval); + + emit_insn (gen_despeculate_copy (mode, result, val, failval)); + return result; +} + +/* Implement TARGET_ESTIMATED_POLY_VALUE. + Look into the tuning structure for an estimate. + KIND specifies the type of requested estimate: min, max or likely. + For cores with a known SVE width all three estimates are the same. + For generic SVE tuning we want to distinguish the maximum estimate from + the minimum and likely ones. + The likely estimate is the same as the minimum in that case to give a + conservative behavior of auto-vectorizing with SVE when it is a win + even for 128-bit SVE. + When SVE width information is available VAL.coeffs[1] is multiplied by + the number of VQ chunks over the initial Advanced SIMD 128 bits. */ + +static HOST_WIDE_INT +aarch64_estimated_poly_value (poly_int64 val, + poly_value_estimate_kind kind + = POLY_VALUE_LIKELY) +{ + enum aarch64_sve_vector_bits_enum width_source + = aarch64_tune_params.sve_width; + + /* If there is no core-specific information then the minimum and likely + values are based on 128-bit vectors and the maximum is based on + the architectural maximum of 2048 bits. */ + if (width_source == SVE_SCALABLE) + switch (kind) + { + case POLY_VALUE_MIN: + case POLY_VALUE_LIKELY: + return val.coeffs[0]; + case POLY_VALUE_MAX: + return val.coeffs[0] + val.coeffs[1] * 15; + } + + /* If the core provides width information, use that. */ + HOST_WIDE_INT over_128 = width_source - 128; + return val.coeffs[0] + val.coeffs[1] * over_128 / 128; +} + + +/* Return true for types that could be supported as SIMD return or + argument types. */ + +static bool +supported_simd_type (tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) || INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t)) { - machine_mode mode = (machine_mode) i; - unsigned int vec_flags = aarch64_classify_vector_mode (mode); - if (vec_flags & VEC_ANY_SVE) - bitmap_set_bit (modes, i); + HOST_WIDE_INT s = tree_to_shwi (TYPE_SIZE_UNIT (t)); + return s == 1 || s == 2 || s == 4 || s == 8; + } + return false; +} + +/* Return true for types that currently are supported as SIMD return + or argument types. */ + +static bool +currently_supported_simd_type (tree t, tree b) +{ + if (COMPLEX_FLOAT_TYPE_P (t)) + return false; + + if (TYPE_SIZE (t) != TYPE_SIZE (b)) + return false; + + return supported_simd_type (t); +} + +/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */ + +static int +aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + struct cgraph_simd_clone *clonei, + tree base_type, int num) +{ + tree t, ret_type; + unsigned int elt_bits, count; + unsigned HOST_WIDE_INT const_simdlen; + poly_uint64 vec_bits; + + if (!TARGET_SIMD) + return 0; + + /* For now, SVE simdclones won't produce illegal simdlen, So only check + const simdlens here. 
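/* Standalone sketch of the estimate performed by
   aarch64_estimated_poly_value above, for a poly_int value "c0 + c1 * x"
   where x counts the 128-bit chunks beyond the first in an SVE vector.
   WIDTH_BITS is the tuned SVE width, or 0 when it is unknown
   (SVE_SCALABLE).  Names and interface are invented for the illustration.  */
#include <stdio.h>

enum poly_estimate_kind { EST_MIN, EST_LIKELY, EST_MAX };

static long
estimated_poly_value (long c0, long c1, int width_bits,
                      enum poly_estimate_kind kind)
{
  if (width_bits == 0)
    /* Min/likely assume 128-bit vectors; max assumes the architectural
       limit of 2048 bits, i.e. 15 extra 128-bit chunks.  */
    return kind == EST_MAX ? c0 + c1 * 15 : c0;

  return c0 + c1 * ((width_bits - 128) / 128);
}

int
main (void)
{
  /* A value of "16 + 16x" bytes (one SVE vector of bytes).  */
  printf ("likely, unknown width: %ld\n",
          estimated_poly_value (16, 16, 0, EST_LIKELY));    /* 16 */
  printf ("max, unknown width:    %ld\n",
          estimated_poly_value (16, 16, 0, EST_MAX));       /* 256 */
  printf ("likely, 256-bit SVE:   %ld\n",
          estimated_poly_value (16, 16, 256, EST_LIKELY));  /* 32 */
  return 0;
}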
*/ + if (maybe_ne (clonei->simdlen, 0U) + && clonei->simdlen.is_constant (&const_simdlen) + && (const_simdlen < 2 + || const_simdlen > 1024 + || (const_simdlen & (const_simdlen - 1)) != 0)) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %wd", const_simdlen); + return 0; + } + + ret_type = TREE_TYPE (TREE_TYPE (node->decl)); + if (TREE_CODE (ret_type) != VOID_TYPE + && !currently_supported_simd_type (ret_type, base_type)) + { + if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type)) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support mixed size types " + "for % functions"); + else if (supported_simd_type (ret_type)) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support return type %qT " + "for % functions", ret_type); + else + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported return type %qT for % functions", + ret_type); + return 0; + } + + int i; + tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl)); + bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE); + + for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0; + t && t != void_list_node; t = TREE_CHAIN (t), i++) + { + tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t); + + if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM + && !currently_supported_simd_type (arg_type, base_type)) + { + if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type)) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support mixed size types " + "for % functions"); + else + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support argument type %qT " + "for % functions", arg_type); + return 0; + } + } + + clonei->vecsize_mangle = 'n'; + clonei->mask_mode = VOIDmode; + elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + if (known_eq (clonei->simdlen, 0U)) + { + count = 2; + vec_bits = (num == 0 ? 64 : 128); + clonei->simdlen = exact_div (vec_bits, elt_bits); + } + else + { + count = 1; + vec_bits = clonei->simdlen * elt_bits; + /* For now, SVE simdclones won't produce illegal simdlen, So only check + const simdlens here. */ + if (clonei->simdlen.is_constant (&const_simdlen) + && maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U)) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support simdlen %wd for type %qT", + const_simdlen, base_type); + return 0; + } + } + clonei->vecsize_int = vec_bits; + clonei->vecsize_float = vec_bits; + return count; +} + +/* Implement TARGET_SIMD_CLONE_ADJUST. */ + +static void +aarch64_simd_clone_adjust (struct cgraph_node *node) +{ + /* Add aarch64_vector_pcs target attribute to SIMD clones so they + use the correct ABI. */ + + tree t = TREE_TYPE (node->decl); + TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default", + TYPE_ATTRIBUTES (t)); +} + +/* Implement TARGET_SIMD_CLONE_USABLE. 
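/* Example source for the simd-clone hooks here: with no explicit simdlen
   clause, the vecsize/simdlen hook asks for two Advanced SIMD clones of
   this function, a 64-bit one (simdlen 2 for float) and a 128-bit one
   (simdlen 4), both using the 'n' mangling letter and, via
   aarch64_simd_clone_adjust, the aarch64_vector_pcs ABI.  Compile with
   -fopenmp or -fopenmp-simd for the clones to be generated.  */
#pragma omp declare simd
float
scale (float x, float factor)
{
  return x * factor;
}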
*/ + +static int +aarch64_simd_clone_usable (struct cgraph_node *node) +{ + switch (node->simdclone->vecsize_mangle) + { + case 'n': + if (!TARGET_SIMD) + return -1; + return 0; + default: + gcc_unreachable (); + } +} + +/* Implement TARGET_COMP_TYPE_ATTRIBUTES */ + +static int +aarch64_comp_type_attributes (const_tree type1, const_tree type2) +{ + auto check_attr = [&](const char *name) { + tree attr1 = lookup_attribute (name, TYPE_ATTRIBUTES (type1)); + tree attr2 = lookup_attribute (name, TYPE_ATTRIBUTES (type2)); + if (!attr1 && !attr2) + return true; + + return attr1 && attr2 && attribute_value_equal (attr1, attr2); + }; + + if (!check_attr ("aarch64_vector_pcs")) + return 0; + if (!check_attr ("Advanced SIMD type")) + return 0; + if (!check_attr ("SVE type")) + return 0; + if (!check_attr ("SVE sizeless type")) + return 0; + return 1; +} + +/* Implement TARGET_GET_MULTILIB_ABI_NAME */ + +static const char * +aarch64_get_multilib_abi_name (void) +{ + if (TARGET_BIG_END) + return TARGET_ILP32 ? "aarch64_be_ilp32" : "aarch64_be"; + return TARGET_ILP32 ? "aarch64_ilp32" : "aarch64"; +} + +/* Implement TARGET_STACK_PROTECT_GUARD. In case of a + global variable based guard use the default else + return a null tree. */ +static tree +aarch64_stack_protect_guard (void) +{ + if (aarch64_stack_protector_guard == SSP_GLOBAL) + return default_stack_protect_guard (); + + return NULL_TREE; +} + +/* Return the diagnostic message string if conversion from FROMTYPE to + TOTYPE is not allowed, NULL otherwise. */ + +static const char * +aarch64_invalid_conversion (const_tree fromtype, const_tree totype) +{ + if (element_mode (fromtype) != element_mode (totype)) + { + /* Do no allow conversions to/from BFmode scalar types. */ + if (TYPE_MODE (fromtype) == BFmode) + return N_("invalid conversion from type %"); + if (TYPE_MODE (totype) == BFmode) + return N_("invalid conversion to type %"); + } + + /* Conversion allowed. */ + return NULL; +} + +/* Return the diagnostic message string if the unary operation OP is + not permitted on TYPE, NULL otherwise. */ + +static const char * +aarch64_invalid_unary_op (int op, const_tree type) +{ + /* Reject all single-operand operations on BFmode except for &. */ + if (element_mode (type) == BFmode && op != ADDR_EXPR) + return N_("operation not permitted on type %"); + + /* Operation allowed. */ + return NULL; +} + +/* Return the diagnostic message string if the binary operation OP is + not permitted on TYPE1 and TYPE2, NULL otherwise. */ + +static const char * +aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, + const_tree type2) +{ + /* Reject all 2-operand operations on BFmode. */ + if (element_mode (type1) == BFmode + || element_mode (type2) == BFmode) + return N_("operation not permitted on type %"); + + if (VECTOR_TYPE_P (type1) + && VECTOR_TYPE_P (type2) + && !TYPE_INDIVISIBLE_P (type1) + && !TYPE_INDIVISIBLE_P (type2) + && (aarch64_sve::builtin_type_p (type1) + != aarch64_sve::builtin_type_p (type2))) + return N_("cannot combine GNU and SVE vectors in a binary operation"); + + /* Operation allowed. */ + return NULL; +} + +/* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. Here we tell the rest of the + compiler that we automatically ignore the top byte of our pointers, which + allows using -fsanitize=hwaddress. */ +bool +aarch64_can_tag_addresses () +{ + return !TARGET_ILP32; +} + +/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE + section at the end if needed. 
*/ +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) +void +aarch64_file_end_indicate_exec_stack () +{ + file_end_indicate_exec_stack (); + + unsigned feature_1_and = 0; + if (aarch64_bti_enabled ()) + feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI; + + if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE) + feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC; + + if (feature_1_and) + { + /* Generate .note.gnu.property section. */ + switch_to_section (get_section (".note.gnu.property", + SECTION_NOTYPE, NULL)); + + /* PT_NOTE header: namesz, descsz, type. + namesz = 4 ("GNU\0") + descsz = 16 (Size of the program property array) + [(12 + padding) * Number of array elements] + type = 5 (NT_GNU_PROPERTY_TYPE_0). */ + assemble_align (POINTER_SIZE); + assemble_integer (GEN_INT (4), 4, 32, 1); + assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1); + assemble_integer (GEN_INT (5), 4, 32, 1); + + /* PT_NOTE name. */ + assemble_string ("GNU", 4); + + /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0: + type = GNU_PROPERTY_AARCH64_FEATURE_1_AND + datasz = 4 + data = feature_1_and. */ + assemble_integer (GEN_INT (GNU_PROPERTY_AARCH64_FEATURE_1_AND), 4, 32, 1); + assemble_integer (GEN_INT (4), 4, 32, 1); + assemble_integer (GEN_INT (feature_1_and), 4, 32, 1); + + /* Pad the size of the note to the required alignment. */ + assemble_align (POINTER_SIZE); + } +} +#undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC +#undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI +#undef GNU_PROPERTY_AARCH64_FEATURE_1_AND + +/* Helper function for straight line speculation. + Return what barrier should be emitted for straight line speculation + mitigation. + When not mitigating against straight line speculation this function returns + an empty string. + When mitigating against straight line speculation, use: + * SB when the v8.5-A SB extension is enabled. + * DSB+ISB otherwise. */ +const char * +aarch64_sls_barrier (int mitigation_required) +{ + return mitigation_required + ? (TARGET_SB ? "sb" : "dsb\tsy\n\tisb") + : ""; +} + +static GTY (()) tree aarch64_sls_shared_thunks[30]; +static GTY (()) bool aarch64_sls_shared_thunks_needed = false; +const char *indirect_symbol_names[30] = { + "__call_indirect_x0", + "__call_indirect_x1", + "__call_indirect_x2", + "__call_indirect_x3", + "__call_indirect_x4", + "__call_indirect_x5", + "__call_indirect_x6", + "__call_indirect_x7", + "__call_indirect_x8", + "__call_indirect_x9", + "__call_indirect_x10", + "__call_indirect_x11", + "__call_indirect_x12", + "__call_indirect_x13", + "__call_indirect_x14", + "__call_indirect_x15", + "", /* "__call_indirect_x16", */ + "", /* "__call_indirect_x17", */ + "__call_indirect_x18", + "__call_indirect_x19", + "__call_indirect_x20", + "__call_indirect_x21", + "__call_indirect_x22", + "__call_indirect_x23", + "__call_indirect_x24", + "__call_indirect_x25", + "__call_indirect_x26", + "__call_indirect_x27", + "__call_indirect_x28", + "__call_indirect_x29", +}; + +/* Function to create a BLR thunk. This thunk is used to mitigate straight + line speculation. Instead of a simple BLR that can be speculated past, + we emit a BL to this thunk, and this thunk contains a BR to the relevant + register. These thunks have the relevant speculation barries put after + their indirect branch so that speculation is blocked. 
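/* Byte-level illustration of the .note.gnu.property entry emitted above for
   an LP64 target (POINTER_SIZE == 64, so the 12-byte property is padded to
   16 bytes).  The struct only describes the on-disk layout; GCC itself
   emits the fields with assemble_integer.  */
#include <stdint.h>

struct aarch64_gnu_property_note
{
  uint32_t namesz;     /* 4: strlen ("GNU") + 1 */
  uint32_t descsz;     /* 16: ROUND_UP (12, POINTER_BYTES) */
  uint32_t type;       /* 5: NT_GNU_PROPERTY_TYPE_0 */
  char     name[4];    /* "GNU\0" */
  uint32_t pr_type;    /* 0xc0000000: GNU_PROPERTY_AARCH64_FEATURE_1_AND */
  uint32_t pr_datasz;  /* 4 */
  uint32_t pr_data;    /* feature_1_and: bit 0 = BTI, bit 1 = PAC */
  uint32_t pr_pad;     /* padding up to the pointer-size alignment */
};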
+ + We use such a thunk so the speculation barriers are kept off the + architecturally executed path in order to reduce the performance overhead. + + When optimizing for size we use stubs shared by the linked object. + When optimizing for performance we emit stubs for each function in the hope + that the branch predictor can better train on jumps specific for a given + function. */ +rtx +aarch64_sls_create_blr_label (int regnum) +{ + gcc_assert (STUB_REGNUM_P (regnum)); + if (optimize_function_for_size_p (cfun)) + { + /* For the thunks shared between different functions in this compilation + unit we use a named symbol -- this is just for users to more easily + understand the generated assembly. */ + aarch64_sls_shared_thunks_needed = true; + const char *thunk_name = indirect_symbol_names[regnum]; + if (aarch64_sls_shared_thunks[regnum] == NULL) + { + /* Build a decl representing this function stub and record it for + later. We build a decl here so we can use the GCC machinery for + handling sections automatically (through `get_named_section` and + `make_decl_one_only`). That saves us a lot of trouble handling + the specifics of different output file formats. */ + tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier (thunk_name), + build_function_type_list (void_type_node, + NULL_TREE)); + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; + DECL_IGNORED_P (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); + resolve_unique_section (decl, 0, false); + aarch64_sls_shared_thunks[regnum] = decl; + } + + return gen_rtx_SYMBOL_REF (Pmode, thunk_name); + } + + if (cfun->machine->call_via[regnum] == NULL) + cfun->machine->call_via[regnum] + = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); + return cfun->machine->call_via[regnum]; +} + +/* Helper function for aarch64_sls_emit_blr_function_thunks and + aarch64_sls_emit_shared_blr_thunks below. */ +static void +aarch64_sls_emit_function_stub (FILE *out_file, int regnum) +{ + /* Save in x16 and branch to that function so this transformation does + not prevent jumping to `BTI c` instructions. */ + asm_fprintf (out_file, "\tmov\tx16, x%d\n", regnum); + asm_fprintf (out_file, "\tbr\tx16\n"); +} + +/* Emit all BLR stubs for this particular function. + Here we emit all the BLR stubs needed for the current function. Since we + emit these stubs in a consecutive block we know there will be no speculation + gadgets between each stub, and hence we only emit a speculation barrier at + the end of the stub sequences. + + This is called in the TARGET_ASM_FUNCTION_EPILOGUE hook. */ +void +aarch64_sls_emit_blr_function_thunks (FILE *out_file) +{ + if (! aarch64_harden_sls_blr_p ()) + return; + + bool any_functions_emitted = false; + /* We must save and restore the current function section since this assembly + is emitted at the end of the function. This means it can be emitted *just + after* the cold section of a function. That cold part would be emitted in + a different section. That switch would trigger a `.cfi_endproc` directive + to be emitted in the original section and a `.cfi_startproc` directive to + be emitted in the new section. 
Switching to the original section without + restoring would mean that the `.cfi_endproc` emitted as a function ends + would happen in a different section -- leaving an unmatched + `.cfi_startproc` in the cold text section and an unmatched `.cfi_endproc` + in the standard text section. */ + section *save_text_section = in_section; + switch_to_section (function_section (current_function_decl)); + for (int regnum = 0; regnum < 30; ++regnum) + { + rtx specu_label = cfun->machine->call_via[regnum]; + if (specu_label == NULL) + continue; + + targetm.asm_out.print_operand (out_file, specu_label, 0); + asm_fprintf (out_file, ":\n"); + aarch64_sls_emit_function_stub (out_file, regnum); + any_functions_emitted = true; + } + if (any_functions_emitted) + /* Can use the SB if needs be here, since this stub will only be used + by the current function, and hence for the current target. */ + asm_fprintf (out_file, "\t%s\n", aarch64_sls_barrier (true)); + switch_to_section (save_text_section); +} + +/* Emit shared BLR stubs for the current compilation unit. + Over the course of compiling this unit we may have converted some BLR + instructions to a BL to a shared stub function. This is where we emit those + stub functions. + This function is for the stubs shared between different functions in this + compilation unit. We share when optimizing for size instead of speed. + + This function is called through the TARGET_ASM_FILE_END hook. */ +void +aarch64_sls_emit_shared_blr_thunks (FILE *out_file) +{ + if (! aarch64_sls_shared_thunks_needed) + return; + + for (int regnum = 0; regnum < 30; ++regnum) + { + tree decl = aarch64_sls_shared_thunks[regnum]; + if (!decl) + continue; + + const char *name = indirect_symbol_names[regnum]; + switch_to_section (get_named_section (decl, NULL, 0)); + ASM_OUTPUT_ALIGN (out_file, 2); + targetm.asm_out.globalize_label (out_file, name); + /* Only emits if the compiler is configured for an assembler that can + handle visibility directives. */ + targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); + ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function"); + ASM_OUTPUT_LABEL (out_file, name); + aarch64_sls_emit_function_stub (out_file, regnum); + /* Use the most conservative target to ensure it can always be used by any + function in the translation unit. */ + asm_fprintf (out_file, "\tdsb\tsy\n\tisb\n"); + ASM_DECLARE_FUNCTION_SIZE (out_file, name, decl); + } +} + +/* Implement TARGET_ASM_FILE_END. */ +void +aarch64_asm_file_end () +{ + aarch64_sls_emit_shared_blr_thunks (asm_out_file); + /* Since this function will be called for the ASM_FILE_END hook, we ensure + that what would be called otherwise (e.g. `file_end_indicate_exec_stack` + for FreeBSD) still gets called. */ +#ifdef TARGET_ASM_FILE_END + TARGET_ASM_FILE_END (); +#endif +} + +const char * +aarch64_indirect_call_asm (rtx addr) +{ + gcc_assert (REG_P (addr)); + if (aarch64_harden_sls_blr_p ()) + { + rtx stub_label = aarch64_sls_create_blr_label (REGNO (addr)); + output_asm_insn ("bl\t%0", &stub_label); } + else + output_asm_insn ("blr\t%0", &addr); + return ""; } /* Target-specific selftests. 
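/* Small standalone program (illustration only) that prints what one of the
   shared SLS stubs emitted above looks like: a call site's "blr xN" is
   rewritten to "bl __call_indirect_xN", and the stub bounces through x16 so
   that "BTI c" landing pads still accept the branch, with the conservative
   DSB SY + ISB barrier placed after the indirect branch.  */
#include <stdio.h>

static void
print_shared_sls_stub (int regnum)
{
  printf ("__call_indirect_x%d:\n", regnum);
  printf ("\tmov\tx16, x%d\n", regnum);
  printf ("\tbr\tx16\n");
  printf ("\tdsb\tsy\n\tisb\n");
}

int
main (void)
{
  print_shared_sls_stub (1);   /* x16, x17 and x30 have no stub */
  return 0;
}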
*/ @@ -17468,6 +25591,9 @@ aarch64_run_selftests (void) #endif /* #if CHECKING_P */ +#undef TARGET_STACK_PROTECT_GUARD +#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard + #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost @@ -17503,11 +25629,14 @@ aarch64_run_selftests (void) #undef TARGET_ASM_TRAMPOLINE_TEMPLATE #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template +#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY +#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY aarch64_print_patchable_function_entry + #undef TARGET_BUILD_BUILTIN_VA_LIST #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list #undef TARGET_CALLEE_COPIES -#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE aarch64_can_eliminate @@ -17524,6 +25653,9 @@ aarch64_run_selftests (void) #undef TARGET_CONDITIONAL_REGISTER_USAGE #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage +#undef TARGET_MEMBER_TYPE_FORCES_BLK +#define TARGET_MEMBER_TYPE_FORCES_BLK aarch64_member_type_forces_blk + /* Only the least significant bit is used for initialization guard variables. */ #undef TARGET_CXX_GUARD_MASK_BIT @@ -17617,6 +25749,18 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE aarch64_mangle_type +#undef TARGET_INVALID_CONVERSION +#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion + +#undef TARGET_INVALID_UNARY_OP +#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op + +#undef TARGET_INVALID_BINARY_OP +#define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op + +#undef TARGET_VERIFY_TYPE_CONTEXT +#define TARGET_VERIFY_TYPE_CONTEXT aarch64_verify_type_context + #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost @@ -17639,6 +25783,9 @@ aarch64_libgcc_floating_mode_supported_p #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \ aarch64_override_options_after_change +#undef TARGET_OFFLOAD_OPTIONS +#define TARGET_OFFLOAD_OPTIONS aarch64_offload_options + #undef TARGET_OPTION_SAVE #define TARGET_OPTION_SAVE aarch64_option_save @@ -17696,6 +25843,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE aarch64_sched_variable_issue + #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ aarch64_sched_first_cycle_multipass_dfa_lookahead @@ -17737,6 +25887,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p +#undef TARGET_COMPATIBLE_VECTOR_TYPES_P +#define TARGET_COMPATIBLE_VECTOR_TYPES_P aarch64_compatible_vector_types_p + #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ aarch64_builtin_support_vector_misalignment @@ -17747,9 +25900,18 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_ARRAY_MODE_SUPPORTED_P #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p +#undef TARGET_VECTORIZE_INIT_COST +#define TARGET_VECTORIZE_INIT_COST aarch64_init_cost + #undef TARGET_VECTORIZE_ADD_STMT_COST #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost +#undef TARGET_VECTORIZE_FINISH_COST +#define TARGET_VECTORIZE_FINISH_COST 
aarch64_finish_cost + +#undef TARGET_VECTORIZE_DESTROY_COST_DATA +#define TARGET_VECTORIZE_DESTROY_COST_DATA aarch64_destroy_cost_data + #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ aarch64_builtin_vectorization_cost @@ -17764,9 +25926,9 @@ aarch64_libgcc_floating_mode_supported_p #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ aarch64_builtin_vectorized_function -#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES -#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ - aarch64_autovectorize_vector_sizes +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ + aarch64_autovectorize_vector_modes #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ @@ -17799,11 +25961,16 @@ aarch64_libgcc_floating_mode_supported_p #define TARGET_VECTORIZE_VEC_PERM_CONST \ aarch64_vectorize_vec_perm_const +#undef TARGET_VECTORIZE_RELATED_MODE +#define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode #undef TARGET_VECTORIZE_GET_MASK_MODE #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \ aarch64_empty_mask_is_expensive +#undef TARGET_PREFERRED_ELSE_VALUE +#define TARGET_PREFERRED_ELSE_VALUE \ + aarch64_preferred_else_value #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs @@ -17853,6 +26020,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_PRINT_OPERAND_ADDRESS #define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA aarch64_output_addr_const_extra + #undef TARGET_OPTAB_SUPPORTED_P #define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p @@ -17879,9 +26049,16 @@ aarch64_libgcc_floating_mode_supported_p #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ aarch64_hard_regno_call_part_clobbered +#undef TARGET_INSN_CALLEE_ABI +#define TARGET_INSN_CALLEE_ABI aarch64_insn_callee_abi + #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment +#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE +#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \ + aarch64_stack_clash_protection_alloca_probe_range + #undef TARGET_COMPUTE_PRESSURE_CLASSES #define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes @@ -17891,11 +26068,57 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_SELECT_EARLY_REMAT_MODES #define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes +#undef TARGET_SPECULATION_SAFE_VALUE +#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value + +#undef TARGET_ESTIMATED_POLY_VALUE +#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table + +#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN +#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \ + aarch64_simd_clone_compute_vecsize_and_simdlen + +#undef TARGET_SIMD_CLONE_ADJUST +#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust + +#undef TARGET_SIMD_CLONE_USABLE +#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes + +#undef TARGET_GET_MULTILIB_ABI_NAME +#define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name + +#undef TARGET_FNTYPE_ABI +#define 
TARGET_FNTYPE_ABI aarch64_fntype_abi + +#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES +#define TARGET_MEMTAG_CAN_TAG_ADDRESSES aarch64_can_tag_addresses + #if CHECKING_P #undef TARGET_RUN_TARGET_SELFTESTS #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests #endif /* #if CHECKING_P */ +#undef TARGET_ASM_POST_CFI_STARTPROC +#define TARGET_ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc + +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + +#undef TARGET_MD_ASM_ADJUST +#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END aarch64_asm_file_end + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE aarch64_sls_emit_blr_function_thunks + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-aarch64.h" diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 976f9afae54c1..bfffbcd6abff6 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -1,5 +1,5 @@ /* Machine description for AArch64 architecture. - Copyright (C) 2009-2018 Free Software Foundation, Inc. + Copyright (C) 2009-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -26,6 +26,10 @@ #define TARGET_CPU_CPP_BUILTINS() \ aarch64_cpu_cpp_builtins (pfile) +/* Target hooks for D language. */ +#define TARGET_D_CPU_VERSIONS aarch64_d_target_versions +#define TARGET_D_REGISTER_CPU_TARGET_INFO aarch64_d_register_target_info + #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () @@ -84,12 +88,29 @@ #define LONG_DOUBLE_TYPE_SIZE 128 +/* This value is the amount of bytes a caller is allowed to drop the stack + before probing has to be done for stack clash protection. */ +#define STACK_CLASH_CALLER_GUARD 1024 + +/* This value represents the minimum amount of bytes we expect the function's + outgoing arguments to be when stack-clash is enabled. */ +#define STACK_CLASH_MIN_BYTES_OUTGOING_ARGS 8 + +/* This value controls how many pages we manually unroll the loop for when + generating stack clash probes. */ +#define STACK_CLASH_MAX_UNROLL_PAGES 4 + /* The architecture reserves all bits of the address for hardware use, so the vbit must go into the delta field of pointers to member functions. This is the same config as that in the AArch32 port. */ #define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +/* Emit calls to libgcc helpers for atomic operations for runtime detection + of LSE instructions. */ +#define TARGET_OUTLINE_ATOMICS (aarch64_flag_outline_atomics) + /* Align definitions of arrays, unions and structures so that initializations and copies can be made more efficient. This is not ABI-changing, so it only affects places where we can see the @@ -141,6 +162,8 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ #define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ #define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */ +/* Armv8-R. */ +#define AARCH64_FL_V8_R (1 << 7) /* Armv8-R AArch64. */ /* ARMv8.2-A architecture extensions. */ #define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ @@ -157,6 +180,56 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. 
*/ #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */ +#define AARCH64_FL_RCPC8_4 (1 << 20) /* Has ARMv8.4-a RCPC extensions. */ + +/* Statistical Profiling extensions. */ +#define AARCH64_FL_PROFILE (1 << 21) + +/* ARMv8.5-A architecture extensions. */ +#define AARCH64_FL_V8_5 (1 << 22) /* Has ARMv8.5-A features. */ +#define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */ +#define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging + Extensions. */ + +/* Speculation Barrier instruction supported. */ +#define AARCH64_FL_SB (1 << 25) + +/* Speculative Store Bypass Safe instruction supported. */ +#define AARCH64_FL_SSBS (1 << 26) + +/* Execution and Data Prediction Restriction instructions supported. */ +#define AARCH64_FL_PREDRES (1 << 27) + +/* SVE2 instruction supported. */ +#define AARCH64_FL_SVE2 (1 << 28) +#define AARCH64_FL_SVE2_AES (1 << 29) +#define AARCH64_FL_SVE2_SM4 (1 << 30) +#define AARCH64_FL_SVE2_SHA3 (1ULL << 31) +#define AARCH64_FL_SVE2_BITPERM (1ULL << 32) + +/* Transactional Memory Extension. */ +#define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ + +/* Armv8.6-A architecture extensions. */ +#define AARCH64_FL_V8_6 (1ULL << 34) + +/* 8-bit Integer Matrix Multiply (I8MM) extensions. */ +#define AARCH64_FL_I8MM (1ULL << 35) + +/* Brain half-precision floating-point (BFloat16) Extension. */ +#define AARCH64_FL_BF16 (1ULL << 36) + +/* 32-bit Floating-point Matrix Multiply (F32MM) extensions. */ +#define AARCH64_FL_F32MM (1ULL << 37) + +/* 64-bit Floating-point Matrix Multiply (F64MM) extensions. */ +#define AARCH64_FL_F64MM (1ULL << 38) + +/* Flag Manipulation Instructions (FLAGM) extension. */ +#define AARCH64_FL_FLAGM (1ULL << 39) + +/* Pointer Authentication (PAUTH) extension. */ +#define AARCH64_FL_PAUTH (1ULL << 40) /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -172,10 +245,18 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_FOR_ARCH8_2 \ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) #define AARCH64_FL_FOR_ARCH8_3 \ - (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3) + (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) #define AARCH64_FL_FOR_ARCH8_4 \ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ - | AARCH64_FL_DOTPROD) + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) +#define AARCH64_FL_FOR_ARCH8_5 \ + (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \ + | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) +#define AARCH64_FL_FOR_ARCH8_6 \ + (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \ + | AARCH64_FL_I8MM | AARCH64_FL_BF16) +#define AARCH64_FL_FOR_ARCH8_R \ + (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_R) /* Macros to test ISA flags. 
*/ @@ -188,6 +269,11 @@ extern unsigned aarch64_architecture_version; #define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) +#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) +#define AARCH64_ISA_SVE2_AES (aarch64_isa_flags & AARCH64_FL_SVE2_AES) +#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) +#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) +#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) @@ -196,6 +282,19 @@ extern unsigned aarch64_architecture_version; #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) +#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) +#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) +#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) +#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) +#define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) +#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6) +#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) +#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) +#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) +#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) +#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) +#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) +#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) /* Crypto is an optional extension to AdvSIMD. */ #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) @@ -229,11 +328,64 @@ extern unsigned aarch64_architecture_version; #define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD) /* SVE instructions, enabled through +sve. */ -#define TARGET_SVE (AARCH64_ISA_SVE) +#define TARGET_SVE (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SVE) + +/* SVE2 instructions, enabled through +sve2. */ +#define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2) + +/* SVE2 AES instructions, enabled through +sve2-aes. */ +#define TARGET_SVE2_AES (TARGET_SVE2 && AARCH64_ISA_SVE2_AES) + +/* SVE2 BITPERM instructions, enabled through +sve2-bitperm. */ +#define TARGET_SVE2_BITPERM (TARGET_SVE2 && AARCH64_ISA_SVE2_BITPERM) + +/* SVE2 SHA3 instructions, enabled through +sve2-sha3. */ +#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3) + +/* SVE2 SM4 instructions, enabled through +sve2-sm4. */ +#define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4) /* ARMv8.3-A features. */ #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3) +/* Javascript conversion instruction from Armv8.3-a. */ +#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3) + +/* Armv8.3-a Complex number extension to AdvSIMD extensions. */ +#define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) + +/* Floating-point rounding instructions from Armv8.5-a. */ +#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT) + +/* TME instructions are enabled. */ +#define TARGET_TME (AARCH64_ISA_TME) + +/* Random number instructions from Armv8.5-a. 
*/ +#define TARGET_RNG (AARCH64_ISA_RNG) + +/* Memory Tagging instructions optional to Armv8.5 enabled through +memtag. */ +#define TARGET_MEMTAG (AARCH64_ISA_V8_5 && AARCH64_ISA_MEMTAG) + +/* I8MM instructions are enabled through +i8mm. */ +#define TARGET_I8MM (AARCH64_ISA_I8MM) +#define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM) + +/* F32MM instructions are enabled through +f32mm. */ +#define TARGET_F32MM (AARCH64_ISA_F32MM) +#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM) + +/* F64MM instructions are enabled through +f64mm. */ +#define TARGET_F64MM (AARCH64_ISA_F64MM) +#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM) + +/* BF16 instructions are enabled through +bf16. */ +#define TARGET_BF16_FP (AARCH64_ISA_BF16) +#define TARGET_BF16_SIMD (AARCH64_ISA_BF16 && TARGET_SIMD) +#define TARGET_SVE_BF16 (TARGET_SVE && AARCH64_ISA_BF16) + +/* PAUTH instructions are enabled through +pauth. */ +#define TARGET_PAUTH (AARCH64_ISA_PAUTH) + /* Make sure this is always defined so we don't have to check for ifdefs but rather use normal ifs. */ #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT @@ -243,6 +395,9 @@ extern unsigned aarch64_architecture_version; #define TARGET_FIX_ERR_A53_835769_DEFAULT 1 #endif +/* SB instruction is enabled through +sb. */ +#define TARGET_SB (AARCH64_ISA_SB) + /* Apply the workaround for Cortex-A53 erratum 835769. */ #define TARGET_FIX_ERR_A53_835769 \ ((aarch64_fix_a53_err835769 == 2) \ @@ -296,6 +451,9 @@ extern unsigned aarch64_architecture_version; P0-P7 Predicate low registers: valid in all predicate contexts P8-P15 Predicate high registers: used as scratch space + FFR First Fault Register, a fixed-use SVE predicate register + FFRT FFR token: a fake register used for modelling dependencies + VG Pseudo "vector granules" register VG is the number of 64-bit elements in an SVE vector. We define @@ -303,15 +461,6 @@ extern unsigned aarch64_architecture_version; register. GCC internally uses the poly_int variable aarch64_sve_vg instead. */ -/* Note that we don't mark X30 as a call-clobbered register. The idea is - that it's really the call instructions themselves which clobber X30. - We don't care what the called function does with it afterwards. - - This approach makes it easier to implement sibcalls. Unlike normal - calls, sibcalls don't clobber X30, so the register reaches the - called function intact. EPILOGUE_USES says that X30 is useful - to the called function. */ - #define FIXED_REGISTERS \ { \ 0, 0, 0, 0, 0, 0, 0, 0, /* R0 - R7 */ \ @@ -325,8 +474,16 @@ extern unsigned aarch64_architecture_version; 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ + 1, 1 /* FFR and FFRT */ \ } +/* X30 is marked as caller-saved which is in line with regular function call + behavior since the call instructions clobber it; AARCH64_EXPAND_CALL does + that for regular function calls and avoids it for sibcalls. X30 is + considered live for sibcalls; EPILOGUE_USES helps achieve that by returning + true but not until function epilogues have been generated. This ensures + that X30 is available for use in leaf functions if needed. 
*/ + #define CALL_USED_REGISTERS \ { \ 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \ @@ -340,6 +497,7 @@ extern unsigned aarch64_architecture_version; 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ + 1, 1 /* FFR and FFRT */ \ } #define REGISTER_NAMES \ @@ -355,6 +513,7 @@ extern unsigned aarch64_architecture_version; "sfp", "ap", "cc", "vg", \ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ + "ffr", "ffrt" \ } /* Generate the register aliases for core register N */ @@ -391,12 +550,7 @@ extern unsigned aarch64_architecture_version; V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \ } -/* Say that the epilogue uses the return address register. Note that - in the case of sibcalls, the values "used by the epilogue" are - considered live at the start of the called function. */ - -#define EPILOGUE_USES(REGNO) \ - (epilogue_completed && (REGNO) == LR_REGNUM) +#define EPILOGUE_USES(REGNO) (aarch64_epilogue_uses (REGNO)) /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, the stack pointer does not matter. This is only true if the function @@ -408,11 +562,12 @@ extern unsigned aarch64_architecture_version; #define FRAME_POINTER_REGNUM SFP_REGNUM #define STACK_POINTER_REGNUM SP_REGNUM #define ARG_POINTER_REGNUM AP_REGNUM -#define FIRST_PSEUDO_REGISTER (P15_REGNUM + 1) +#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1) -/* The number of (integer) argument register available. */ +/* The number of argument registers available for each class. */ #define NUM_ARG_REGS 8 #define NUM_FP_ARG_REGS 8 +#define NUM_PR_ARG_REGS 4 /* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most four members. */ @@ -470,6 +625,18 @@ extern unsigned aarch64_architecture_version; #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL) \ aarch64_declare_function_name (STR, NAME, DECL) +/* Output assembly strings for alias definition. */ +#define ASM_OUTPUT_DEF_FROM_DECLS(STR, DECL, TARGET) \ + aarch64_asm_output_alias (STR, DECL, TARGET) + +/* Output assembly strings for undefined extern symbols. */ +#undef ASM_OUTPUT_EXTERNAL +#define ASM_OUTPUT_EXTERNAL(STR, DECL, NAME) \ + aarch64_asm_output_external (STR, DECL, NAME) + +/* Output assembly strings after .cfi_startproc is emitted. */ +#define ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc + /* For EH returns X4 contains the stack adjustment. */ #define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM) #define EH_RETURN_HANDLER_RTX aarch64_eh_return_handler_rtx () @@ -478,6 +645,9 @@ extern unsigned aarch64_architecture_version; #undef DONT_USE_BUILTIN_SETJMP #define DONT_USE_BUILTIN_SETJMP 1 +#undef TARGET_COMPUTE_FRAME_LAYOUT +#define TARGET_COMPUTE_FRAME_LAYOUT aarch64_layout_frame + /* Register in which the structure value is to be returned. */ #define AARCH64_STRUCT_VALUE_REGNUM R8_REGNUM @@ -489,18 +659,33 @@ extern unsigned aarch64_architecture_version; #define GP_REGNUM_P(REGNO) \ (((unsigned) (REGNO - R0_REGNUM)) <= (R30_REGNUM - R0_REGNUM)) +/* Registers known to be preserved over a BL instruction. This consists of the + GENERAL_REGS without x16, x17, and x30. The x30 register is changed by the + BL instruction itself, while the x16 and x17 registers may be used by + veneers which can be inserted by the linker. 
*/ +#define STUB_REGNUM_P(REGNO) \ + (GP_REGNUM_P (REGNO) \ + && (REGNO) != R16_REGNUM \ + && (REGNO) != R17_REGNUM \ + && (REGNO) != R30_REGNUM) \ + #define FP_REGNUM_P(REGNO) \ (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM)) #define FP_LO_REGNUM_P(REGNO) \ (((unsigned) (REGNO - V0_REGNUM)) <= (V15_REGNUM - V0_REGNUM)) +#define FP_LO8_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V0_REGNUM)) <= (V7_REGNUM - V0_REGNUM)) + #define PR_REGNUM_P(REGNO)\ (((unsigned) (REGNO - P0_REGNUM)) <= (P15_REGNUM - P0_REGNUM)) #define PR_LO_REGNUM_P(REGNO)\ (((unsigned) (REGNO - P0_REGNUM)) <= (P7_REGNUM - P0_REGNUM)) +#define FP_SIMD_SAVED_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V8_REGNUM)) <= (V23_REGNUM - V8_REGNUM)) /* Register and constant classes. */ @@ -508,15 +693,19 @@ enum reg_class { NO_REGS, TAILCALL_ADDR_REGS, + STUB_REGS, GENERAL_REGS, STACK_REG, POINTER_REGS, + FP_LO8_REGS, FP_LO_REGS, FP_REGS, POINTER_AND_FP_REGS, PR_LO_REGS, PR_HI_REGS, PR_REGS, + FFR_REGS, + PR_AND_FFR_REGS, ALL_REGS, LIM_REG_CLASSES /* Last */ }; @@ -527,31 +716,39 @@ enum reg_class { \ "NO_REGS", \ "TAILCALL_ADDR_REGS", \ + "STUB_REGS", \ "GENERAL_REGS", \ "STACK_REG", \ "POINTER_REGS", \ + "FP_LO8_REGS", \ "FP_LO_REGS", \ "FP_REGS", \ "POINTER_AND_FP_REGS", \ "PR_LO_REGS", \ "PR_HI_REGS", \ "PR_REGS", \ + "FFR_REGS", \ + "PR_AND_FFR_REGS", \ "ALL_REGS" \ } #define REG_CLASS_CONTENTS \ { \ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ - { 0x0004ffff, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\ + { 0x00030000, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\ + { 0x3ffcffff, 0x00000000, 0x00000000 }, /* STUB_REGS */ \ { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ + { 0x00000000, 0x000000ff, 0x00000000 }, /* FP_LO8_REGS */ \ { 0x00000000, 0x0000ffff, 0x00000000 }, /* FP_LO_REGS */ \ { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ { 0xffffffff, 0xffffffff, 0x00000003 }, /* POINTER_AND_FP_REGS */\ { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \ { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \ { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ + { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ + { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ } @@ -627,7 +824,7 @@ extern enum aarch64_processor aarch64_tune; #ifdef HAVE_POLY_INT_H struct GTY (()) aarch64_frame { - HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; + poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; /* The number of extra stack bytes taken up by register varargs. This area is allocated by the callee at the very top of the @@ -635,9 +832,12 @@ struct GTY (()) aarch64_frame STACK_BOUNDARY. */ HOST_WIDE_INT saved_varargs_size; - /* The size of the saved callee-save int/FP registers. */ + /* The size of the callee-save registers with a slot in REG_OFFSET. */ + poly_int64 saved_regs_size; - HOST_WIDE_INT saved_regs_size; + /* The size of the callee-save registers with a slot in REG_OFFSET that + are saved below the hard frame pointer. */ + poly_int64 below_hard_fp_saved_regs_size; /* Offset from the base of the frame (incomming SP) to the top of the locals area. This value is always a multiple of @@ -665,15 +865,41 @@ struct GTY (()) aarch64_frame It may be non-zero if no push is used (ie. callee_adjust == 0). 
*/ poly_int64 callee_offset; + /* The size of the stack adjustment before saving or after restoring + SVE registers. */ + poly_int64 sve_callee_adjust; + /* The size of the stack adjustment after saving callee-saves. */ poly_int64 final_adjust; /* Store FP,LR and setup a frame pointer. */ bool emit_frame_chain; + /* In each frame, we can associate up to two register saves with the + initial stack allocation. This happens in one of two ways: + + (1) Using an STR or STP with writeback to perform the initial + stack allocation. When EMIT_FRAME_CHAIN, the registers will + be those needed to create a frame chain. + + Indicated by CALLEE_ADJUST != 0. + + (2) Using a separate STP to set up the frame record, after the + initial stack allocation but before setting up the frame pointer. + This is used if the offset is too large to use writeback. + + Indicated by CALLEE_ADJUST == 0 && EMIT_FRAME_CHAIN. + + These fields indicate which registers we've decided to handle using + (1) or (2), or INVALID_REGNUM if none. */ unsigned wb_candidate1; unsigned wb_candidate2; + /* Big-endian SVE frames need a spare predicate register in order + to save vector registers in the correct layout for unwinding. + This is the register they should use. */ + unsigned spare_pred_reg; + bool laid_out; }; @@ -682,6 +908,9 @@ typedef struct GTY (()) machine_function struct aarch64_frame frame; /* One entry for each hard register. */ bool reg_is_wrapped_separately[LAST_SAVED_REGNUM]; + /* One entry for each general purpose register. */ + rtx call_via[SP_REGNUM]; + bool label_is_assembled; } machine_function; #endif @@ -701,6 +930,10 @@ enum aarch64_abi_type enum arm_pcs { ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ + ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. */ + ARM_PCS_SVE, /* For functions that pass or return + values in SVE registers. */ + ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ ARM_PCS_UNKNOWN }; @@ -727,6 +960,8 @@ typedef struct int aapcs_nextncrn; /* Next next core register number. */ int aapcs_nvrn; /* Next Vector register number. */ int aapcs_nextnvrn; /* Next Next Vector register number. */ + int aapcs_nprn; /* Next Predicate register number. */ + int aapcs_nextnprn; /* Next Next Predicate register number. */ rtx aapcs_reg; /* Register assigned to this argument. This is NULL_RTX if this parameter goes on the stack. */ @@ -737,6 +972,8 @@ typedef struct aapcs_reg == NULL_RTX. */ int aapcs_stack_size; /* The total size (in words, per 8 byte) of the stack arg area so far. */ + bool silent_p; /* True if we should act silently, rather than + raise an error for invalid calls. */ } CUMULATIVE_ARGS; #endif @@ -792,22 +1029,25 @@ typedef struct /* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. move_by_pieces will continually copy the largest safe chunks. So a 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient - for both size and speed of copy, so we will instead use the "movmem" + for both size and speed of copy, so we will instead use the "cpymem" standard name to implement the copy. This logic does not apply when targeting -mstrict-align, so keep a sensible default in that case. */ #define MOVE_RATIO(speed) \ (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)) -/* For CLEAR_RATIO, when optimizing for size, give a better estimate - of the length of a memset call, but use the default otherwise. */ +/* Like MOVE_RATIO, without -mstrict-align, make decisions in "setmem" when + we would use more than 3 scalar instructions. 
+ Otherwise follow a sensible default: when optimizing for size, give a better + estimate of the length of a memset call, but use the default otherwise. */ #define CLEAR_RATIO(speed) \ - ((speed) ? 15 : AARCH64_CALL_RATIO) + (!STRICT_ALIGNMENT ? 4 : (speed) ? 15 : AARCH64_CALL_RATIO) -/* SET_RATIO is similar to CLEAR_RATIO, but for a non-zero constant, so when - optimizing for size adjust the ratio to account for the overhead of loading - the constant. */ +/* SET_RATIO is similar to CLEAR_RATIO, but for a non-zero constant. Without + -mstrict-align, make decisions in "setmem". Otherwise follow a sensible + default: when optimizing for size adjust the ratio to account for the + overhead of loading the constant. */ #define SET_RATIO(speed) \ - ((speed) ? 15 : AARCH64_CALL_RATIO - 2) + (!STRICT_ALIGNMENT ? 0 : (speed) ? 15 : AARCH64_CALL_RATIO - 2) /* Disable auto-increment in move_by_pieces et al. Use of auto-increment is rarely a good idea in straight-line code since it adds an extra address @@ -838,14 +1078,8 @@ typedef struct if given data not on the nominal alignment. */ #define STRICT_ALIGNMENT TARGET_STRICT_ALIGN -/* Define this macro to be non-zero if accessing less than a word of - memory is no faster than accessing a word of memory, i.e., if such - accesses require more than one instruction or if there is no - difference in cost. - Although there's no difference in instruction count or cycles, - in AArch64 we don't want to expand to a sub-word to a 64-bit access - if we don't have to, for power-saving reasons. */ -#define SLOW_BYTE_ACCESS 0 +/* Enable wide bitfield accesses for more efficient bitfield code. */ +#define SLOW_BYTE_ACCESS 1 #define NO_FUNCTION_CSE 1 @@ -865,12 +1099,10 @@ typedef struct #define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y) -#define REVERSIBLE_CC_MODE(MODE) 1 - -#define REVERSE_CONDITION(CODE, MODE) \ - (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ - ? reverse_condition_maybe_unordered (CODE) \ - : reverse_condition (CODE)) +/* Having an integer comparison mode guarantees that we can use + reverse_condition, but the usual restrictions apply to floating-point + comparisons. */ +#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPmode && (MODE) != CCFPEmode) #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) @@ -881,8 +1113,10 @@ typedef struct #define RETURN_ADDR_RTX aarch64_return_addr -/* 3 insns + padding + 2 pointer-sized entries. */ -#define TRAMPOLINE_SIZE (TARGET_ILP32 ? 24 : 32) +/* BTI c + 3 insns + + sls barrier of DSB + ISB. + + 2 pointer-sized entries. */ +#define TRAMPOLINE_SIZE (24 + (TARGET_ILP32 ? 8 : 16)) /* Trampolines contain dwords, so must be dword aligned. */ #define TRAMPOLINE_ALIGNMENT 64 @@ -918,7 +1152,7 @@ typedef struct #define PROFILE_HOOK(LABEL) \ { \ rtx fun, lr; \ - lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ + lr = aarch64_return_addr_rtx (); \ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ emit_library_call (fun, LCT_NORMAL, VOIDmode, lr, Pmode); \ } @@ -975,23 +1209,25 @@ extern enum aarch64_code_model aarch64_cmodel; #define AARCH64_VALID_SIMD_DREG_MODE(MODE) \ ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ || (MODE) == V2SFmode || (MODE) == V4HFmode || (MODE) == DImode \ - || (MODE) == DFmode) + || (MODE) == DFmode || (MODE) == V4BFmode) /* Modes valid for AdvSIMD Q registers. 
*/ #define AARCH64_VALID_SIMD_QREG_MODE(MODE) \ ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \ - || (MODE) == V2DFmode) + || (MODE) == V2DFmode || (MODE) == V8BFmode) #define ENDIAN_LANE_N(NUNITS, N) \ (BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N) -/* Support for a configure-time default CPU, etc. We currently support - --with-arch and --with-cpu. Both are ignored if either is specified - explicitly on the command line at run time. */ +/* Support for configure-time --with-arch, --with-cpu and --with-tune. + --with-arch and --with-cpu are ignored if either -mcpu or -march is used. + --with-tune is ignored if either -mtune or -mcpu is used (but is not + affected by -march). */ #define OPTION_DEFAULT_SPECS \ {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \ - {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, + {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mcpu=*:%{!mtune=*:-mtune=%(VALUE)}}"}, #define MCPU_TO_MARCH_SPEC \ " %{mcpu=*:-march=%:rewrite_mcpu(%{mcpu=*:%*})}" @@ -1029,6 +1265,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); extern tree aarch64_fp16_type_node; extern tree aarch64_fp16_ptr_type_node; +/* This type is the user-visible __bf16, and a pointer to that type. Defined + in aarch64-builtins.c. */ +extern tree aarch64_bf16_type_node; +extern tree aarch64_bf16_ptr_type_node; + /* The generic unwind code in libgcc does not initialize the frame pointer. So in order to unwind a function using a frame pointer, the very first function that is unwound must save the frame pointer. That way the frame @@ -1044,7 +1285,8 @@ extern poly_uint16 aarch64_sve_vg; #define BITS_PER_SVE_VECTOR (poly_uint16 (aarch64_sve_vg * 64)) #define BYTES_PER_SVE_VECTOR (poly_uint16 (aarch64_sve_vg * 8)) -/* The number of bytes in an SVE predicate. */ +/* The number of bits and bytes in an SVE predicate. */ +#define BITS_PER_SVE_PRED BYTES_PER_SVE_VECTOR #define BYTES_PER_SVE_PRED aarch64_sve_vg /* The SVE mode for a vector of bytes. */ @@ -1061,4 +1303,17 @@ extern poly_uint16 aarch64_sve_vg; #define REGMODE_NATURAL_SIZE(MODE) aarch64_regmode_natural_size (MODE) +/* Allocate a minimum of STACK_CLASH_MIN_BYTES_OUTGOING_ARGS bytes for the + outgoing arguments if stack clash protection is enabled. This is essential + as the extra arg space allows us to skip a check in alloca. */ +#undef STACK_DYNAMIC_OFFSET +#define STACK_DYNAMIC_OFFSET(FUNDECL) \ + ((flag_stack_clash_protection \ + && cfun->calls_alloca \ + && known_lt (crtl->outgoing_args_size, \ + STACK_CLASH_MIN_BYTES_OUTGOING_ARGS)) \ + ? ROUND_UP (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS, \ + STACK_BOUNDARY / BITS_PER_UNIT) \ + : (crtl->outgoing_args_size + STACK_POINTER_OFFSET)) + #endif /* GCC_AARCH64_H */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 82db038c77a05..aef6da9732d45 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1,5 +1,5 @@ ;; Machine description for AArch64 architecture. -;; Copyright (C) 2009-2018 Free Software Foundation, Inc. +;; Copyright (C) 2009-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. 
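Looking back at the STACK_DYNAMIC_OFFSET hunk at the end of the aarch64.h changes above, a minimal standalone sketch of the sizing policy it encodes may help. GCC's poly_int/crtl plumbing is replaced by plain integers here, and the 1024-byte value used for STACK_CLASH_MIN_BYTES_OUTGOING_ARGS is an assumption for illustration, not something taken from this patch:

#include <stdio.h>

#define STACK_CLASH_MIN_BYTES_OUTGOING_ARGS 1024   /* assumed value */
#define STACK_BOUNDARY_BYTES 16                    /* SP alignment in bytes */
#define ROUND_UP(x, align) (((x) + (align) - 1) & ~((align) - 1))

/* Simplified model of the STACK_DYNAMIC_OFFSET policy in the hunk above.  */
static long
stack_dynamic_offset (int stack_clash_protection, int calls_alloca,
                      long outgoing_args_size, long stack_pointer_offset)
{
  if (stack_clash_protection
      && calls_alloca
      && outgoing_args_size < STACK_CLASH_MIN_BYTES_OUTGOING_ARGS)
    return ROUND_UP (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS,
                     STACK_BOUNDARY_BYTES);
  return outgoing_args_size + stack_pointer_offset;
}

int
main (void)
{
  /* A small outgoing-args area is padded up to the minimum when stack clash
     protection and alloca are both in play...  */
  printf ("%ld\n", stack_dynamic_offset (1, 1, 32, 0));   /* prints 1024 */
  /* ...and left alone otherwise.  */
  printf ("%ld\n", stack_dynamic_offset (0, 1, 32, 0));   /* prints 32 */
  return 0;
}

The reason for the minimum is the one given in the hunk's own comment: guaranteeing that much outgoing-argument space lets the alloca path skip a probe.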
@@ -38,9 +38,7 @@ (R14_REGNUM 14) (R15_REGNUM 15) (R16_REGNUM 16) - (IP0_REGNUM 16) (R17_REGNUM 17) - (IP1_REGNUM 17) (R18_REGNUM 18) (R19_REGNUM 19) (R20_REGNUM 20) @@ -54,19 +52,39 @@ (R28_REGNUM 28) (R29_REGNUM 29) (R30_REGNUM 30) - (LR_REGNUM 30) (SP_REGNUM 31) (V0_REGNUM 32) + (V1_REGNUM 33) + (V2_REGNUM 34) + (V3_REGNUM 35) (V4_REGNUM 36) + (V5_REGNUM 37) + (V6_REGNUM 38) + (V7_REGNUM 39) (V8_REGNUM 40) + (V9_REGNUM 41) + (V10_REGNUM 42) + (V11_REGNUM 43) (V12_REGNUM 44) + (V13_REGNUM 45) + (V14_REGNUM 46) (V15_REGNUM 47) (V16_REGNUM 48) + (V17_REGNUM 49) + (V18_REGNUM 50) + (V19_REGNUM 51) (V20_REGNUM 52) + (V21_REGNUM 53) + (V22_REGNUM 54) + (V23_REGNUM 55) (V24_REGNUM 56) + (V25_REGNUM 57) + (V26_REGNUM 58) + (V27_REGNUM 59) (V28_REGNUM 60) + (V29_REGNUM 61) + (V30_REGNUM 62) (V31_REGNUM 63) - (LAST_SAVED_REGNUM 63) (SFP_REGNUM 64) (AP_REGNUM 65) (CC_REGNUM 66) @@ -88,12 +106,42 @@ (P13_REGNUM 81) (P14_REGNUM 82) (P15_REGNUM 83) + (LAST_SAVED_REGNUM 83) + (FFR_REGNUM 84) + ;; "FFR token": a fake register used for representing the scheduling + ;; restrictions on FFR-related operations. + (FFRT_REGNUM 85) + ;; The pair of scratch registers used for stack probing with -fstack-check. + ;; Leave R9 alone as a possible choice for the static chain. + ;; Note that the use of these registers is mutually exclusive with the use + ;; of STACK_CLASH_SVE_CFA_REGNUM, which is for -fstack-clash-protection + ;; rather than -fstack-check. + (PROBE_STACK_FIRST_REGNUM 10) + (PROBE_STACK_SECOND_REGNUM 11) + ;; Scratch register used by stack clash protection to calculate + ;; SVE CFA offsets during probing. + (STACK_CLASH_SVE_CFA_REGNUM 11) + ;; Scratch registers for prologue/epilogue use. + (EP0_REGNUM 12) + (EP1_REGNUM 13) + ;; A couple of call-clobbered registers that we need to reserve when + ;; tracking speculation this is not ABI, so is subject to change. + (SPECULATION_SCRATCH_REGNUM 14) + (SPECULATION_TRACKER_REGNUM 15) + ;; Scratch registers used in frame layout. 
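+  ;; IP0/IP1 and LR alias x16/x17 and x30; their separate entries earlier in
+  ;; this list are removed above and regrouped here, together with a new
+  ;; FP_REGNUM alias for the x29 frame pointer.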
+ (IP0_REGNUM 16) + (IP1_REGNUM 17) + (FP_REGNUM 29) + (LR_REGNUM 30) ] ) (define_c_enum "unspec" [ - UNSPEC_AUTI1716 - UNSPEC_AUTISP + UNSPEC_AUTIA1716 + UNSPEC_AUTIB1716 + UNSPEC_AUTIASP + UNSPEC_AUTIBSP + UNSPEC_CALLEE_ABI UNSPEC_CASESI UNSPEC_CRC32B UNSPEC_CRC32CB @@ -105,6 +153,11 @@ UNSPEC_CRC32X UNSPEC_FCVTZS UNSPEC_FCVTZU + UNSPEC_FJCVTZS + UNSPEC_FRINT32Z + UNSPEC_FRINT32X + UNSPEC_FRINT64Z + UNSPEC_FRINT64X UNSPEC_URECPE UNSPEC_FRECPE UNSPEC_FRECPS @@ -136,11 +189,18 @@ UNSPEC_LD4_LANE UNSPEC_MB UNSPEC_NOP - UNSPEC_PACI1716 - UNSPEC_PACISP + UNSPEC_PACIA1716 + UNSPEC_PACIB1716 + UNSPEC_PACIASP + UNSPEC_PACIBSP UNSPEC_PRLG_STK UNSPEC_REV UNSPEC_RBIT + UNSPEC_SABAL + UNSPEC_SABAL2 + UNSPEC_SABDL + UNSPEC_SABDL2 + UNSPEC_SADALP UNSPEC_SCVTF UNSPEC_SISD_NEG UNSPEC_SISD_SSHL @@ -159,11 +219,18 @@ UNSPEC_TLSLE24 UNSPEC_TLSLE32 UNSPEC_TLSLE48 + UNSPEC_UABAL + UNSPEC_UABAL2 + UNSPEC_UABDL + UNSPEC_UABDL2 + UNSPEC_UADALP UNSPEC_UCVTF UNSPEC_USHL_2S UNSPEC_VSTRUCTDUMMY + UNSPEC_SSP_SYSREG UNSPEC_SP_SET UNSPEC_SP_TEST + UNSPEC_RSHRN UNSPEC_RSQRT UNSPEC_RSQRTE UNSPEC_RSQRTS @@ -171,25 +238,62 @@ UNSPEC_XPACLRI UNSPEC_LD1_SVE UNSPEC_ST1_SVE + UNSPEC_LDNT1_SVE + UNSPEC_STNT1_SVE UNSPEC_LD1RQ UNSPEC_LD1_GATHER + UNSPEC_LDFF1_GATHER + UNSPEC_LDNT1_GATHER UNSPEC_ST1_SCATTER - UNSPEC_MERGE_PTRUE - UNSPEC_PTEST_PTRUE + UNSPEC_STNT1_SCATTER + UNSPEC_PRED_X + UNSPEC_PRED_Z + UNSPEC_PTEST + UNSPEC_PTRUE UNSPEC_UNPACKSHI UNSPEC_UNPACKUHI UNSPEC_UNPACKSLO UNSPEC_UNPACKULO UNSPEC_PACK - UNSPEC_FLOAT_CONVERT - UNSPEC_WHILE_LO + UNSPEC_WHILEGE + UNSPEC_WHILEGT + UNSPEC_WHILEHI + UNSPEC_WHILEHS + UNSPEC_WHILELE + UNSPEC_WHILELO + UNSPEC_WHILELS + UNSPEC_WHILELT + UNSPEC_WHILERW + UNSPEC_WHILEWR UNSPEC_LDN UNSPEC_STN UNSPEC_INSR + UNSPEC_CLASTA UNSPEC_CLASTB UNSPEC_FADDA UNSPEC_REV_SUBREG + UNSPEC_REINTERPRET + UNSPEC_SPECULATION_TRACKER + UNSPEC_SPECULATION_TRACKER_REV UNSPEC_COPYSIGN + UNSPEC_TTEST ; Represent transaction test. + UNSPEC_UPDATE_FFR + UNSPEC_UPDATE_FFRT + UNSPEC_RDFFR + UNSPEC_WRFFR + ;; Represents an SVE-style lane index, in which the indexing applies + ;; within the containing 128-bit block. + UNSPEC_SVE_LANE_SELECT + UNSPEC_SVE_CNT_PAT + UNSPEC_SVE_PREFETCH + UNSPEC_SVE_PREFETCH_GATHER + UNSPEC_SVE_COMPACT + UNSPEC_SVE_SPLICE + UNSPEC_GEN_TAG ; Generate a 4-bit MTE tag. + UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag. + UNSPEC_TAG_SPACE ; Translate address to MTE tag address space. + UNSPEC_LD1RO + UNSPEC_SALT_ADDR ]) (define_c_enum "unspecv" [ @@ -200,9 +304,40 @@ UNSPECV_SET_FPSR ; Represent assign of FPSR content. UNSPECV_BLOCKAGE ; Represent a blockage UNSPECV_PROBE_STACK_RANGE ; Represent stack range probing. + UNSPECV_SPECULATION_BARRIER ; Represent speculation barrier. + UNSPECV_BTI_NOARG ; Represent BTI. + UNSPECV_BTI_C ; Represent BTI c. + UNSPECV_BTI_J ; Represent BTI j. + UNSPECV_BTI_JC ; Represent BTI jc. + UNSPECV_TSTART ; Represent transaction start. + UNSPECV_TCOMMIT ; Represent transaction commit. + UNSPECV_TCANCEL ; Represent transaction cancel. + UNSPEC_RNDR ; Represent RNDR + UNSPEC_RNDRRS ; Represent RNDRRS ] ) +;; These constants are used as a const_int in various SVE unspecs +;; to indicate whether the governing predicate is known to be a PTRUE. +(define_constants + [; Indicates that the predicate might not be a PTRUE. + (SVE_MAYBE_NOT_PTRUE 0) + + ; Indicates that the predicate is known to be a PTRUE. 
+ (SVE_KNOWN_PTRUE 1)]) + +;; These constants are used as a const_int in predicated SVE FP arithmetic +;; to indicate whether the operation is allowed to make additional lanes +;; active without worrying about the effect on faulting behavior. +(define_constants + [; Indicates either that all lanes are active or that the instruction may + ; operate on inactive inputs even if doing so could induce a fault. + (SVE_RELAXED_GP 0) + + ; Indicates that some lanes might be inactive and that the instruction + ; must not operate on inactive inputs if doing so could induce a fault. + (SVE_STRICT_GP 1)]) + ;; If further include files are added the defintion of MD_INCLUDES ;; must be updated. @@ -226,44 +361,54 @@ ;; FP or SIMD registers then the pattern predicate should include TARGET_FLOAT ;; or TARGET_SIMD. -;; Attribute that specifies whether or not the instruction touches fp -;; registers. When this is set to yes for an alternative, that alternative -;; will be disabled when !TARGET_FLOAT. -(define_attr "fp" "no,yes" (const_string "no")) - -;; Attribute that specifies whether or not the instruction touches half -;; precision fp registers. When this is set to yes for an alternative, -;; that alternative will be disabled when !TARGET_FP_F16INST. -(define_attr "fp16" "no,yes" (const_string "no")) +;; Attributes of the architecture required to support the instruction (or +;; alternative). This attribute is used to compute attribute "enabled", use type +;; "any" to enable an alternative in all cases. -;; Attribute that specifies whether or not the instruction touches simd -;; registers. When this is set to yes for an alternative, that alternative -;; will be disabled when !TARGET_SIMD. -(define_attr "simd" "no,yes" (const_string "no")) +(define_enum "arches" [ any rcpc8_4 fp simd sve fp16]) -;; Attribute that specifies whether or not the instruction uses SVE. -;; When this is set to yes for an alternative, that alternative -;; will be disabled when !TARGET_SVE. -(define_attr "sve" "no,yes" (const_string "no")) +(define_enum_attr "arch" "arches" (const_string "any")) -(define_attr "length" "" - (const_int 4)) +;; [For compatibility with Arm in pipeline models] +;; Attribute that specifies whether or not the instruction touches fp +;; registers. +;; Note that this attribute is not used anywhere in either the arm or aarch64 +;; backends except in the scheduling description for xgene1. In that +;; scheduling description this attribute is used to subclass the load_4 and +;; load_8 types. +(define_attr "fp" "no,yes" + (if_then_else + (eq_attr "arch" "fp") + (const_string "yes") + (const_string "no"))) + +(define_attr "arch_enabled" "no,yes" + (if_then_else + (ior + (eq_attr "arch" "any") + + (and (eq_attr "arch" "rcpc8_4") + (match_test "AARCH64_ISA_RCPC8_4")) + + (and (eq_attr "arch" "fp") + (match_test "TARGET_FLOAT")) + + (and (eq_attr "arch" "simd") + (match_test "TARGET_SIMD")) + + (and (eq_attr "arch" "fp16") + (match_test "TARGET_FP_F16INST")) + + (and (eq_attr "arch" "sve") + (match_test "TARGET_SVE"))) + (const_string "yes") + (const_string "no"))) ;; Attribute that controls whether an alternative is enabled or not. ;; Currently it is only used to disable alternatives which touch fp or simd -;; registers when -mgeneral-regs-only is specified. 
-(define_attr "enabled" "no,yes" - (cond [(ior - (and (eq_attr "fp" "yes") - (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) - (and (eq_attr "simd" "yes") - (eq (symbol_ref "TARGET_SIMD") (const_int 0))) - (and (eq_attr "fp16" "yes") - (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0))) - (and (eq_attr "sve" "yes") - (eq (symbol_ref "TARGET_SVE") (const_int 0)))) - (const_string "no") - ] (const_string "yes"))) +;; registers when -mgeneral-regs-only is specified or to require a special +;; architecture support. +(define_attr "enabled" "no,yes" (attr "arch_enabled")) ;; Attribute that specifies whether we are dealing with a branch to a ;; label that is far away, i.e. further away than the maximum/minimum @@ -272,10 +417,38 @@ ;; 1 :=: yes (define_attr "far_branch" "" (const_int 0)) +;; Attribute that specifies whether the alternative uses MOVPRFX. +(define_attr "movprfx" "no,yes" (const_string "no")) + +;; Attribute to specify that an alternative has the length of a single +;; instruction plus a speculation barrier. +(define_attr "sls_length" "none,retbr,casesi" (const_string "none")) + +(define_attr "length" "" + (cond [(eq_attr "movprfx" "yes") + (const_int 8) + + (eq_attr "sls_length" "retbr") + (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 4) + (match_test "TARGET_SB") (const_int 8)] + (const_int 12)) + + (eq_attr "sls_length" "casesi") + (cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 16) + (match_test "TARGET_SB") (const_int 20)] + (const_int 24)) + ] + (const_int 4))) + ;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has ;; no predicated insns. (define_attr "predicated" "yes,no" (const_string "no")) +;; Set to true on an insn that requires the speculation tracking state to be +;; in the tracking register before the insn issues. Otherwise the compiler +;; may chose to hold the tracking state encoded in SP. 
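+;; The tracking register and its companion scratch are the reserved x15/x14
+;; choices declared above as SPECULATION_TRACKER_REGNUM and
+;; SPECULATION_SCRATCH_REGNUM.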
+(define_attr "speculation_barrier" "true,false" (const_string "false")) + ;; ------------------------------------------------------------------- ;; Pipeline descriptions and scheduling ;; ------------------------------------------------------------------- @@ -288,9 +461,12 @@ (include "../arm/cortex-a57.md") (include "../arm/exynos-m1.md") (include "falkor.md") +(include "saphira.md") (include "thunderx.md") (include "../arm/xgene1.md") (include "thunderx2t99.md") +(include "tsv110.md") +(include "thunderx3t110.md") ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns @@ -299,8 +475,12 @@ (define_insn "indirect_jump" [(set (pc) (match_operand:DI 0 "register_operand" "r"))] "" - "br\\t%0" - [(set_attr "type" "branch")] + { + output_asm_insn ("br\\t%0", operands); + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); + } + [(set_attr "type" "branch") + (set_attr "sls_length" "retbr")] ) (define_insn "jump" @@ -312,8 +492,8 @@ (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand:GPI 1 "register_operand" "") - (match_operand:GPI 2 "aarch64_plus_operand" "")]) + [(match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_plus_operand")]) (label_ref (match_operand 3 "" "")) (pc)))] "" @@ -326,8 +506,8 @@ (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand:GPF 1 "register_operand" "") - (match_operand:GPF 2 "aarch64_fp_compare_operand" "")]) + [(match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "aarch64_fp_compare_operand")]) (label_ref (match_operand 3 "" "")) (pc)))] "" @@ -341,23 +521,25 @@ (define_expand "cbranchcc4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register" "") + [(match_operand 1 "cc_register") (match_operand 2 "const0_operand")]) (label_ref (match_operand 3 "" "")) (pc)))] "" "") -(define_insn "ccmp" - [(set (match_operand:CC 1 "cc_register" "") - (if_then_else:CC +(define_insn "@ccmp" + [(set (match_operand:CC_ONLY 1 "cc_register" "") + (if_then_else:CC_ONLY (match_operator 4 "aarch64_comparison_operator" [(match_operand 0 "cc_register" "") (const_int 0)]) - (compare:CC + (compare:CC_ONLY (match_operand:GPI 2 "register_operand" "r,r,r") (match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn")) - (unspec:CC [(match_operand 5 "immediate_operand")] UNSPEC_NZCV)))] + (unspec:CC_ONLY + [(match_operand 5 "immediate_operand")] + UNSPEC_NZCV)))] "" "@ ccmp\\t%2, %3, %k5, %m4 @@ -366,33 +548,57 @@ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) -(define_insn "fccmp" - [(set (match_operand:CCFP 1 "cc_register" "") - (if_then_else:CCFP +(define_insn "@ccmp" + [(set (match_operand:CCFP_CCFPE 1 "cc_register" "") + (if_then_else:CCFP_CCFPE (match_operator 4 "aarch64_comparison_operator" [(match_operand 0 "cc_register" "") (const_int 0)]) - (compare:CCFP + (compare:CCFP_CCFPE (match_operand:GPF 2 "register_operand" "w") (match_operand:GPF 3 "register_operand" "w")) - (unspec:CCFP [(match_operand 5 "immediate_operand")] UNSPEC_NZCV)))] + (unspec:CCFP_CCFPE + [(match_operand 5 "immediate_operand")] + UNSPEC_NZCV)))] "TARGET_FLOAT" - "fccmp\\t%2, %3, %k5, %m4" + "fccmp\\t%2, %3, %k5, %m4" [(set_attr "type" "fccmp")] ) -(define_insn "fccmpe" - [(set (match_operand:CCFPE 1 "cc_register" "") - (if_then_else:CCFPE +(define_insn "@ccmp_rev" + [(set (match_operand:CC_ONLY 1 "cc_register" "") + (if_then_else:CC_ONLY + 
(match_operator 4 "aarch64_comparison_operator" + [(match_operand 0 "cc_register" "") + (const_int 0)]) + (unspec:CC_ONLY + [(match_operand 5 "immediate_operand")] + UNSPEC_NZCV) + (compare:CC_ONLY + (match_operand:GPI 2 "register_operand" "r,r,r") + (match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))))] + "" + "@ + ccmp\\t%2, %3, %k5, %M4 + ccmp\\t%2, %3, %k5, %M4 + ccmn\\t%2, #%n3, %k5, %M4" + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] +) + +(define_insn "@ccmp_rev" + [(set (match_operand:CCFP_CCFPE 1 "cc_register" "") + (if_then_else:CCFP_CCFPE (match_operator 4 "aarch64_comparison_operator" [(match_operand 0 "cc_register" "") - (const_int 0)]) - (compare:CCFPE + (const_int 0)]) + (unspec:CCFP_CCFPE + [(match_operand 5 "immediate_operand")] + UNSPEC_NZCV) + (compare:CCFP_CCFPE (match_operand:GPF 2 "register_operand" "w") - (match_operand:GPF 3 "register_operand" "w")) - (unspec:CCFPE [(match_operand 5 "immediate_operand")] UNSPEC_NZCV)))] + (match_operand:GPF 3 "register_operand" "w"))))] "TARGET_FLOAT" - "fccmpe\\t%2, %3, %k5, %m4" + "fccmp\\t%2, %3, %k5, %M4" [(set_attr "type" "fccmp")] ) @@ -404,9 +610,9 @@ ;; csneg x0, x0, x1, mi (define_expand "mod3" - [(match_operand:GPI 0 "register_operand" "") - (match_operand:GPI 1 "register_operand" "") - (match_operand:GPI 2 "const_int_operand" "")] + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "const_int_operand")] "" { HOST_WIDE_INT val = INTVAL (operands[2]); @@ -459,10 +665,14 @@ (pc)))] "" { + /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., + but the "." is required for SVE conditions. */ + bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; if (get_attr_length (insn) == 8) - return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t"); + return aarch64_gen_far_branch (operands, 2, "Lbcond", + use_dot_p ? "b.%M0\\t" : "b%M0\\t"); else - return "b%m0\\t%l2"; + return use_dot_p ? 
"b.%m0\\t%l2" : "b%m0\\t%l2"; } [(set_attr "type" "branch") (set (attr "length") @@ -487,14 +697,14 @@ ;; sub x0, x1, #(CST & 0xfff000) ;; subs x0, x0, #(CST & 0x000fff) ;; b .Label -(define_insn_and_split "*compare_condjump" +(define_insn_and_split "*compare_condjump" [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") (match_operand:GPI 1 "aarch64_imm24" "n")) (label_ref:P (match_operand 2 "" "")) (pc)))] - "!aarch64_move_imm (INTVAL (operands[1]), mode) - && !aarch64_plus_operand (operands[1], mode) + "!aarch64_move_imm (INTVAL (operands[1]), mode) + && !aarch64_plus_operand (operands[1], mode) && !reload_completed" "#" "&& true" @@ -502,20 +712,21 @@ { HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; - rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); - emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); + emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); - rtx cmp_rtx = gen_rtx_fmt_ee (, mode, cc_reg, const0_rtx); + rtx cmp_rtx = gen_rtx_fmt_ee (, mode, + cc_reg, const0_rtx); emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); DONE; } ) (define_expand "casesi" - [(match_operand:SI 0 "register_operand" "") ; Index - (match_operand:SI 1 "const_int_operand" "") ; Lower bound - (match_operand:SI 2 "const_int_operand" "") ; Total range + [(match_operand:SI 0 "register_operand") ; Index + (match_operand:SI 1 "const_int_operand") ; Lower bound + (match_operand:SI 2 "const_int_operand") ; Total range (match_operand:DI 3 "" "") ; Table label (match_operand:DI 4 "" "")] ; Out of range label "" @@ -536,7 +747,8 @@ constant can be represented in SImode, this is important for the corner case where operand[1] is INT_MIN. 
*/ - operands[1] = GEN_INT (trunc_int_for_mode (-INTVAL (operands[1]), SImode)); + operands[1] + = GEN_INT (trunc_int_for_mode (-UINTVAL (operands[1]), SImode)); if (!(*insn_data[CODE_FOR_addsi3].operand[2].predicate) (operands[1], SImode)) @@ -552,13 +764,27 @@ operands[0], operands[2], operands[4])); operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[3])); - emit_jump_insn (gen_casesi_dispatch (operands[2], operands[0], - operands[3])); + operands[2] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[2], operands[0]), + UNSPEC_CASESI); + operands[2] = gen_rtx_MEM (DImode, operands[2]); + MEM_READONLY_P (operands[2]) = 1; + MEM_NOTRAP_P (operands[2]) = 1; + emit_jump_insn (gen_casesi_dispatch (operands[2], operands[3])); DONE; } ) -(define_insn "casesi_dispatch" +(define_expand "casesi_dispatch" + [(parallel + [(set (pc) (match_operand:DI 0 "")) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:DI 2)) + (clobber (match_scratch:DI 3)) + (use (label_ref:DI (match_operand 1 "")))])] + "") + +(define_insn "*casesi_dispatch" [(parallel [(set (pc) (mem:DI (unspec [(match_operand:DI 0 "register_operand" "r") @@ -567,12 +793,12 @@ (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 3 "=r")) (clobber (match_scratch:DI 4 "=r")) - (use (label_ref (match_operand 2 "" "")))])] + (use (label_ref:DI (match_operand 2 "" "")))])] "" "* return aarch64_output_casesi (operands); " - [(set_attr "length" "16") + [(set_attr "sls_length" "casesi") (set_attr "type" "branch")] ) @@ -651,14 +877,23 @@ [(return)] "" { + const char *ret = NULL; if (aarch64_return_address_signing_enabled () - && TARGET_ARMV8_3 + && (TARGET_PAUTH) && !crtl->calls_eh_return) - return "retaa"; - - return "ret"; + { + if (aarch64_ra_sign_key == AARCH64_KEY_B) + ret = "retab"; + else + ret = "retaa"; + } + else + ret = "ret"; + output_asm_insn (ret, operands); + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); } - [(set_attr "type" "branch")] + [(set_attr "type" "branch") + (set_attr "sls_length" "retbr")] ) (define_expand "return" @@ -670,8 +905,12 @@ (define_insn "simple_return" [(simple_return)] "" - "ret" - [(set_attr "type" "branch")] + { + output_asm_insn ("ret", operands); + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); + } + [(set_attr "type" "branch") + (set_attr "sls_length" "retbr")] ) (define_insn "*cb1" @@ -679,7 +918,7 @@ (const_int 0)) (label_ref (match_operand 1 "" "")) (pc)))] - "" + "!aarch64_track_speculation" { if (get_attr_length (insn) == 8) return aarch64_gen_far_branch (operands, 1, "Lcb", "\\t%0, "); @@ -709,7 +948,7 @@ (label_ref (match_operand 2 "" "")) (pc))) (clobber (reg:CC CC_REGNUM))] - "" + "!aarch64_track_speculation" { if (get_attr_length (insn) == 8) { @@ -745,7 +984,7 @@ (label_ref (match_operand 1 "" "")) (pc))) (clobber (reg:CC CC_REGNUM))] - "" + "!aarch64_track_speculation" { if (get_attr_length (insn) == 8) { @@ -783,102 +1022,121 @@ ;; ------------------------------------------------------------------- (define_expand "call" - [(parallel [(call (match_operand 0 "memory_operand" "") - (match_operand 1 "general_operand" "")) - (use (match_operand 2 "" "")) - (clobber (reg:DI LR_REGNUM))])] + [(parallel + [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))])] "" " { - aarch64_expand_call (NULL_RTX, operands[0], false); + aarch64_expand_call (NULL_RTX, operands[0], operands[2], false); DONE; }" ) (define_insn 
"*call_insn" - [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf")) + [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucr, Usf")) (match_operand 1 "" "")) + (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) (clobber (reg:DI LR_REGNUM))] "" "@ - blr\\t%0 + * return aarch64_indirect_call_asm (operands[0]); bl\\t%c0" - [(set_attr "type" "call, call")] -) + [(set_attr "type" "call, call")]) (define_expand "call_value" - [(parallel [(set (match_operand 0 "" "") - (call (match_operand 1 "memory_operand" "") - (match_operand 2 "general_operand" ""))) - (use (match_operand 3 "" "")) - (clobber (reg:DI LR_REGNUM))])] + [(parallel + [(set (match_operand 0 "") + (call (match_operand 1 "memory_operand") + (match_operand 2 "general_operand"))) + (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))])] "" " { - aarch64_expand_call (operands[0], operands[1], false); + aarch64_expand_call (operands[0], operands[1], operands[3], false); DONE; }" ) (define_insn "*call_value_insn" [(set (match_operand 0 "" "") - (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf")) + (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucr, Usf")) (match_operand 2 "" ""))) + (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) (clobber (reg:DI LR_REGNUM))] "" "@ - blr\\t%1 + * return aarch64_indirect_call_asm (operands[1]); bl\\t%c1" [(set_attr "type" "call, call")] ) (define_expand "sibcall" - [(parallel [(call (match_operand 0 "memory_operand" "") - (match_operand 1 "general_operand" "")) - (return) - (use (match_operand 2 "" ""))])] + [(parallel + [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)])] "" { - aarch64_expand_call (NULL_RTX, operands[0], true); + aarch64_expand_call (NULL_RTX, operands[0], operands[2], true); DONE; } ) (define_expand "sibcall_value" - [(parallel [(set (match_operand 0 "" "") - (call (match_operand 1 "memory_operand" "") - (match_operand 2 "general_operand" ""))) - (return) - (use (match_operand 3 "" ""))])] + [(parallel + [(set (match_operand 0 "") + (call (match_operand 1 "memory_operand") + (match_operand 2 "general_operand"))) + (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)])] "" { - aarch64_expand_call (operands[0], operands[1], true); + aarch64_expand_call (operands[0], operands[1], operands[3], true); DONE; } ) (define_insn "*sibcall_insn" [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 1 "" "")) + (match_operand 1 "")) + (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) (return)] "SIBLING_CALL_P (insn)" - "@ - br\\t%0 - b\\t%c0" - [(set_attr "type" "branch, branch")] + { + if (which_alternative == 0) + { + output_asm_insn ("br\\t%0", operands); + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); + } + return "b\\t%c0"; + } + [(set_attr "type" "branch, branch") + (set_attr "sls_length" "retbr,none")] ) (define_insn "*sibcall_value_insn" - [(set (match_operand 0 "" "") + [(set (match_operand 0 "") (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 2 "" ""))) + (match_operand 2 ""))) + (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) (return)] "SIBLING_CALL_P (insn)" - "@ - br\\t%1 - b\\t%c1" - [(set_attr "type" "branch, branch")] + { + if 
(which_alternative == 0) + { + output_asm_insn ("br\\t%1", operands); + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); + } + return "b\\t%c1"; + } + [(set_attr "type" "branch, branch") + (set_attr "sls_length" "retbr,none")] ) ;; Call subroutine returning any type. @@ -892,7 +1150,9 @@ { int i; - emit_call_insn (gen_call (operands[0], const0_rtx, NULL)); + /* Untyped calls always use the default ABI. It's only possible to use + ABI variants if we know the type of the target function. */ + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -913,8 +1173,8 @@ ;; ------------------------------------------------------------------- (define_expand "mov" - [(set (match_operand:SHORT 0 "nonimmediate_operand" "") - (match_operand:SHORT 1 "general_operand" ""))] + [(set (match_operand:SHORT 0 "nonimmediate_operand") + (match_operand:SHORT 1 "general_operand"))] "" " if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) @@ -929,8 +1189,8 @@ ) (define_insn "*mov_aarch64" - [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r ,r,*w, m, m, r,*w,*w") - (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D,Usv,m, m,rZ,*w,*w, r,*w"))] + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") + (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D,Usv,m,m,rZ,w,w,r,w"))] "(register_operand (operands[0], mode) || aarch64_reg_or_zero (operands[1], mode))" { @@ -966,13 +1226,12 @@ ;; The "mov_imm" type for CNT is just a placeholder. [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, store_4,neon_to_gp,neon_from_gp,neon_dup") - (set_attr "simd" "*,*,yes,*,*,*,*,*,yes,yes,yes") - (set_attr "sve" "*,*,*,yes,*,*,*,*,*,*,*")] + (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd")] ) (define_expand "mov" - [(set (match_operand:GPI 0 "nonimmediate_operand" "") - (match_operand:GPI 1 "general_operand" ""))] + [(set (match_operand:GPI 0 "nonimmediate_operand") + (match_operand:GPI 1 "general_operand"))] "" " if (MEM_P (operands[0]) && !MEM_VOLATILE_P (operands[0]) @@ -983,10 +1242,19 @@ if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) operands[1] = force_reg (mode, operands[1]); - /* FIXME: RR we still need to fix up what we are doing with - symbol_refs and other types of constants. */ - if (CONSTANT_P (operands[1]) - && !CONST_INT_P (operands[1])) + /* Lower moves of symbolic constants into individual instructions. + Doing this now is sometimes necessary for correctness, since some + sequences require temporary pseudo registers. Lowering now is also + often better for optimization, since more RTL passes get the + chance to optimize the individual instructions. + + When called after RA, also split multi-instruction moves into + smaller pieces now, since we can't be sure that sure that there + will be a following split pass. */ + if (CONST_INT_P (operands[1]) + ? 
(reload_completed + && !aarch64_mov_imm_operand (operands[1], mode)) + : CONSTANT_P (operands[1])) { aarch64_expand_mov_immediate (operands[0], operands[1]); DONE; @@ -996,7 +1264,7 @@ (define_insn_and_split "*movsi_aarch64" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, w,r,w, w") - (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,*w,Usa,Ush,rZ,w,w,Ds"))] + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))] "(register_operand (operands[0], SImode) || aarch64_reg_or_zero (operands[1], SImode))" "@ @@ -1026,9 +1294,7 @@ ;; The "mov_imm" type for CNT is just a placeholder. [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") - (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") - (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes") - (set_attr "sve" "*,*,*,*,*,yes,*,*,*,*,*,*,*,*,*,*")] + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")] ) (define_insn_and_split "*movdi_aarch64" @@ -1065,9 +1331,7 @@ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm, load_8,load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov, neon_move") - (set_attr "fp" "*,*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") - (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes") - (set_attr "sve" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,*,*,*")] + (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")] ) (define_insn "insv_imm" @@ -1081,9 +1345,27 @@ [(set_attr "type" "mov_imm")] ) +;; Match MOVK as a normal AND and IOR operation. +(define_insn "aarch64_movk" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand")) + (match_operand:GPI 3 "const_int_operand")))] + "aarch64_movk_shift (rtx_mode_t (operands[2], mode), + rtx_mode_t (operands[3], mode)) >= 0" + { + int shift = aarch64_movk_shift (rtx_mode_t (operands[2], mode), + rtx_mode_t (operands[3], mode)); + operands[2] = gen_int_mode (UINTVAL (operands[3]) >> shift, SImode); + operands[3] = gen_int_mode (shift, SImode); + return "movk\\t%0, #%X2, lsl %3"; + } + [(set_attr "type" "mov_imm")] +) + (define_expand "movti" - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "general_operand" ""))] + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] "" " if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) @@ -1101,13 +1383,14 @@ (define_insn "*movti_aarch64" [(set (match_operand:TI 0 - "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m") + "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") (match_operand:TI 1 - "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))] + "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))] "(register_operand (operands[0], TImode) || aarch64_reg_or_zero (operands[1], TImode))" "@ # + movi\\t%0.2d, #0 # # mov\\t%0.16b, %1.16b @@ -1116,12 +1399,11 @@ stp\\txzr, xzr, %0 ldr\\t%q0, %1 str\\t%q1, %0" - [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ + [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ load_16,store_16,store_16,\ load_16,store_16") - (set_attr "length" "8,8,8,4,4,4,4,4,4") - (set_attr "simd" "*,*,*,yes,*,*,*,*,*") - (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")] + (set_attr "length" "8,4,8,8,4,4,4,4,4,4") + (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")] ) ;; Split a TImode register-register or register-immediate move into @@ -1138,14 +1420,18 
@@ }) (define_expand "mov" - [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand" "") - (match_operand:GPF_TF_F16 1 "general_operand" ""))] + [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand") + (match_operand:GPF_TF_F16_MOV 1 "general_operand"))] "" { if (!TARGET_FLOAT) { - aarch64_err_no_fpadvsimd (mode, "code"); - FAIL; + aarch64_err_no_fpadvsimd (mode); + machine_mode intmode + = int_mode_for_size (GET_MODE_BITSIZE (mode), 0).require (); + emit_move_insn (gen_lowpart (intmode, operands[0]), + gen_lowpart (intmode, operands[1])); + DONE; } if (GET_CODE (operands[0]) == MEM @@ -1155,14 +1441,15 @@ } ) -(define_insn "*movhf_aarch64" - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r") - (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], HFmode) - || aarch64_reg_or_fp_zero (operands[1], HFmode))" +(define_insn "*mov_aarch64" + [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") + (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], mode) + || aarch64_reg_or_fp_zero (operands[1], mode))" "@ movi\\t%0.4h, #0 fmov\\t%h0, %w1 + dup\\t%w0.4h, %w1 umov\\t%w0, %1.h[0] mov\\t%0.h[0], %1.h[0] fmov\\t%h0, %1 @@ -1172,10 +1459,9 @@ ldrh\\t%w0, %1 strh\\t%w1, %0 mov\\t%w0, %w1" - [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \ + [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \ neon_move,f_loads,f_stores,load_4,store_4,mov_reg") - (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*") - (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")] ) (define_insn "*movsf_aarch64" @@ -1199,7 +1485,7 @@ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\ f_loads,f_stores,load_4,store_4,mov_reg,\ fconsts") - (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")] ) (define_insn "*movdf_aarch64" @@ -1223,7 +1509,7 @@ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ f_loadd,f_stored,load_8,store_8,mov_reg,\ fconstd") - (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")] ) (define_split @@ -1248,9 +1534,9 @@ (define_insn "*movtf_aarch64" [(set (match_operand:TF 0 - "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m") + "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") (match_operand:TF 1 - "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))] + "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] "TARGET_FLOAT && (register_operand (operands[0], TFmode) || aarch64_reg_or_fp_zero (operands[1], TFmode))" "@ @@ -1268,12 +1554,12 @@ [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ f_loadd,f_stored,load_16,store_16,store_16") (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") - (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,*,*,*,simd,*,*,*,*,*,*")] ) (define_split [(set (match_operand:TF 0 "register_operand" "") - (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + (match_operand:TF 1 "nonmemory_operand" ""))] "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" [(const_int 0)] { @@ -1284,157 +1570,138 @@ ;; 0 is dst ;; 1 is src -;; 2 is size of move in bytes +;; 2 is size of copy in bytes ;; 3 is alignment -(define_expand "movmemdi" +(define_expand "cpymemdi" [(match_operand:BLK 0 "memory_operand") 
(match_operand:BLK 1 "memory_operand") (match_operand:DI 2 "immediate_operand") (match_operand:DI 3 "immediate_operand")] "!STRICT_ALIGNMENT" { - if (aarch64_expand_movmem (operands)) + if (aarch64_expand_cpymem (operands)) DONE; FAIL; } ) +;; 0 is dst +;; 1 is val +;; 2 is size of copy in bytes +;; 3 is alignment + +(define_expand "setmemdi" + [(set (match_operand:BLK 0 "memory_operand") ;; Dest + (match_operand:QI 2 "nonmemory_operand")) ;; Value + (use (match_operand:DI 1 "immediate_operand")) ;; Length + (match_operand 3 "immediate_operand")] ;; Align + "TARGET_SIMD" +{ + if (aarch64_expand_setmem (operands)) + DONE; + + FAIL; +}) + ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. -(define_insn "load_pairsi" - [(set (match_operand:SI 0 "register_operand" "=r,*w") - (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SI 2 "register_operand" "=r,*w") - (match_operand:SI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "load_pair_sw_" + [(set (match_operand:SX 0 "register_operand" "=r,w") + (match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump")) + (set (match_operand:SX2 2 "register_operand" "=r,w") + (match_operand:SX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (mode)))" "@ - ldp\\t%w0, %w2, %1 - ldp\\t%s0, %s2, %1" + ldp\\t%w0, %w2, %z1 + ldp\\t%s0, %s2, %z1" [(set_attr "type" "load_8,neon_load1_2reg") - (set_attr "fp" "*,yes")] -) - -(define_insn "load_pairdi" - [(set (match_operand:DI 0 "register_operand" "=r,*w") - (match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DI 2 "register_operand" "=r,*w") - (match_operand:DI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DImode)))" + (set_attr "arch" "*,fp")] +) + +;; Storing different modes that can still be merged +(define_insn "load_pair_dw_" + [(set (match_operand:DX 0 "register_operand" "=r,w") + (match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump")) + (set (match_operand:DX2 2 "register_operand" "=r,w") + (match_operand:DX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (mode)))" "@ - ldp\\t%x0, %x2, %1 - ldp\\t%d0, %d2, %1" + ldp\\t%x0, %x2, %z1 + ldp\\t%d0, %d2, %z1" [(set_attr "type" "load_16,neon_load1_2reg") - (set_attr "fp" "*,yes")] + (set_attr "arch" "*,fp")] ) +(define_insn "load_pair_dw_tftf" + [(set (match_operand:TF 0 "register_operand" "=w") + (match_operand:TF 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:TF 2 "register_operand" "=w") + (match_operand:TF 3 "memory_operand" "m"))] + "TARGET_SIMD + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (TFmode)))" + "ldp\\t%q0, %q2, %z1" + [(set_attr "type" "neon_ldp_q") + (set_attr "fp" "yes")] +) ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. 
-(define_insn "store_pairsi" - [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:SI 2 "memory_operand" "=m,m") - (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "store_pair_sw_" + [(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:SX2 2 "memory_operand" "=m,m") + (match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (mode)))" "@ - stp\\t%w1, %w3, %0 - stp\\t%s1, %s3, %0" + stp\\t%w1, %w3, %z0 + stp\\t%s1, %s3, %z0" [(set_attr "type" "store_8,neon_store1_2reg") - (set_attr "fp" "*,yes")] -) - -(define_insn "store_pairdi" - [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:DI 2 "memory_operand" "=m,m") - (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DImode)))" + (set_attr "arch" "*,fp")] +) + +;; Storing different modes that can still be merged +(define_insn "store_pair_dw_" + [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:DX2 2 "memory_operand" "=m,m") + (match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (mode)))" "@ - stp\\t%x1, %x3, %0 - stp\\t%d1, %d3, %0" + stp\\t%x1, %x3, %z0 + stp\\t%d1, %d3, %z0" [(set_attr "type" "store_16,neon_store1_2reg") - (set_attr "fp" "*,yes")] -) - -;; Operands 1 and 3 are tied together by the final condition; so we allow -;; fairly lax checking on the second memory operation. -(define_insn "load_pairsf" - [(set (match_operand:SF 0 "register_operand" "=w,*r") - (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SF 2 "register_operand" "=w,*r") - (match_operand:SF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SFmode)))" - "@ - ldp\\t%s0, %s2, %1 - ldp\\t%w0, %w2, %1" - [(set_attr "type" "neon_load1_2reg,load_8") - (set_attr "fp" "yes,*")] -) - -(define_insn "load_pairdf" - [(set (match_operand:DF 0 "register_operand" "=w,*r") - (match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DF 2 "register_operand" "=w,*r") - (match_operand:DF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DFmode)))" - "@ - ldp\\t%d0, %d2, %1 - ldp\\t%x0, %x2, %1" - [(set_attr "type" "neon_load1_2reg,load_16") - (set_attr "fp" "yes,*")] + (set_attr "arch" "*,fp")] ) -;; Operands 0 and 2 are tied together by the final condition; so we allow -;; fairly lax checking on the second memory operation. 
-(define_insn "store_pairsf" - [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:SF 2 "memory_operand" "=m,m") - (match_operand:SF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SFmode)))" - "@ - stp\\t%s1, %s3, %0 - stp\\t%w1, %w3, %0" - [(set_attr "type" "neon_store1_2reg,store_8") - (set_attr "fp" "yes,*")] -) - -(define_insn "store_pairdf" - [(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:DF 2 "memory_operand" "=m,m") - (match_operand:DF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DFmode)))" - "@ - stp\\t%d1, %d3, %0 - stp\\t%x1, %x3, %0" - [(set_attr "type" "neon_store1_2reg,store_16") - (set_attr "fp" "yes,*")] +(define_insn "store_pair_dw_tftf" + [(set (match_operand:TF 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:TF 1 "register_operand" "w")) + (set (match_operand:TF 2 "memory_operand" "=m") + (match_operand:TF 3 "register_operand" "w"))] + "TARGET_SIMD && + rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (TFmode)))" + "stp\\t%q1, %q3, %z0" + [(set_attr "type" "neon_stp_q") + (set_attr "fp" "yes")] ) ;; Load pair with post-index writeback. This is primarily used in function @@ -1450,8 +1717,8 @@ (mem:GPI (plus:P (match_dup 1) (match_operand:P 5 "const_int_operand" "n"))))])] "INTVAL (operands[5]) == GET_MODE_SIZE (mode)" - "ldp\\t%2, %3, [%1], %4" - [(set_attr "type" "load_")] + "ldp\\t%2, %3, [%1], %4" + [(set_attr "type" "load_")] ) (define_insn "loadwb_pair_" @@ -1465,10 +1732,25 @@ (mem:GPF (plus:P (match_dup 1) (match_operand:P 5 "const_int_operand" "n"))))])] "INTVAL (operands[5]) == GET_MODE_SIZE (mode)" - "ldp\\t%2, %3, [%1], %4" + "ldp\\t%2, %3, [%1], %4" [(set_attr "type" "neon_load1_2reg")] ) +(define_insn "loadwb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) + (set (match_operand:TX 2 "register_operand" "=w") + (mem:TX (match_dup 1))) + (set (match_operand:TX 3 "register_operand" "=w") + (mem:TX (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] + "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (mode)" + "ldp\\t%q2, %q3, [%1], %4" + [(set_attr "type" "neon_ldp_q")] +) + ;; Store pair with pre-index writeback. This is primarily used in function ;; prologues. (define_insn "storewb_pair_" @@ -1483,8 +1765,8 @@ (match_operand:P 5 "const_int_operand" "n"))) (match_operand:GPI 3 "register_operand" "r"))])] "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" - "stp\\t%2, %3, [%0, %4]!" - [(set_attr "type" "store_")] + "stp\\t%2, %3, [%0, %4]!" + [(set_attr "type" "store_")] ) (define_insn "storewb_pair_" @@ -1499,10 +1781,28 @@ (match_operand:P 5 "const_int_operand" "n"))) (match_operand:GPF 3 "register_operand" "w"))])] "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" - "stp\\t%2, %3, [%0, %4]!" + "stp\\t%2, %3, [%0, %4]!" 
[(set_attr "type" "neon_store1_2reg")] ) +(define_insn "storewb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=&k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) + (set (mem:TX (plus:P (match_dup 0) + (match_dup 4))) + (match_operand:TX 2 "register_operand" "w")) + (set (mem:TX (plus:P (match_dup 0) + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:TX 3 "register_operand" "w"))])] + "TARGET_SIMD + && INTVAL (operands[5]) + == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" + "stp\\t%q2, %q3, [%0, %4]!" + [(set_attr "type" "neon_stp_q")] +) + ;; ------------------------------------------------------------------- ;; Sign/Zero extension ;; ------------------------------------------------------------------- @@ -1532,31 +1832,39 @@ plus_constant (Pmode, XEXP (operands[1], 0), GET_MODE_SIZE (SImode)))" - "ldpsw\\t%0, %2, %1" + "ldpsw\\t%0, %2, %z1" [(set_attr "type" "load_8")] ) (define_insn "*zero_extendsidi2_aarch64" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + [(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,r,m,w,w")))] "" "@ uxtw\t%0, %w1 - ldr\t%w0, %1" - [(set_attr "type" "extend,load_4")] + ldr\t%w0, %1 + fmov\t%s0, %w1 + ldr\t%s0, %1 + fmov\t%w0, %s1 + fmov\t%s0, %s1" + [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov") + (set_attr "arch" "*,*,fp,fp,fp,fp")] ) (define_insn "*load_pair_zero_extendsidi2_aarch64" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump"))) - (set (match_operand:DI 2 "register_operand" "=r") - (zero_extend:DI (match_operand:SI 3 "memory_operand" "m")))] + [(set (match_operand:DI 0 "register_operand" "=r,w") + (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))) + (set (match_operand:DI 2 "register_operand" "=r,w") + (zero_extend:DI (match_operand:SI 3 "memory_operand" "m,m")))] "rtx_equal_p (XEXP (operands[3], 0), plus_constant (Pmode, XEXP (operands[1], 0), GET_MODE_SIZE (SImode)))" - "ldp\\t%w0, %w2, %1" - [(set_attr "type" "load_8")] + "@ + ldp\t%w0, %w2, %z1 + ldp\t%s0, %s2, %z1" + [(set_attr "type" "load_8,neon_load1_2reg") + (set_attr "arch" "*,fp")] ) (define_expand "2" @@ -1576,14 +1884,15 @@ ) (define_insn "*zero_extend2_aarch64" - [(set (match_operand:GPI 0 "register_operand" "=r,r,*w") + [(set (match_operand:GPI 0 "register_operand" "=r,r,w") (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))] "" "@ and\t%0, %1, ldr\t%w0, %1 ldr\t%0, %1" - [(set_attr "type" "logic_imm,load_4,load_4")] + [(set_attr "type" "logic_imm,load_4,f_loads") + (set_attr "arch" "*,*,fp")] ) (define_expand "qihi2" @@ -1618,9 +1927,9 @@ (define_expand "add3" [(set - (match_operand:GPI 0 "register_operand" "") - (plus:GPI (match_operand:GPI 1 "register_operand" "") - (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "")))] + (match_operand:GPI 0 "register_operand") + (plus:GPI (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))] "" { /* If operands[1] is a subreg extract the inner RTX. */ @@ -1629,10 +1938,19 @@ /* If the constant is too large for a single instruction and isn't frame based, split off the immediate so it is available for CSE. 
*/ if (!aarch64_plus_immediate (operands[2], mode) + && !(TARGET_SVE && aarch64_sve_plus_immediate (operands[2], mode)) && can_create_pseudo_p () && (!REG_P (op1) || !REGNO_PTR_FRAME_P (REGNO (op1)))) operands[2] = force_reg (mode, operands[2]); + /* Some tunings prefer to avoid VL-based operations. + Split off the poly immediate here. The rtx costs hook will reject attempts + to combine them back. */ + else if (GET_CODE (operands[2]) == CONST_POLY_INT + && can_create_pseudo_p () + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS)) + operands[2] = force_reg (mode, operands[2]); /* Expand polynomial additions now if the destination is the stack pointer, since we don't want to use that as a temporary. */ else if (operands[0] == stack_pointer_rtx @@ -1646,10 +1964,10 @@ (define_insn "*add3_aarch64" [(set - (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,rk") + (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk") (plus:GPI - (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,rk") - (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uav")))] + (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk") + (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))] "" "@ add\\t%0, %1, %2 @@ -1657,10 +1975,11 @@ add\\t%0, %1, %2 sub\\t%0, %1, #%n2 # - * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);" - ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. - [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm") - (set_attr "simd" "*,*,yes,*,*,*")] + * return aarch64_output_sve_scalar_inc_dec (operands[2]); + * return aarch64_output_sve_addvl_addpl (operands[2]);" + ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. + [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm") + (set_attr "arch" "*,*,simd,*,*,sve,sve")] ) ;; zero_extend version of above @@ -1739,17 +2058,18 @@ ;; this pattern. (define_insn_and_split "*add3_poly_1" [(set - (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,&r") + (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,r,&r") (plus:GPI - (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,rk") - (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uat")))] + (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,0,rk,rk") + (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uai,Uav,Uat")))] "TARGET_SVE && operands[0] != stack_pointer_rtx" "@ add\\t%0, %1, %2 add\\t%0, %1, %2 sub\\t%0, %1, #%n2 # - * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]); + * return aarch64_output_sve_scalar_inc_dec (operands[2]); + * return aarch64_output_sve_addvl_addpl (operands[2]); #" "&& epilogue_completed && !reg_overlap_mentioned_p (operands[0], operands[1]) @@ -1760,8 +2080,8 @@ operands[2], operands[0], NULL_RTX); DONE; } - ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. - [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,multiple")] + ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. 
+ [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,alu_imm,multiple")] ) (define_split @@ -1782,29 +2102,141 @@ } ) +(define_expand "addv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_plus_operand") + (label_ref (match_operand 3 "" ""))] + "" +{ + if (CONST_INT_P (operands[2])) + emit_insn (gen_add3_compareV_imm (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_add3_compareV (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_expand "uaddv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (label_ref (match_operand 3 "" ""))] + "" +{ + emit_insn (gen_add3_compareC (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]); + + DONE; +}) + (define_expand "addti3" - [(set (match_operand:TI 0 "register_operand" "") - (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + [(set (match_operand:TI 0 "register_operand") + (plus:TI (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_reg_or_imm")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + aarch64_addti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + if (op2_low == const0_rtx) + { + low_dest = op1_low; + if (!aarch64_pluslong_operand (op2_high, DImode)) + op2_high = force_reg (DImode, op2_high); + emit_insn (gen_adddi3 (high_dest, op1_high, op2_high)); + } + else + { + emit_insn (gen_adddi3_compareC (low_dest, op1_low, + force_reg (DImode, op2_low))); + emit_insn (gen_adddi3_carryin (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); + + DONE; +}) + +(define_expand "addvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref (match_operand 3 "" ""))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_addti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + + if (op2_low == const0_rtx) + { + low_dest = op1_low; + emit_insn (gen_adddi3_compareV (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + else + { + emit_insn (gen_adddi3_compareC (low_dest, op1_low, + force_reg (DImode, op2_low))); + emit_insn (gen_adddi3_carryinV (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); DONE; }) +(define_expand "uaddvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref 
(match_operand 3 "" ""))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_addti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + + if (op2_low == const0_rtx) + { + low_dest = op1_low; + emit_insn (gen_adddi3_compareC (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + else + { + emit_insn (gen_adddi3_compareC (low_dest, op1_low, + force_reg (DImode, op2_low))); + emit_insn (gen_adddi3_carryinC (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); + + aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]); + DONE; + }) + (define_insn "add3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") + (plus:GPI (match_operand:GPI 1 "register_operand" "%rk,rk,rk") (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r,r,r") @@ -1821,7 +2253,7 @@ (define_insn "*addsi3_compare0_uxtw" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:SI (match_operand:SI 1 "register_operand" "%r,r,r") + (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk") (match_operand:SI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r,r,r") @@ -1834,64 +2266,92 @@ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) -(define_insn "*add3_compareC_cconly_imm" +(define_insn "*add3_compareC_cconly" [(set (reg:CC_C CC_REGNUM) - (ne:CC_C - (plus: - (zero_extend: (match_operand:GPI 0 "register_operand" "r,r")) - (match_operand: 2 "const_scalar_int_operand" "")) - (zero_extend: - (plus:GPI - (match_dup 0) - (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")))))] - "aarch64_zero_extend_const_eq (mode, operands[2], - mode, operands[1])" + (compare:CC_C + (plus:GPI + (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")) + (match_dup 0)))] + "" "@ + cmn\\t%0, %1 cmn\\t%0, %1 cmp\\t%0, #%n1" - [(set_attr "type" "alus_imm")] + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) -(define_insn "*add3_compareC_cconly" +(define_insn "add3_compareC" [(set (reg:CC_C CC_REGNUM) - (ne:CC_C + (compare:CC_C + (plus:GPI + (match_operand:GPI 1 "register_operand" "rk,rk,rk") + (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) + (match_dup 1))) + (set (match_operand:GPI 0 "register_operand" "=r,r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + adds\\t%0, %1, %2 + adds\\t%0, %1, %2 + subs\\t%0, %1, #%n2" + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] +) + +(define_insn "*add3_compareV_cconly_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V (plus: - (zero_extend: (match_operand:GPI 0 "register_operand" "r")) - (zero_extend: (match_operand:GPI 1 "register_operand" "r"))) - (zero_extend: (plus:GPI (match_dup 0) (match_dup 1)))))] + (sign_extend: (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand: 1 "const_scalar_int_operand" "")) + (sign_extend: + (plus:GPI + (match_dup 0) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))))] + "INTVAL (operands[1]) == INTVAL (operands[2])" + "@ + cmn\\t%0, %1 + cmp\\t%0, #%n1" + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add3_compareV_cconly" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus: + (sign_extend: (match_operand:GPI 0 "register_operand" "r")) + (sign_extend: (match_operand:GPI 1 
"register_operand" "r"))) + (sign_extend: (plus:GPI (match_dup 0) (match_dup 1)))))] "" "cmn\\t%0, %1" [(set_attr "type" "alus_sreg")] ) -(define_insn "*add3_compareC_imm" - [(set (reg:CC_C CC_REGNUM) - (ne:CC_C +(define_insn "add3_compareV_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V (plus: - (zero_extend: (match_operand:GPI 1 "register_operand" "r,r")) - (match_operand: 3 "const_scalar_int_operand" "")) - (zero_extend: - (plus:GPI - (match_dup 1) - (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))))) + (sign_extend: + (match_operand:GPI 1 "register_operand" "rk,rk")) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) + (sign_extend: + (plus:GPI (match_dup 1) (match_dup 2))))) (set (match_operand:GPI 0 "register_operand" "=r,r") (plus:GPI (match_dup 1) (match_dup 2)))] - "aarch64_zero_extend_const_eq (mode, operands[3], - mode, operands[2])" - "@ - adds\\t%0, %1, %2 - subs\\t%0, %1, #%n2" - [(set_attr "type" "alus_imm")] + "" + "@ + adds\\t%0, %1, %2 + subs\\t%0, %1, #%n2" + [(set_attr "type" "alus_imm,alus_imm")] ) -(define_insn "add3_compareC" - [(set (reg:CC_C CC_REGNUM) - (ne:CC_C +(define_insn "add3_compareV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V (plus: - (zero_extend: (match_operand:GPI 1 "register_operand" "r")) - (zero_extend: (match_operand:GPI 2 "register_operand" "r"))) - (zero_extend: - (plus:GPI (match_dup 1) (match_dup 2))))) + (sign_extend: (match_operand:GPI 1 "register_operand" "rk")) + (sign_extend: (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend: (plus:GPI (match_dup 1) (match_dup 2))))) (set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI (match_dup 1) (match_dup 2)))] "" @@ -1902,7 +2362,7 @@ (define_insn "*adds_shift_imm_" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (ASHIFT:GPI + (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand:QI 2 "aarch64_shift_imm_" "n")) (match_operand:GPI 3 "register_operand" "r")) @@ -1931,63 +2391,31 @@ [(set_attr "type" "alus_shift_imm")] ) -(define_insn "*adds_mul_imm_" +(define_insn "*adds__" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (mult:GPI - (match_operand:GPI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_pwr_2_" "n")) - (match_operand:GPI 3 "register_operand" "r")) - (const_int 0))) + (plus:GPI + (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "rk")) + (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (mult:GPI (match_dup 1) (match_dup 2)) - (match_dup 3)))] + (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))] "" - "adds\\t%0, %3, %1, lsl %p2" - [(set_attr "type" "alus_shift_imm")] -) - -(define_insn "*subs_mul_imm_" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (minus:GPI (match_operand:GPI 1 "register_operand" "r") - (mult:GPI - (match_operand:GPI 2 "register_operand" "r") - (match_operand:QI 3 "aarch64_pwr_2_" "n"))) - (const_int 0))) - (set (match_operand:GPI 0 "register_operand" "=r") - (minus:GPI (match_dup 1) - (mult:GPI (match_dup 2) (match_dup 3))))] - "" - "subs\\t%0, %1, %2, lsl %p3" - [(set_attr "type" "alus_shift_imm")] -) - -(define_insn "*adds__" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (plus:GPI - (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) - (match_operand:GPI 2 "register_operand" "r")) - (const_int 0))) - (set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))] - "" - "adds\\t%0, %2, %1, xt" + "adds\\t%0, %2, %w1, xt" [(set_attr 
"type" "alus_ext")] ) (define_insn "*subs__" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (ANY_EXTEND:GPI (match_operand:ALLX 2 "register_operand" "r"))) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r") (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))] "" - "subs\\t%0, %1, %2, xt" + "subs\\t%0, %1, %w2, xt" [(set_attr "type" "alus_ext")] ) @@ -1998,21 +2426,21 @@ (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) (match_operand 2 "aarch64_imm3" "Ui3")) - (match_operand:GPI 3 "register_operand" "r")) + (match_operand:GPI 3 "register_operand" "rk")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)) (match_dup 3)))] "" - "adds\\t%0, %3, %1, xt %2" + "adds\\t%0, %3, %w1, xt %2" [(set_attr "type" "alus_ext")] ) (define_insn "*subs__shift_" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (ashift:GPI (ANY_EXTEND:GPI (match_operand:ALLX 2 "register_operand" "r")) @@ -2023,47 +2451,7 @@ (ashift:GPI (ANY_EXTEND:GPI (match_dup 2)) (match_dup 3))))] "" - "subs\\t%0, %1, %2, xt %3" - [(set_attr "type" "alus_ext")] -) - -(define_insn "*adds__multp2" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (plus:GPI (ANY_EXTRACT:GPI - (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n") - (const_int 0)) - (match_operand:GPI 4 "register_operand" "r")) - (const_int 0))) - (set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2)) - (match_dup 3) - (const_int 0)) - (match_dup 4)))] - "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" - "adds\\t%0, %4, %1, xt%e3 %p2" - [(set_attr "type" "alus_ext")] -) - -(define_insn "*subs__multp2" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (minus:GPI (match_operand:GPI 4 "register_operand" "r") - (ANY_EXTRACT:GPI - (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n") - (const_int 0))) - (const_int 0))) - (set (match_operand:GPI 0 "register_operand" "=r") - (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI - (mult:GPI (match_dup 1) (match_dup 2)) - (match_dup 3) - (const_int 0))))] - "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" - "subs\\t%0, %4, %1, xt%e3 %p2" + "subs\\t%0, %1, %w2, xt %3" [(set_attr "type" "alus_ext")] ) @@ -2109,7 +2497,7 @@ (match_operand:GPI 3 "register_operand" "r")))] "" "add\\t%0, %3, %1, %2" - [(set_attr "type" "alu_shift_imm")] + [(set_attr "autodetect_type" "alu_shift__op2")] ) ;; zero_extend version of above @@ -2121,17 +2509,7 @@ (match_operand:SI 3 "register_operand" "r"))))] "" "add\\t%w0, %w3, %w1, %2" - [(set_attr "type" "alu_shift_imm")] -) - -(define_insn "*add_mul_imm_" - [(set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_pwr_2_" "n")) - (match_operand:GPI 3 "register_operand" "r")))] - "" - "add\\t%0, %3, %1, lsl %p2" - [(set_attr "type" "alu_shift_imm")] + [(set_attr "autodetect_type" "alu_shift__op2")] ) (define_insn "*add__" @@ -2139,7 +2517,7 @@ (plus:GPI (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) 
(match_operand:GPI 2 "register_operand" "r")))] "" - "add\\t%0, %2, %1, xt" + "add\\t%0, %2, %w1, xt" [(set_attr "type" "alu_ext")] ) @@ -2161,7 +2539,7 @@ (match_operand 2 "aarch64_imm3" "Ui3")) (match_operand:GPI 3 "register_operand" "r")))] "" - "add\\t%0, %3, %1, xt %2" + "add\\t%0, %3, %w1, xt %2" [(set_attr "type" "alu_ext")] ) @@ -2178,62 +2556,11 @@ [(set_attr "type" "alu_ext")] ) -(define_insn "*add__mult_" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (plus:GPI (mult:GPI (ANY_EXTEND:GPI - (match_operand:ALLX 1 "register_operand" "r")) - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand:GPI 3 "register_operand" "r")))] - "" - "add\\t%0, %3, %1, xt %p2" - [(set_attr "type" "alu_ext")] -) - -;; zero_extend version of above -(define_insn "*add__mult_si_uxtw" - [(set (match_operand:DI 0 "register_operand" "=rk") - (zero_extend:DI (plus:SI (mult:SI (ANY_EXTEND:SI - (match_operand:SHORT 1 "register_operand" "r")) - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand:SI 3 "register_operand" "r"))))] - "" - "add\\t%w0, %w3, %w1, xt %p2" - [(set_attr "type" "alu_ext")] -) - -(define_insn "*add__multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (plus:GPI (ANY_EXTRACT:GPI - (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n") - (const_int 0)) - (match_operand:GPI 4 "register_operand" "r")))] - "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" - "add\\t%0, %4, %1, xt%e3 %p2" - [(set_attr "type" "alu_ext")] -) - -;; zero_extend version of above -(define_insn "*add_si_multp2_uxtw" - [(set (match_operand:DI 0 "register_operand" "=rk") - (zero_extend:DI - (plus:SI (ANY_EXTRACT:SI - (mult:SI (match_operand:SI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n") - (const_int 0)) - (match_operand:SI 4 "register_operand" "r"))))] - "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" - "add\\t%w0, %w4, %w1, xt%e3 %p2" - [(set_attr "type" "alu_ext")] -) - (define_expand "add3_carryin" [(set (match_operand:GPI 0 "register_operand") (plus:GPI (plus:GPI - (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0)) + (ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0)) (match_operand:GPI 1 "aarch64_reg_or_zero")) (match_operand:GPI 2 "aarch64_reg_or_zero")))] "" @@ -2269,6 +2596,135 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "add3_carryinC" + [(parallel + [(set (match_dup 3) + (compare:CC_ADC + (plus: + (plus: + (match_dup 4) + (zero_extend: + (match_operand:GPI 1 "register_operand"))) + (zero_extend: + (match_operand:GPI 2 "register_operand"))) + (match_dup 6))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2)))])] + "" +{ + operands[3] = gen_rtx_REG (CC_ADCmode, CC_REGNUM); + rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_LTU (mode, ccin, const0_rtx); + operands[5] = gen_rtx_LTU (mode, ccin, const0_rtx); + operands[6] = immed_wide_int_const (wi::shwi (1, mode) + << GET_MODE_BITSIZE (mode), + TImode); +}) + +(define_insn "*add3_carryinC_zero" + [(set (reg:CC_ADC CC_REGNUM) + (compare:CC_ADC + (plus: + (match_operand: 2 "aarch64_carry_operation" "") + (zero_extend: (match_operand:GPI 1 "register_operand" "r"))) + (match_operand 4 "const_scalar_int_operand" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1)))] + 
"rtx_mode_t (operands[4], mode) + == (wi::shwi (1, mode) << (unsigned) GET_MODE_BITSIZE (mode))" + "adcs\\t%0, %1, zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryinC" + [(set (reg:CC_ADC CC_REGNUM) + (compare:CC_ADC + (plus: + (plus: + (match_operand: 3 "aarch64_carry_operation" "") + (zero_extend: (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend: (match_operand:GPI 2 "register_operand" "r"))) + (match_operand 5 "const_scalar_int_operand" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (plus:GPI (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2)))] + "rtx_mode_t (operands[5], mode) + == (wi::shwi (1, mode) << (unsigned) GET_MODE_BITSIZE (mode))" + "adcs\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "add3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus: + (plus: + (match_dup 3) + (sign_extend: + (match_operand:GPI 1 "register_operand"))) + (sign_extend: + (match_operand:GPI 2 "register_operand"))) + (sign_extend: + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))])] + "" +{ + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[3] = gen_rtx_LTU (mode, cc, const0_rtx); + operands[4] = gen_rtx_LTU (mode, cc, const0_rtx); +}) + +(define_insn "*add3_carryinV_zero" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus: + (match_operand: 2 "aarch64_carry_operation" "") + (sign_extend: (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend: + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%0, %1, zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus: + (plus: + (match_operand: 3 "aarch64_carry_operation" "") + (sign_extend: (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend: (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend: + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*add_uxt_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -2280,7 +2736,7 @@ "* operands[3] = GEN_INT (aarch64_uxt_size (INTVAL(operands[2]), INTVAL (operands[3]))); - return \"add\t%0, %4, %1, uxt%e3 %2\";" + return \"add\t%0, %4, %w1, uxt%e3 %2\";" [(set_attr "type" "alu_ext")] ) @@ -2301,38 +2757,6 @@ [(set_attr "type" "alu_ext")] ) -(define_insn "*add_uxt_multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (plus:GPI (and:GPI - (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n")) - (match_operand:GPI 4 "register_operand" "r")))] - "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0" - "* - operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), - INTVAL (operands[3]))); - return \"add\t%0, %4, %1, uxt%e3 %p2\";" - [(set_attr "type" "alu_ext")] -) - -;; zero_extend version of above -(define_insn "*add_uxtsi_multp2_uxtw" - [(set (match_operand:DI 0 
"register_operand" "=rk") - (zero_extend:DI - (plus:SI (and:SI - (mult:SI (match_operand:SI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n")) - (match_operand:SI 4 "register_operand" "r"))))] - "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0" - "* - operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), - INTVAL (operands[3]))); - return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";" - [(set_attr "type" "alu_ext")] -) - (define_insn "subsi3" [(set (match_operand:SI 0 "register_operand" "=rk") (minus:SI (match_operand:SI 1 "register_operand" "rk") @@ -2362,31 +2786,230 @@ sub\\t%x0, %x1, %x2 sub\\t%d0, %d1, %d2" [(set_attr "type" "alu_sreg, neon_sub") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] +) + +(define_expand "subv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_plus_operand") + (label_ref (match_operand 3 "" ""))] + "" +{ + if (CONST_INT_P (operands[2])) + emit_insn (gen_subv_imm (operands[0], operands[1], operands[2])); + else + emit_insn (gen_subv_insn (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_insn "subv_insn" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend: + (minus:GPI + (match_operand:GPI 1 "register_operand" "rk") + (match_operand:GPI 2 "register_operand" "r"))) + (minus: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "" + "subs\\t%0, %1, %2" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "subv_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend: + (minus:GPI + (match_operand:GPI 1 "register_operand" "rk,rk") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (minus: (sign_extend: (match_dup 1)) + (match_dup 2)))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + subs\\t%0, %1, %2 + adds\\t%0, %1, #%n2" + [(set_attr "type" "alus_sreg")] +) + +(define_expand "negv3" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (label_ref (match_operand 2 "" ""))] + "" + { + emit_insn (gen_negv_insn (operands[0], operands[1])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]); + + DONE; + } ) +(define_insn "negv_insn" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend: + (neg:GPI (match_operand:GPI 1 "register_operand" "r"))) + (neg: (sign_extend: (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_dup 1)))] + "" + "negs\\t%0, %1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "negv_cmp_only" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend: + (neg:GPI (match_operand:GPI 0 "register_operand" "r"))) + (neg: (sign_extend: (match_dup 0)))))] + "" + "negs\\t%zr, %0" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*cmpv_insn" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend: + (minus:GPI (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J"))) + (minus: (sign_extend: (match_dup 0)) + (sign_extend: (match_dup 1)))))] + "" + "@ + cmp\\t%0, %1 + cmp\\t%0, %1 + cmp\\t%0, #%n1" + [(set_attr "type" "alus_sreg")] +) + +(define_expand "usubv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + 
(match_operand:GPI 2 "aarch64_reg_or_zero") + (label_ref (match_operand 3 "" ""))] + "" +{ + emit_insn (gen_sub3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); + + DONE; +}) + (define_expand "subti3" - [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + [(set (match_operand:TI 0 "register_operand") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "register_operand")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + aarch64_subvti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + emit_insn (gen_subdi3_compare1 (low_dest, op1_low, op2_low)); + emit_insn (gen_subdi3_carryin (high_dest, op1_high, op2_high)); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); DONE; }) +(define_expand "subvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref (match_operand 3 "" ""))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_subvti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, + high_dest, op1_high, op2_high, false); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +(define_expand "usubvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref (match_operand 3 "" ""))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_subvti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, + high_dest, op1_high, op2_high, true); + + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); + DONE; +}) + +(define_expand "negvti3" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (label_ref (match_operand 2 "" ""))] + "" + { + emit_insn (gen_negdi_carryout (gen_lowpart (DImode, operands[0]), + gen_lowpart (DImode, operands[1]))); + emit_insn (gen_negvdi_carryinV (gen_highpart (DImode, operands[0]), + gen_highpart (DImode, operands[1]))); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]); + + DONE; + } +) + +(define_insn "negdi_carryout" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) (match_operand:DI 1 "register_operand" "r"))) + (set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_dup 1)))] + "" + "negs\\t%0, %1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "negvdi_carryinV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (neg:TI (plus:TI + (ltu:TI (reg:CC CC_REGNUM) (const_int 0)) + (sign_extend:TI (match_operand:DI 1 "register_operand" "r")))) + 
(sign_extend:TI + (neg:DI (plus:DI (ltu:DI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1)))))) + (set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (plus:DI (ltu:DI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1))))] + "" + "ngcs\\t%0, %1" + [(set_attr "type" "alus_sreg")] +) + (define_insn "*sub3_compare0" [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (match_operand:GPI 2 "register_operand" "r")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r") @@ -2399,7 +3022,7 @@ ;; zero_extend version of above (define_insn "*subsi3_compare0_uxtw" [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r") + (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "rk") (match_operand:SI 2 "register_operand" "r")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") @@ -2409,10 +3032,26 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "sub3_compare1_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "register_operand" "rk,rk") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" + "@ + subs\\t%0, %1, %2 + adds\\t%0, %1, #%n2" + [(set_attr "type" "alus_imm")] +) + (define_insn "sub3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC - (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 1 "aarch64_reg_or_zero" "rkZ") (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ"))) (set (match_operand:GPI 0 "register_operand" "=r") (minus:GPI (match_dup 1) (match_dup 2)))] @@ -2421,21 +3060,8 @@ [(set_attr "type" "alus_sreg")] ) -(define_insn "sub3_compare1_imm" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 3 "const_int_operand" "n"))) - (set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (match_dup 1) - (match_operand:GPI 2 "aarch64_sub_immediate" "J")))] - "INTVAL (operands[3]) == -INTVAL (operands[2])" - "subs\\t%0, %1, #%n2" - [(set_attr "type" "alus_sreg")] -) - (define_peephole2 - [(set (match_operand:GPI 0 "register_operand") + [(set (match_operand:GPI 0 "aarch64_general_reg") (minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero") (match_operand:GPI 2 "aarch64_reg_or_zero"))) (set (reg:CC CC_REGNUM) @@ -2460,7 +3086,7 @@ (compare:CC (match_operand:GPI 1 "aarch64_reg_or_zero") (match_operand:GPI 2 "aarch64_reg_or_zero"))) - (set (match_operand:GPI 0 "register_operand") + (set (match_operand:GPI 0 "aarch64_general_reg") (minus:GPI (match_dup 1) (match_dup 2)))] "" @@ -2473,9 +3099,9 @@ ) (define_peephole2 - [(set (match_operand:GPI 0 "register_operand") + [(set (match_operand:GPI 0 "aarch64_general_reg") (plus:GPI (match_operand:GPI 1 "register_operand") - (match_operand:GPI 2 "aarch64_sub_immediate"))) + (match_operand:GPI 2 "aarch64_plus_immediate"))) (set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) @@ -2485,7 +3111,7 @@ [(const_int 0)] { emit_insn (gen_sub3_compare1_imm (operands[0], operands[1], - operands[2], operands[3])); + operands[3], operands[2])); DONE; } ) @@ -2498,14 +3124,14 @@ (compare:CC (match_operand:GPI 1 "register_operand") (match_operand:GPI 3 "const_int_operand"))) - (set (match_operand:GPI 0 "register_operand") + (set (match_operand:GPI 0 "aarch64_general_reg") (plus:GPI 
(match_dup 1) - (match_operand:GPI 2 "aarch64_sub_immediate")))] + (match_operand:GPI 2 "aarch64_plus_immediate")))] "INTVAL (operands[3]) == -INTVAL (operands[2])" [(const_int 0)] { emit_insn (gen_sub3_compare1_imm (operands[0], operands[1], - operands[2], operands[3])); + operands[3], operands[2])); DONE; } ) @@ -2518,7 +3144,7 @@ (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] "" "sub\\t%0, %3, %1, %2" - [(set_attr "type" "alu_shift_imm")] + [(set_attr "autodetect_type" "alu_shift__op2")] ) ;; zero_extend version of above @@ -2531,31 +3157,7 @@ (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] "" "sub\\t%w0, %w3, %w1, %2" - [(set_attr "type" "alu_shift_imm")] -) - -(define_insn "*sub_mul_imm_" - [(set (match_operand:GPI 0 "register_operand" "=r") - (minus:GPI (match_operand:GPI 3 "register_operand" "r") - (mult:GPI - (match_operand:GPI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_pwr_2_" "n"))))] - "" - "sub\\t%0, %3, %1, lsl %p2" - [(set_attr "type" "alu_shift_imm")] -) - -;; zero_extend version of above -(define_insn "*sub_mul_imm_si_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI - (minus:SI (match_operand:SI 3 "register_operand" "r") - (mult:SI - (match_operand:SI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] - "" - "sub\\t%w0, %w3, %w1, lsl %p2" - [(set_attr "type" "alu_shift_imm")] + [(set_attr "autodetect_type" "alu_shift__op2")] ) (define_insn "*sub__" @@ -2564,7 +3166,7 @@ (ANY_EXTEND:GPI (match_operand:ALLX 2 "register_operand" "r"))))] "" - "sub\\t%0, %1, %2, xt" + "sub\\t%0, %1, %w2, xt" [(set_attr "type" "alu_ext")] ) @@ -2587,7 +3189,7 @@ (match_operand:ALLX 2 "register_operand" "r")) (match_operand 3 "aarch64_imm3" "Ui3"))))] "" - "sub\\t%0, %1, %2, xt %3" + "sub\\t%0, %1, %w2, xt %3" [(set_attr "type" "alu_ext")] ) @@ -2604,34 +3206,6 @@ [(set_attr "type" "alu_ext")] ) -(define_insn "*sub__multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (minus:GPI (match_operand:GPI 4 "register_operand" "rk") - (ANY_EXTRACT:GPI - (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n") - (const_int 0))))] - "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" - "sub\\t%0, %4, %1, xt%e3 %p2" - [(set_attr "type" "alu_ext")] -) - -;; zero_extend version of above -(define_insn "*sub_si_multp2_uxtw" - [(set (match_operand:DI 0 "register_operand" "=rk") - (zero_extend:DI - (minus:SI (match_operand:SI 4 "register_operand" "rk") - (ANY_EXTRACT:SI - (mult:SI (match_operand:SI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n") - (const_int 0)))))] - "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" - "sub\\t%w0, %w4, %w1, xt%e3 %p2" - [(set_attr "type" "alu_ext")] -) - ;; The hardware description is op1 + ~op2 + C. 
;; = op1 + (-op2 + 1) + (1 - !C) ;; = op1 - op2 - 1 + 1 - !C @@ -2725,6 +3299,136 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "usub3_carryinC" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (zero_extend: + (match_operand:GPI 1 "aarch64_reg_or_zero")) + (plus: + (zero_extend: + (match_operand:GPI 2 "register_operand")) + (ltu: (reg:CC CC_REGNUM) (const_int 0))))) + (set (match_operand:GPI 0 "register_operand") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*usub3_carryinC_z1" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (plus: + (zero_extend: + (match_operand:GPI 1 "register_operand" "r")) + (match_operand: 2 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (neg:GPI (match_dup 1)) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%0, zr, %1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*usub3_carryinC_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (zero_extend: + (match_operand:GPI 1 "register_operand" "r")) + (match_operand: 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%0, %1, zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*usub3_carryinC" + [(set (reg:CC CC_REGNUM) + (compare:CC + (zero_extend: + (match_operand:GPI 1 "register_operand" "r")) + (plus: + (zero_extend: + (match_operand:GPI 2 "register_operand" "r")) + (match_operand: 3 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "sub3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (minus: + (sign_extend: + (match_operand:GPI 1 "aarch64_reg_or_zero")) + (plus: + (sign_extend: + (match_operand:GPI 2 "register_operand")) + (ltu: (reg:CC CC_REGNUM) (const_int 0)))) + (sign_extend: + (minus:GPI (match_dup 1) + (plus:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 2)))))) + (set (match_operand:GPI 0 "register_operand") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*sub3_carryinV_z2" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (minus: + (sign_extend: (match_operand:GPI 1 "register_operand" "r")) + (match_operand: 2 "aarch64_borrow_operation" "")) + (sign_extend: + (minus:GPI (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" ""))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) (match_dup 3)))] + "" + "sbcs\\t%0, %1, zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (minus: + (sign_extend: + (match_operand:GPI 1 "register_operand" "r")) + (plus: + (sign_extend: + (match_operand:GPI 2 "register_operand" "r")) + (match_operand: 3 "aarch64_borrow_operation" ""))) + (sign_extend: + (minus:GPI + (match_dup 1) + (plus:GPI (match_operand:GPI 4 "aarch64_borrow_operation" "") + (match_dup 2)))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_dup 4)))] + "" + "sbcs\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt_shift2" [(set (match_operand:GPI 0 
"register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk") @@ -2736,7 +3440,7 @@ "* operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3]))); - return \"sub\t%0, %4, %1, uxt%e3 %2\";" + return \"sub\t%0, %4, %w1, uxt%e3 %2\";" [(set_attr "type" "alu_ext")] ) @@ -2757,41 +3461,9 @@ [(set_attr "type" "alu_ext")] ) -(define_insn "*sub_uxt_multp2" - [(set (match_operand:GPI 0 "register_operand" "=rk") - (minus:GPI (match_operand:GPI 4 "register_operand" "rk") - (and:GPI - (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n"))))] - "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0" - "* - operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), - INTVAL (operands[3]))); - return \"sub\t%0, %4, %1, uxt%e3 %p2\";" - [(set_attr "type" "alu_ext")] -) - -;; zero_extend version of above -(define_insn "*sub_uxtsi_multp2_uxtw" - [(set (match_operand:DI 0 "register_operand" "=rk") - (zero_extend:DI - (minus:SI (match_operand:SI 4 "register_operand" "rk") - (and:SI - (mult:SI (match_operand:SI 1 "register_operand" "r") - (match_operand 2 "aarch64_pwr_imm3" "Up3")) - (match_operand 3 "const_int_operand" "n")))))] - "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0" - "* - operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), - INTVAL (operands[3]))); - return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";" - [(set_attr "type" "alu_ext")] -) - (define_expand "abs2" - [(match_operand:GPI 0 "register_operand" "") - (match_operand:GPI 1 "register_operand" "")] + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] "" { rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); @@ -2809,7 +3481,7 @@ neg\\t%0, %1 neg\\t%0, %1" [(set_attr "type" "alu_sreg, neon_neg") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] ) ;; zero_extend version of above @@ -2886,7 +3558,7 @@ (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] "" "neg\\t%0, %1, %2" - [(set_attr "type" "alu_shift_imm")] + [(set_attr "autodetect_type" "alu_shift__op2")] ) ;; zero_extend version of above @@ -2898,29 +3570,19 @@ (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] "" "neg\\t%w0, %w1, %2" - [(set_attr "type" "alu_shift_imm")] + [(set_attr "autodetect_type" "alu_shift__op2")] ) -(define_insn "*neg_mul_imm_2" - [(set (match_operand:GPI 0 "register_operand" "=r") - (neg:GPI (mult:GPI - (match_operand:GPI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_pwr_2_" "n"))))] - "" - "neg\\t%0, %1, lsl %p2" - [(set_attr "type" "alu_shift_imm")] -) - -;; zero_extend version of above -(define_insn "*neg_mul_imm_si2_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI - (neg:SI (mult:SI - (match_operand:SI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] - "" - "neg\\t%w0, %w1, lsl %p2" - [(set_attr "type" "alu_shift_imm")] +(define_insn "*neg_asr_si2_extr" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operator:SI 4 "subreg_lowpart_operator" + [(sign_extract:DI + (match_operand:DI 1 "register_operand" "r") + (match_operand 3 "aarch64_simd_shift_imm_offset_si" "n") + (match_operand 2 "aarch64_simd_shift_imm_offset_si" "n"))])))] + "INTVAL (operands[2]) + INTVAL (operands[3]) == 32" + "neg\\t%w0, %w1, asr %2" + [(set_attr "autodetect_type" "alu_shift_asr_op2")] ) (define_insn "mul3" @@ -3134,7 
+3796,7 @@ (define_insn "cmp" [(set (reg:CC CC_REGNUM) - (compare:CC (match_operand:GPI 0 "register_operand" "r,r,r") + (compare:CC (match_operand:GPI 0 "register_operand" "rk,rk,rk") (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))] "" "@ @@ -3183,7 +3845,7 @@ (match_operand:ALLX 0 "register_operand" "r")) (match_operand:GPI 1 "register_operand" "r")))] "" - "cmp\\t%1, %0, xt" + "cmp\\t%1, %w0, xt" [(set_attr "type" "alus_ext")] ) @@ -3195,7 +3857,7 @@ (match_operand 1 "aarch64_imm3" "Ui3")) (match_operand:GPI 2 "register_operand" "r")))] "" - "cmp\\t%2, %0, xt %1" + "cmp\\t%2, %w0, xt %1" [(set_attr "type" "alus_ext")] ) @@ -3204,10 +3866,10 @@ ;; ------------------------------------------------------------------- (define_expand "cstore4" - [(set (match_operand:SI 0 "register_operand" "") + [(set (match_operand:SI 0 "register_operand") (match_operator:SI 1 "aarch64_comparison_operator" - [(match_operand:GPI 2 "register_operand" "") - (match_operand:GPI 3 "aarch64_plus_operand" "")]))] + [(match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "aarch64_plus_operand")]))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3229,10 +3891,10 @@ (define_expand "cstore4" - [(set (match_operand:SI 0 "register_operand" "") + [(set (match_operand:SI 0 "register_operand") (match_operator:SI 1 "aarch64_comparison_operator_mode" - [(match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]))] + [(match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "aarch64_fp_compare_operand")]))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3317,13 +3979,13 @@ ) (define_expand "cmov6" - [(set (match_operand:GPI 0 "register_operand" "") + [(set (match_operand:GPI 0 "register_operand") (if_then_else:GPI (match_operator 1 "aarch64_comparison_operator" - [(match_operand:GPI 2 "register_operand" "") - (match_operand:GPI 3 "aarch64_plus_operand" "")]) - (match_operand:GPI 4 "register_operand" "") - (match_operand:GPI 5 "register_operand" "")))] + [(match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "aarch64_plus_operand")]) + (match_operand:GPI 4 "register_operand") + (match_operand:GPI 5 "register_operand")))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3333,13 +3995,13 @@ ) (define_expand "cmov6" - [(set (match_operand:GPF 0 "register_operand" "") + [(set (match_operand:GPF 0 "register_operand") (if_then_else:GPF (match_operator 1 "aarch64_comparison_operator" - [(match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]) - (match_operand:GPF 4 "register_operand" "") - (match_operand:GPF 5 "register_operand" "")))] + [(match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "aarch64_fp_compare_operand")]) + (match_operand:GPF 4 "register_operand") + (match_operand:GPF 5 "register_operand")))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3417,10 +4079,10 @@ ) (define_expand "movcc" - [(set (match_operand:ALLI 0 "register_operand" "") - (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator" "") - (match_operand:ALLI 2 "register_operand" "") - (match_operand:ALLI 3 "register_operand" "")))] + [(set (match_operand:ALLI 0 "register_operand") + (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator") + (match_operand:ALLI 2 "register_operand") + (match_operand:ALLI 3 "register_operand")))] "" { rtx ccreg; @@ -3436,10 +4098,10 
@@ ) (define_expand "movcc" - [(set (match_operand:GPI 0 "register_operand" "") - (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") - (match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "register_operand" "")))] + [(set (match_operand:GPI 0 "register_operand") + (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") + (match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "register_operand")))] "" { rtx ccreg; @@ -3455,10 +4117,10 @@ ) (define_expand "movcc" - [(set (match_operand:GPF 0 "register_operand" "") - (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") - (match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "register_operand" "")))] + [(set (match_operand:GPF 0 "register_operand") + (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator") + (match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "register_operand")))] "" { rtx ccreg; @@ -3474,10 +4136,10 @@ ) (define_expand "cc" - [(set (match_operand:GPI 0 "register_operand" "") - (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") - (NEG_NOT:GPI (match_operand:GPI 2 "register_operand" "")) - (match_operand:GPI 3 "register_operand" "")))] + [(set (match_operand:GPI 0 "register_operand") + (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") + (NEG_NOT:GPI (match_operand:GPI 2 "register_operand")) + (match_operand:GPI 3 "register_operand")))] "" { rtx ccreg; @@ -3563,6 +4225,44 @@ [(set_attr "type" "csel")] ) +(define_insn "*csinv3_uxtw_insn1" + [(set (match_operand:DI 0 "register_operand" "=r") + (if_then_else:DI + (match_operand 1 "aarch64_comparison_operation" "") + (zero_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (zero_extend:DI + (NEG_NOT:SI (match_operand:SI 3 "register_operand" "r")))))] + "" + "cs\\t%w0, %w2, %w3, %m1" + [(set_attr "type" "csel")] +) + +(define_insn "*csinv3_uxtw_insn2" + [(set (match_operand:DI 0 "register_operand" "=r") + (if_then_else:DI + (match_operand 1 "aarch64_comparison_operation" "") + (zero_extend:DI + (NEG_NOT:SI (match_operand:SI 2 "register_operand" "r"))) + (zero_extend:DI + (match_operand:SI 3 "register_operand" "r"))))] + "" + "cs\\t%w0, %w3, %w2, %M1" + [(set_attr "type" "csel")] +) + +(define_insn "*csinv3_uxtw_insn3" + [(set (match_operand:DI 0 "register_operand" "=r") + (if_then_else:DI + (match_operand 1 "aarch64_comparison_operation" "") + (zero_extend:DI + (NEG_NOT:SI (match_operand:SI 2 "register_operand" "r"))) + (const_int 0)))] + "" + "cs\\t%w0, wzr, %w2, %M1" + [(set_attr "type" "csel")] +) + ;; If X can be loaded by a single CNT[BHWD] instruction, ;; ;; A = UMAX (B, X) @@ -3654,7 +4354,7 @@ \\t%0, %1, %2 \\t%0., %1., %2." 
[(set_attr "type" "logic_reg,logic_imm,neon_logic") - (set_attr "simd" "*,*,yes")] + (set_attr "arch" "*,*,simd")] ) ;; zero_extend version of above @@ -3744,6 +4444,59 @@ [(set_attr "type" "logic_shift_imm")] ) +(define_split + [(set (match_operand:GPI 0 "register_operand") + (LOGICAL:GPI + (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "aarch64_shift_imm_")) + (match_operand:GPI 3 "const_int_operand")) + (zero_extend:GPI (match_operand 4 "register_operand"))))] + "can_create_pseudo_p () + && ((paradoxical_subreg_p (operands[1]) + && rtx_equal_p (SUBREG_REG (operands[1]), operands[4])) + || (REG_P (operands[1]) + && REG_P (operands[4]) + && REGNO (operands[1]) == REGNO (operands[4]))) + && (trunc_int_for_mode (GET_MODE_MASK (GET_MODE (operands[4])) + << INTVAL (operands[2]), mode) + == INTVAL (operands[3]))" + [(set (match_dup 5) (zero_extend:GPI (match_dup 4))) + (set (match_dup 0) (LOGICAL:GPI (ashift:GPI (match_dup 5) (match_dup 2)) + (match_dup 5)))] + "operands[5] = gen_reg_rtx (mode);" +) + +(define_split + [(set (match_operand:GPI 0 "register_operand") + (LOGICAL:GPI + (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "aarch64_shift_imm_")) + (match_operand:GPI 4 "const_int_operand")) + (and:GPI (match_dup 1) (match_operand:GPI 3 "const_int_operand"))))] + "can_create_pseudo_p () + && pow2_or_zerop (UINTVAL (operands[3]) + 1) + && (trunc_int_for_mode (UINTVAL (operands[3]) + << INTVAL (operands[2]), mode) + == INTVAL (operands[4]))" + [(set (match_dup 5) (and:GPI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (LOGICAL:GPI (ashift:GPI (match_dup 5) (match_dup 2)) + (match_dup 5)))] + "operands[5] = gen_reg_rtx (mode);" +) + +(define_split + [(set (match_operand:GPI 0 "register_operand") + (LOGICAL:GPI + (ashift:GPI (sign_extend:GPI (match_operand 1 "register_operand")) + (match_operand:QI 2 "aarch64_shift_imm_")) + (sign_extend:GPI (match_dup 1))))] + "can_create_pseudo_p ()" + [(set (match_dup 3) (sign_extend:GPI (match_dup 1))) + (set (match_dup 0) (LOGICAL:GPI (ashift:GPI (match_dup 3) (match_dup 2)) + (match_dup 3)))] + "operands[3] = gen_reg_rtx (mode);" +) + (define_insn "*_rol3" [(set (match_operand:GPI 0 "register_operand" "=r") (LOGICAL:GPI (rotate:GPI @@ -3751,7 +4504,7 @@ (match_operand:QI 2 "aarch64_shift_imm_" "n")) (match_operand:GPI 3 "register_operand" "r")))] "" - "\\t%0, %3, %1, ror ( - %2)" + "\\t%0, %3, %1, ror #( - %2)" [(set_attr "type" "logic_shift_imm")] ) @@ -3776,7 +4529,7 @@ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) (match_operand:SI 3 "register_operand" "r"))))] "" - "\\t%w0, %w3, %w1, ror (32 - %2)" + "\\t%w0, %w3, %w1, ror #(32 - %2)" [(set_attr "type" "logic_shift_imm")] ) @@ -3788,7 +4541,16 @@ mvn\\t%0, %1 mvn\\t%0.8b, %1.8b" [(set_attr "type" "logic_reg,neon_logic") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] +) + +(define_insn "*one_cmpl_zero_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (not:SI (match_operand:SI 1 "register_operand" "r"))))] + "" + "mvn\\t%w0, %w1" + [(set_attr "type" "logic_reg")] ) (define_insn "*one_cmpl_2" @@ -3811,7 +4573,7 @@ \\t%0, %2, %1 \\t%0., %2., %1." 
[(set_attr "type" "logic_reg,neon_logic") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] ) (define_insn "*_one_cmplsidi3_ze" @@ -3851,7 +4613,7 @@ (set (match_dup 0) (not:GPI (match_dup 0)))] "" [(set_attr "type" "logic_reg,multiple") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] ) (define_insn "*and_one_cmpl3_compare0" @@ -4021,7 +4783,6 @@ { rtx v = gen_reg_rtx (V8QImode); rtx v1 = gen_reg_rtx (V8QImode); - rtx r = gen_reg_rtx (QImode); rtx in = operands[1]; rtx out = operands[0]; if(mode == SImode) @@ -4035,8 +4796,7 @@ } emit_move_insn (v, gen_lowpart (V8QImode, in)); emit_insn (gen_popcountv8qi2 (v1, v)); - emit_insn (gen_reduc_plus_scal_v8qi (r, v1)); - emit_insn (gen_zero_extendqi2 (out, r)); + emit_insn (gen_aarch64_zero_extend_reduc_plus_v8qi (out, v1)); DONE; }) @@ -4084,7 +4844,7 @@ [(set_attr "type" "alus_imm")] ) -(define_insn "*ands_compare0" +(define_insn "*ands_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r")) @@ -4249,7 +5009,7 @@ /* (SZ - cnt) % SZ == -cnt % SZ */ if (CONST_INT_P (operands[2])) { - operands[2] = GEN_INT ((-INTVAL (operands[2])) + operands[2] = GEN_INT ((-UINTVAL (operands[2])) & (GET_MODE_BITSIZE (mode) - 1)); if (operands[2] == const0_rtx) { @@ -4395,8 +5155,8 @@ lsl\t%0, %1, %2 shl\t%0, %1, %2 ushl\t%0, %1, %2" - [(set_attr "simd" "no,no,yes,yes") - (set_attr "type" "bfx,shift_reg,neon_shift_imm, neon_shift_reg")] + [(set_attr "type" "bfx,shift_reg,neon_shift_imm, neon_shift_reg") + (set_attr "arch" "*,*,simd,simd")] ) ;; Logical right shift using SISD or Integer instruction @@ -4413,8 +5173,8 @@ ushr\t%0, %1, %2 # #" - [(set_attr "simd" "no,no,yes,yes,yes") - (set_attr "type" "bfx,shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] + [(set_attr "type" "bfx,shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg") + (set_attr "arch" "*,*,simd,simd,simd")] ) (define_split @@ -4461,8 +5221,8 @@ sshr\t%0, %1, %2 # #" - [(set_attr "simd" "no,no,yes,yes,yes") - (set_attr "type" "bfx,shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] + [(set_attr "type" "bfx,shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg") + (set_attr "arch" "*,*,simd,simd,simd")] ) (define_split @@ -4502,8 +5262,7 @@ UNSPEC_SISD_USHL))] "TARGET_SIMD" "ushl\t%d0, %d1, %d2" - [(set_attr "simd" "yes") - (set_attr "type" "neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_ushl_2s" @@ -4513,8 +5272,7 @@ UNSPEC_USHL_2S))] "TARGET_SIMD" "ushl\t%0.2s, %1.2s, %2.2s" - [(set_attr "simd" "yes") - (set_attr "type" "neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_sisd_sshl" @@ -4524,8 +5282,7 @@ UNSPEC_SISD_SSHL))] "TARGET_SIMD" "sshl\t%d0, %d1, %d2" - [(set_attr "simd" "yes") - (set_attr "type" "neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_sshl_2s" @@ -4535,8 +5292,7 @@ UNSPEC_SSHL_2S))] "TARGET_SIMD" "sshl\t%0.2s, %1.2s, %2.2s" - [(set_attr "simd" "yes") - (set_attr "type" "neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_sisd_neg_qi" @@ -4545,8 +5301,7 @@ UNSPEC_SISD_NEG))] "TARGET_SIMD" "neg\t%d0, %d1" - [(set_attr "simd" "yes") - (set_attr "type" "neon_neg")] + [(set_attr "type" "neon_neg")] ) ;; Rotate right @@ -4641,6 +5396,22 @@ [(set_attr "type" "rotate_imm")] ) +(define_insn "*extrsi5_insn_di" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 3 "const_int_operand" 
"n")) + (match_operator:SI 6 "subreg_lowpart_operator" + [(zero_extract:DI + (match_operand:DI 2 "register_operand" "r") + (match_operand 5 "const_int_operand" "n") + (match_operand 4 "const_int_operand" "n"))])))] + "UINTVAL (operands[3]) < 32 + && UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32 + && INTVAL (operands[3]) == INTVAL (operands[5])" + "extr\\t%w0, %w1, %w2, %4" + [(set_attr "type" "rotate_imm")] +) + (define_insn "*ror3_insn" [(set (match_operand:GPI 0 "register_operand" "=r") (rotate:GPI (match_operand:GPI 1 "register_operand" "r") @@ -4711,7 +5482,7 @@ ;; ------------------------------------------------------------------- (define_expand "" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand") (ANY_EXTRACT:DI (match_operand:DI 1 "register_operand") (match_operand 2 "aarch64_simd_shift_imm_offset_di") @@ -4782,32 +5553,165 @@ || (UINTVAL (value) & mask) == mask) FAIL; - /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */ - if (width == 16 && (pos % 16) == 0) - DONE; - } - operands[3] = force_reg (mode, value); -}) + /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */ + if (width == 16 && (pos % 16) == 0) + DONE; + } + operands[3] = force_reg (mode, value); +}) + +(define_insn "*insv_reg" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (match_operand:GPI 3 "register_operand" "r"))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[2]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (mode)))" + "bfi\\t%0, %3, %2, %1" + [(set_attr "type" "bfm")] +) + +(define_insn "*aarch64_bfi4" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (zero_extend:GPI (match_operand:ALLX 3 "register_operand" "r")))] + "UINTVAL (operands[1]) <= " + "bfi\\t%0, %3, %2, %1" + [(set_attr "type" "bfm")] +) + +;; Match a bfi instruction where the shift of OP3 means that we are +;; actually copying the least significant bits of OP3 into OP0 by way +;; of the AND masks and the IOR instruction. A similar instruction +;; with the two parts of the IOR swapped around was never triggered +;; in a bootstrap build and test of GCC so it was not included. 
+ +(define_insn "*aarch64_bfi5_shift" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (and:GPI (ashift:GPI + (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "aarch64_simd_shift_imm_" "n")) + (match_operand:GPI 5 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), + UINTVAL (operands[4]), + UINTVAL(operands[5]))" + "bfi\t%0, %3, %4, %P5" + [(set_attr "type" "bfm")] +) + +(define_insn "*aarch64_bfi5_shift_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (ashift:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "aarch64_simd_shift_imm_" "n")) + (match_operand:GPI 3 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 4 "register_operand" "0") + (match_operand:GPI 5 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[5]), + UINTVAL (operands[2]), + UINTVAL(operands[3]))" + "bfi\t%0, %1, %2, %P3" + [(set_attr "type" "bfm")] +) + +;; Like *aarch64_bfi5_shift but with no and of the ashift because +;; the shift is large enough to remove the need for an AND instruction. + +(define_insn "*aarch64_bfi4_noand" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (ashift:GPI + (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "aarch64_simd_shift_imm_" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), + UINTVAL (operands[4]), + HOST_WIDE_INT_M1U << UINTVAL (operands[4]) )" +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (mode) - UINTVAL (operands[4])); + return "bfi\t%0, %3, %4, %5"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*aarch64_bfi4_noand_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (ashift:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "aarch64_simd_shift_imm_" "n")) + (and:GPI (match_operand:GPI 3 "register_operand" "0") + (match_operand:GPI 4 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[4]), + UINTVAL (operands[2]), + HOST_WIDE_INT_M1U << UINTVAL (operands[2]) )" +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (mode) - UINTVAL (operands[2])); + return "bfi\t%0, %1, %2, %5"; +} + [(set_attr "type" "bfm")] +) + +;; Like *aarch64_bfi5_shift but with no shifting, we are just +;; copying the least significant bits of OP3 to OP0. We need two versions +;; of the instruction to handle different checks on the constant values. 
+ +(define_insn "*aarch64_bfi4_noshift" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0, + UINTVAL (operands[4]))" + "bfi\t%0, %3, 0, %P4" + [(set_attr "type" "bfm")] +) -(define_insn "*insv_reg" - [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") - (match_operand 1 "const_int_operand" "n") - (match_operand 2 "const_int_operand" "n")) - (match_operand:GPI 3 "register_operand" "r"))] - "!(UINTVAL (operands[1]) == 0 - || (UINTVAL (operands[2]) + UINTVAL (operands[1]) - > GET_MODE_BITSIZE (mode)))" - "bfi\\t%0, %3, %2, %1" +(define_insn "*aarch64_bfi4_noshift_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0, + UINTVAL (operands[4]))" + "bfi\t%0, %3, 0, %P4" [(set_attr "type" "bfm")] ) -(define_insn "*aarch64_bfi4" - [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") - (match_operand 1 "const_int_operand" "n") - (match_operand 2 "const_int_operand" "n")) - (zero_extend:GPI (match_operand:ALLX 3 "register_operand" "r")))] - "UINTVAL (operands[1]) <= " - "bfi\\t%0, %3, %2, %1" +(define_insn "*aarch64_bfxil_extr" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (zero_extract:GPI + (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "aarch64_simd_shift_imm_" "n") + (match_operand:GPI 5 "aarch64_simd_shift_imm_" "n"))))] + "UINTVAL (operands[2]) == HOST_WIDE_INT_M1U << INTVAL (operands[4]) + && INTVAL (operands[4]) + && (UINTVAL (operands[4]) + UINTVAL (operands[5]) + <= GET_MODE_BITSIZE (mode))" + "bfxil\t%0, %3, %5, %4" + [(set_attr "type" "bfm")] +) + +(define_insn "*aarch64_bfxilsi_extrdi" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operator:SI 6 "subreg_lowpart_operator" + [(zero_extract:DI + (match_operand:DI 3 "register_operand" "r") + (match_operand:SI 4 "aarch64_simd_shift_imm_si" "n") + (match_operand:SI 5 "aarch64_simd_shift_imm_si" "n"))])))] + "UINTVAL (operands[2]) == HOST_WIDE_INT_M1U << INTVAL (operands[4]) + && INTVAL (operands[4]) + && UINTVAL (operands[4]) + UINTVAL (operands[5]) <= 32" + "bfxil\t%w0, %w3, %5, %4" [(set_attr "type" "bfm")] ) @@ -4852,6 +5756,35 @@ [(set_attr "type" "bfx")] ) +;; Match sbfiz pattern in a shift left + shift right operation. 
+ +(define_insn "*ashift_extv_bfiz" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ashift:GPI (sign_extract:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_simd_shift_imm_offset_" "n") + (const_int 0)) + (match_operand 3 "aarch64_simd_shift_imm_" "n")))] + "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), + 1, GET_MODE_BITSIZE (mode) - 1)" + "sbfiz\\t%0, %1, %3, %2" + [(set_attr "type" "bfx")] +) + +(define_insn "*ashiftsi_extvdi_bfiz" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI + (match_operator:SI 4 "subreg_lowpart_operator" + [(sign_extract:DI + (match_operand:DI 1 "register_operand" "r") + (match_operand 2 "aarch64_simd_shift_imm_offset_si") + (const_int 0))]) + (match_operand 3 "aarch64_simd_shift_imm_si")))] + "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), + 1, GET_MODE_BITSIZE (SImode) - 1)" + "sbfiz\\t%w0, %w1, %3, %2" + [(set_attr "type" "bfx")] +) + ;; When the bit position and width of the equivalent extraction add up to 32 ;; we can use a W-reg LSL instruction taking advantage of the implicit ;; zero-extension of the X-reg. @@ -4886,6 +5819,58 @@ [(set_attr "type" "rev")] ) +(define_insn "*aarch64_bfxil" + [(set (match_operand:GPI 0 "register_operand" "=r,r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") + (match_operand:GPI 3 "const_int_operand" "n, Ulc")) + (and:GPI (match_operand:GPI 2 "register_operand" "0,r") + (match_operand:GPI 4 "const_int_operand" "Ulc, n"))))] + "(INTVAL (operands[3]) == ~INTVAL (operands[4])) + && (aarch64_high_bits_all_ones_p (INTVAL (operands[3])) + || aarch64_high_bits_all_ones_p (INTVAL (operands[4])))" + { + switch (which_alternative) + { + case 0: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[3]))); + return "bfxil\\t%0, %1, 0, %3"; + case 1: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[4]))); + return "bfxil\\t%0, %2, 0, %3"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "bfm")] +) + +; Zero-extended version of above (aarch64_bfxil) +(define_insn "*aarch64_bfxilsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (ior:SI (and:SI (match_operand:SI 1 "register_operand" + "r,0") + (match_operand:SI 3 "const_int_operand" "n, Ulc")) + (and:SI (match_operand:SI 2 "register_operand" "0,r") + (match_operand:SI 4 "const_int_operand" "Ulc, n")))))] + "(INTVAL (operands[3]) == ~INTVAL (operands[4])) + && (aarch64_high_bits_all_ones_p (INTVAL (operands[3])) + || aarch64_high_bits_all_ones_p (INTVAL (operands[4])))" + { + switch (which_alternative) + { + case 0: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[3]))); + return "bfxil\\t%w0, %w1, 0, %3"; + case 1: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[4]))); + return "bfxil\\t%w0, %w2, 0, %3"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "bfm")] +) + ;; There are no canonicalisation rules for the position of the lshiftrt, ashift ;; operations within an IOR/AND RTX, therefore we have two patterns matching ;; each valid permutation. @@ -4974,57 +5959,94 @@ [(set_attr "type" "f_cvtf2i")] ) -;; fma - no throw +;; fma - expand fma into patterns with the accumulator operand first since +;; reusing the accumulator results in better register allocation. +;; The register allocator considers copy preferences in operand order, +;; so this prefers fmadd s0, s1, s2, s0 over fmadd s1, s1, s2, s0. 
+ +(define_expand "fma4" + [(set (match_operand:GPF_F16 0 "register_operand") + (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand") + (match_operand:GPF_F16 2 "register_operand") + (match_operand:GPF_F16 3 "register_operand")))] + "TARGET_FLOAT" +) -(define_insn "fma4" +(define_insn "*aarch64_fma4" [(set (match_operand:GPF_F16 0 "register_operand" "=w") - (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w") - (match_operand:GPF_F16 2 "register_operand" "w") - (match_operand:GPF_F16 3 "register_operand" "w")))] + (fma:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w") + (match_operand:GPF_F16 3 "register_operand" "w") + (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" - "fmadd\\t%0, %1, %2, %3" + "fmadd\\t%0, %2, %3, %1" [(set_attr "type" "fmac")] ) -(define_insn "fnma4" +(define_expand "fnma4" + [(set (match_operand:GPF_F16 0 "register_operand") + (fma:GPF_F16 + (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand")) + (match_operand:GPF_F16 2 "register_operand") + (match_operand:GPF_F16 3 "register_operand")))] + "TARGET_FLOAT" +) + +(define_insn "*aarch64_fnma4" [(set (match_operand:GPF_F16 0 "register_operand" "=w") (fma:GPF_F16 - (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")) - (match_operand:GPF_F16 2 "register_operand" "w") - (match_operand:GPF_F16 3 "register_operand" "w")))] + (neg:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w")) + (match_operand:GPF_F16 3 "register_operand" "w") + (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" - "fmsub\\t%0, %1, %2, %3" + "fmsub\\t%0, %2, %3, %1" [(set_attr "type" "fmac")] ) -(define_insn "fms4" + +(define_expand "fms4" + [(set (match_operand:GPF 0 "register_operand") + (fma:GPF (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") + (neg:GPF (match_operand:GPF 3 "register_operand"))))] + "TARGET_FLOAT" +) + +(define_insn "*aarch64_fms4" [(set (match_operand:GPF 0 "register_operand" "=w") - (fma:GPF (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w") - (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + (fma:GPF (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w") + (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))] "TARGET_FLOAT" - "fnmsub\\t%0, %1, %2, %3" + "fnmsub\\t%0, %2, %3, %1" [(set_attr "type" "fmac")] ) -(define_insn "fnms4" +(define_expand "fnms4" + [(set (match_operand:GPF 0 "register_operand") + (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand")) + (match_operand:GPF 2 "register_operand") + (neg:GPF (match_operand:GPF 3 "register_operand"))))] + "TARGET_FLOAT" +) + +(define_insn "*aarch64_fnms4" [(set (match_operand:GPF 0 "register_operand" "=w") - (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) - (match_operand:GPF 2 "register_operand" "w") - (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + (fma:GPF (neg:GPF (match_operand:GPF 2 "register_operand" "w")) + (match_operand:GPF 3 "register_operand" "w") + (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))] "TARGET_FLOAT" - "fnmadd\\t%0, %1, %2, %3" + "fnmadd\\t%0, %2, %3, %1" [(set_attr "type" "fmac")] ) ;; If signed zeros are ignored, -(a * b + c) = -a * b - c. 
-(define_insn "*fnmadd4" +(define_insn "*aarch64_fnmadd4" [(set (match_operand:GPF 0 "register_operand" "=w") - (neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w") - (match_operand:GPF 3 "register_operand" "w"))))] + (neg:GPF (fma:GPF (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w") + (match_operand:GPF 1 "register_operand" "w"))))] "!HONOR_SIGNED_ZEROS (mode) && TARGET_FLOAT" - "fnmadd\\t%0, %1, %2, %3" + "fnmadd\\t%0, %2, %3, %1" [(set_attr "type" "fmac")] ) @@ -5084,13 +6106,14 @@ ;; and making r = w more expensive (define_insn "_trunc2" - [(set (match_operand:GPI 0 "register_operand" "=?r,w") + [(set (match_operand:GPI 0 "register_operand" "=w,?r") (FIXUORS:GPI (match_operand: 1 "register_operand" "w,w")))] "TARGET_FLOAT" "@ - fcvtz\t%0, %1 - fcvtz\t%0, %1" - [(set_attr "type" "f_cvtf2i,neon_fp_to_int_s")] + fcvtz\t%0, %1 + fcvtz\t%0, %1" + [(set_attr "type" "neon_fp_to_int_s,f_cvtf2i") + (set_attr "arch" "simd,fp")] ) ;; Convert HF -> SI or DI @@ -5114,18 +6137,66 @@ [(set_attr "type" "f_cvtf2i")] ) +(define_insn "*fix_to_zero_extenddi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unsigned_fix:SI + (match_operand:GPF 1 "register_operand" "w"))))] + "TARGET_FLOAT" + "fcvtzu\t%w0, %1" + [(set_attr "type" "f_cvtf2i")] +) + +;; Equal width integer to fp and multiply combine. +(define_insn "*aarch64_cvtf2_mult" + [(set (match_operand:GPF 0 "register_operand" "=w,w") + (mult:GPF (FLOATUORS:GPF + (match_operand: 1 "register_operand" "w,?r")) + (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt,Dt")))] + "TARGET_FLOAT" + { + operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); + switch (which_alternative) + { + case 0: + return "cvtf\t%0, %1, #%2"; + case 1: + return "cvtf\t%0, %1, #%2"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_int_to_fp_,f_cvti2f") + (set_attr "arch" "simd,fp")] +) + +;; Unequal width integer to fp and multiply combine. +(define_insn "*aarch64_cvtf2_mult" + [(set (match_operand:GPF 0 "register_operand" "=w") + (mult:GPF (FLOATUORS:GPF + (match_operand: 1 "register_operand" "r")) + (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt")))] + "TARGET_FLOAT" + { + operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); + return "cvtf\t%0, %1, #%2"; + } + [(set_attr "type" "f_cvti2f")] +) + +;; Equal width integer to fp conversion. (define_insn "2" [(set (match_operand:GPF 0 "register_operand" "=w,w") - (FLOATUORS:GPF (match_operand: 1 "register_operand" "w,r")))] + (FLOATUORS:GPF (match_operand: 1 "register_operand" "w,?r")))] "TARGET_FLOAT" "@ cvtf\t%0, %1 cvtf\t%0, %1" - [(set_attr "simd" "yes,no") - (set_attr "fp" "no,yes") - (set_attr "type" "neon_int_to_fp_,f_cvti2f")] + [(set_attr "type" "neon_int_to_fp_,f_cvti2f") + (set_attr "arch" "simd,fp")] ) +;; Unequal width integer to fp conversions. 
(define_insn "2" [(set (match_operand:GPF 0 "register_operand" "=w") (FLOATUORS:GPF (match_operand: 1 "register_operand" "r")))] @@ -5208,8 +6279,7 @@ \t%0, %1, #%2 \t%0, %1, #%2" [(set_attr "type" "f_cvtf2i, neon_fp_to_int_") - (set_attr "fp" "yes, *") - (set_attr "simd" "*, yes")] + (set_attr "arch" "fp,simd")] ) (define_insn "3" @@ -5222,8 +6292,7 @@ \t%0, %1, #%2 \t%0, %1, #%2" [(set_attr "type" "f_cvti2f, neon_int_to_fp_") - (set_attr "fp" "yes, *") - (set_attr "simd" "*, yes")] + (set_attr "arch" "fp,simd")] ) (define_insn "hf3" @@ -5350,8 +6419,8 @@ ) (define_expand "sqrt2" - [(set (match_operand:GPF_F16 0 "register_operand" "=w") - (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand") + (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand")))] "TARGET_FLOAT" { if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) @@ -5457,8 +6526,7 @@ [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r") (unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r") (match_operand:GPF 2 "register_operand" "w,w,0,0") - (match_operand: 3 "register_operand" - "0,w,w,X")] + (match_operand: 3 "register_operand" "0,w,w,X")] UNSPEC_COPYSIGN))] "TARGET_FLOAT && TARGET_SIMD" "@ @@ -5511,7 +6579,8 @@ ;; ------------------------------------------------------------------- ;; Reload Scalar Floating point modes from constant pool. ;; The AArch64 port doesn't have __int128 constant move support. -(define_expand "aarch64_reload_movcp" +;; The patterns need constraints due to TARGET_SECONDARY_RELOAD hook. +(define_expand "@aarch64_reload_movcp" [(set (match_operand:GPF_TF 0 "register_operand" "=w") (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S"))) (clobber (match_operand:P 2 "register_operand" "=&r"))] @@ -5524,7 +6593,7 @@ ) ;; Reload Vector modes from constant pool. -(define_expand "aarch64_reload_movcp" +(define_expand "@aarch64_reload_movcp" [(set (match_operand:VALL 0 "register_operand" "=w") (mem:VALL (match_operand 1 "aarch64_constant_pool_symref" "S"))) (clobber (match_operand:P 2 "register_operand" "=&r"))] @@ -5536,7 +6605,7 @@ } ) -(define_expand "aarch64_reload_mov" +(define_expand "@aarch64_reload_mov" [(set (match_operand:TX 0 "register_operand" "=w") (match_operand:TX 1 "register_operand" "w")) (clobber (match_operand:DI 2 "register_operand" "=&r")) @@ -5556,7 +6625,7 @@ ;; after or during reload as we don't want these patterns to start ;; kicking in during the combiner. 
-(define_insn "aarch64_movdi_low" +(define_insn "@aarch64_movdi_low" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extract:DI (match_operand:TX 1 "register_operand" "w") (const_int 64) (const_int 0)))] @@ -5566,7 +6635,7 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_movdi_high" +(define_insn "@aarch64_movdi_high" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extract:DI (match_operand:TX 1 "register_operand" "w") (const_int 64) (const_int 64)))] @@ -5576,7 +6645,7 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_movhigh_di" +(define_insn "@aarch64_movhigh_di" [(set (zero_extract:TX (match_operand:TX 0 "register_operand" "+w") (const_int 64) (const_int 64)) (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] @@ -5586,7 +6655,7 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_movlow_di" +(define_insn "@aarch64_movlow_di" [(set (match_operand:TX 0 "register_operand" "=w") (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] "TARGET_FLOAT && (reload_completed || reload_in_progress)" @@ -5611,9 +6680,9 @@ ;; rodata section. (define_expand "add_losym" - [(set (match_operand 0 "register_operand" "=r") - (lo_sum (match_operand 1 "register_operand" "r") - (match_operand 2 "aarch64_valid_symref" "S")))] + [(set (match_operand 0 "register_operand") + (lo_sum (match_operand 1 "register_operand") + (match_operand 2 "aarch64_valid_symref")))] "" { machine_mode mode = GET_MODE (operands[0]); @@ -5681,13 +6750,23 @@ [(set_attr "type" "load_4")] ) -(define_insn "ldr_got_tiny" - [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] - UNSPEC_GOTTINYPIC))] +(define_insn "@ldr_got_tiny_" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(match_operand:PTR 1 "aarch64_valid_symref" "S")] + UNSPEC_GOTTINYPIC))] "" - "ldr\\t%0, %L1" - [(set_attr "type" "load_8")] + "ldr\t%0, %L1" + [(set_attr "type" "load_")] +) + +(define_insn "ldr_got_tiny_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unspec:SI [(match_operand:DI 1 "aarch64_valid_symref" "S")] + UNSPEC_GOTTINYPIC)))] + "TARGET_ILP32" + "ldr\t%w0, %L1" + [(set_attr "type" "load_4")] ) (define_insn "aarch64_load_tp_hard" @@ -5702,9 +6781,10 @@ ;; instructions in the TLS stubs, in order to enable linker relaxation. ;; Therefore we treat the stubs as an atomic sequence. 
(define_expand "tlsgd_small_" - [(parallel [(set (match_operand 0 "register_operand" "") + [(parallel [(set (match_operand:PTR 0 "register_operand") (call (mem:DI (match_dup 2)) (const_int 1))) - (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS) + (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) + (unspec:DI [(match_operand 1 "aarch64_valid_symref")] UNSPEC_GOTSMALLTLS) (clobber (reg:DI LR_REGNUM))])] "" { @@ -5712,9 +6792,10 @@ }) (define_insn "*tlsgd_small_" - [(set (match_operand 0 "register_operand" "") + [(set (match_operand:PTR 0 "register_operand" "") (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1))) - (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) + (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) + (unspec:DI [(match_operand 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) (clobber (reg:DI LR_REGNUM)) ] "" @@ -5814,7 +6895,12 @@ "TARGET_TLS_DESC" { if (TARGET_SVE) - emit_insn (gen_tlsdesc_small_sve_ (operands[0])); + { + rtx abi = gen_int_mode (aarch64_tlsdesc_abi_id (), DImode); + rtx_insn *call + = emit_call_insn (gen_tlsdesc_small_sve_ (operands[0], abi)); + RTL_CONST_CALL_P (call) = 1; + } else emit_insn (gen_tlsdesc_small_advsimd_ (operands[0])); DONE; @@ -5829,49 +6915,26 @@ UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:DI 1 "=r"))] + (clobber (match_scratch:DI 1 "=r")) + (use (reg:DI FP_REGNUM))] "TARGET_TLS_DESC && !TARGET_SVE" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" [(set_attr "type" "call") (set_attr "length" "16")]) -;; For SVE, model tlsdesc calls as clobbering all vector and predicate -;; registers, on top of the usual R0 and LR. In reality the calls -;; preserve the low 128 bits of the vector registers, but we don't -;; yet have a way of representing that in the instruction pattern. +;; For SVE, model tlsdesc calls as normal calls, with the callee ABI +;; describing the extra call-preserved guarantees. This would work +;; for non-SVE too, but avoiding a call is probably better if we can. 
(define_insn "tlsdesc_small_sve_" [(set (reg:PTR R0_REGNUM) - (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] - UNSPEC_TLSDESC)) - (clobber (reg:DI LR_REGNUM)) - (clobber (reg:CC CC_REGNUM)) - (clobber (reg:XI V0_REGNUM)) - (clobber (reg:XI V4_REGNUM)) - (clobber (reg:XI V8_REGNUM)) - (clobber (reg:XI V12_REGNUM)) - (clobber (reg:XI V16_REGNUM)) - (clobber (reg:XI V20_REGNUM)) - (clobber (reg:XI V24_REGNUM)) - (clobber (reg:XI V28_REGNUM)) - (clobber (reg:VNx2BI P0_REGNUM)) - (clobber (reg:VNx2BI P1_REGNUM)) - (clobber (reg:VNx2BI P2_REGNUM)) - (clobber (reg:VNx2BI P3_REGNUM)) - (clobber (reg:VNx2BI P4_REGNUM)) - (clobber (reg:VNx2BI P5_REGNUM)) - (clobber (reg:VNx2BI P6_REGNUM)) - (clobber (reg:VNx2BI P7_REGNUM)) - (clobber (reg:VNx2BI P8_REGNUM)) - (clobber (reg:VNx2BI P9_REGNUM)) - (clobber (reg:VNx2BI P10_REGNUM)) - (clobber (reg:VNx2BI P11_REGNUM)) - (clobber (reg:VNx2BI P12_REGNUM)) - (clobber (reg:VNx2BI P13_REGNUM)) - (clobber (reg:VNx2BI P14_REGNUM)) - (clobber (reg:VNx2BI P15_REGNUM)) - (clobber (match_scratch:DI 1 "=r"))] + (call (mem:DI (unspec:PTR + [(match_operand 0 "aarch64_valid_symref")] + UNSPEC_TLSDESC)) + (const_int 0))) + (unspec:DI [(match_operand:DI 1 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))] "TARGET_TLS_DESC && TARGET_SVE" - "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" + "adrp\\tx0, %A0\;ldr\\t30, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\tx30" [(set_attr "type" "call") (set_attr "length" "16")]) @@ -5885,6 +6948,16 @@ [(set_attr "length" "0")] ) +(define_insn "aarch64_fjcvtzs" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:DF 1 "register_operand" "w")] + UNSPEC_FJCVTZS)) + (clobber (reg:CC CC_REGNUM))] + "TARGET_JSCVT" + "fjcvtzs\\t%w0, %d1" + [(set_attr "type" "f_cvtf2i")] +) + ;; Pointer authentication patterns are always provided. In architecture ;; revisions prior to ARMv8.3-A these HINT instructions operate as NOPs. ;; This lets the user write portable software which authenticates pointers @@ -5898,7 +6971,7 @@ [(set (reg:DI R30_REGNUM) (unspec:DI [(reg:DI R30_REGNUM) (reg:DI SP_REGNUM)] PAUTH_LR_SP))] "" - "hint\t // asp"; + "hint\t // sp"; ) ;; Signing/Authenticating X17 using X16 as the salt. @@ -5907,7 +6980,7 @@ [(set (reg:DI R17_REGNUM) (unspec:DI [(reg:DI R17_REGNUM) (reg:DI R16_REGNUM)] PAUTH_17_16))] "" - "hint\t // a1716"; + "hint\t // 1716"; ) ;; Stripping the signature in R30. @@ -5930,7 +7003,7 @@ ) (define_insn "probe_stack_range" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=rk") (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0") (match_operand:DI 2 "register_operand" "r")] UNSPECV_PROBE_STACK_RANGE))] @@ -5941,9 +7014,28 @@ [(set_attr "length" "32")] ) +;; This instruction is used to generate the stack clash stack adjustment and +;; probing loop. We can't change the control flow during prologue and epilogue +;; code generation. So we must emit a volatile unspec and expand it later on. 
+ +(define_insn "@probe_sve_stack_clash_" + [(set (match_operand:P 0 "register_operand" "=rk") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r") + (match_operand:P 3 "const_int_operand" "n") + (match_operand:P 4 "aarch64_plus_immediate" "L")] + UNSPECV_PROBE_STACK_RANGE))] + "TARGET_SVE" +{ + return aarch64_output_probe_sve_stack_clash (operands[0], operands[2], + operands[3], operands[4]); +} + [(set_attr "length" "28")] +) + ;; Named pattern for expanding thread pointer reference. (define_expand "get_thread_pointerdi" - [(match_operand:DI 0 "register_operand" "=r")] + [(match_operand:DI 0 "register_operand")] "" { rtx tmp = aarch64_load_tp (operands[0]); @@ -5952,98 +7044,121 @@ DONE; }) -;; Named patterns for stack smashing protection. +;; Defined for -mstack-protector-guard=sysreg, which goes through this +;; pattern rather than stack_protect_combined_set. Our implementation +;; of the latter can handle both. (define_expand "stack_protect_set" [(match_operand 0 "memory_operand") - (match_operand 1 "memory_operand")] + (match_operand 1 "")] "" { - machine_mode mode = GET_MODE (operands[0]); + emit_insn (gen_stack_protect_combined_set (operands[0], operands[1])); + DONE; +}) +(define_expand "stack_protect_combined_set" + [(match_operand 0 "memory_operand") + (match_operand 1 "")] + "" +{ + machine_mode mode = GET_MODE (operands[0]); + operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1], + AARCH64_SALT_SSP_SET); emit_insn ((mode == DImode ? gen_stack_protect_set_di : gen_stack_protect_set_si) (operands[0], operands[1])); DONE; }) +;; Operand 1 is either AARCH64_SALT_SSP_SET or AARCH64_SALT_SSP_TEST. +(define_insn "reg_stack_protect_address_" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(match_operand 1 "const_int_operand")] + UNSPEC_SSP_SYSREG))] + "aarch64_stack_protector_guard != SSP_GLOBAL" + { + char buf[150]; + snprintf (buf, 150, "mrs\\t%%0, %s", + aarch64_stack_protector_guard_reg_str); + output_asm_insn (buf, operands); + return ""; + } + [(set_attr "type" "mrs")]) + +;; DO NOT SPLIT THIS PATTERN. It is important for security reasons that the +;; canary value does not live beyond the life of this sequence. (define_insn "stack_protect_set_" [(set (match_operand:PTR 0 "memory_operand" "=m") (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] UNSPEC_SP_SET)) (set (match_scratch:PTR 2 "=&r") (const_int 0))] "" - "ldr\\t%2, %1\;str\\t%2, %0\;mov\t%2,0" + "ldr\\t%2, %1\;str\\t%2, %0\;mov\t%2, 0" [(set_attr "length" "12") (set_attr "type" "multiple")]) +;; Defined for -mstack-protector-guard=sysreg, which goes through this +;; pattern rather than stack_protect_combined_test. Our implementation +;; of the latter can handle both. (define_expand "stack_protect_test" [(match_operand 0 "memory_operand") - (match_operand 1 "memory_operand") + (match_operand 1 "") (match_operand 2)] "" { - rtx result; - machine_mode mode = GET_MODE (operands[0]); - - result = gen_reg_rtx(mode); + emit_insn (gen_stack_protect_combined_test (operands[0], operands[1], + operands[2])); + DONE; +}) +(define_expand "stack_protect_combined_test" + [(match_operand 0 "memory_operand") + (match_operand 1 "") + (match_operand 2)] + "" +{ + machine_mode mode = GET_MODE (operands[0]); + operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1], + AARCH64_SALT_SSP_TEST); emit_insn ((mode == DImode - ? 
gen_stack_protect_test_di - : gen_stack_protect_test_si) (result, - operands[0], - operands[1])); - - if (mode == DImode) - emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), - result, const0_rtx, operands[2])); - else - emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), - result, const0_rtx, operands[2])); + ? gen_stack_protect_test_di + : gen_stack_protect_test_si) (operands[0], operands[1])); + + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + emit_jump_insn (gen_condjump (gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx), + cc_reg, operands[2])); DONE; }) +;; DO NOT SPLIT THIS PATTERN. It is important for security reasons that the +;; canary value does not live beyond the end of this sequence. (define_insn "stack_protect_test_" - [(set (match_operand:PTR 0 "register_operand" "=r") - (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m") - (match_operand:PTR 2 "memory_operand" "m")] - UNSPEC_SP_TEST)) + [(set (reg:CC CC_REGNUM) + (unspec:CC [(match_operand:PTR 0 "memory_operand" "m") + (match_operand:PTR 1 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:PTR 2 "=&r")) (clobber (match_scratch:PTR 3 "=&r"))] "" - "ldr\t%3, %1\;ldr\t%0, %2\;eor\t%0, %3, %0" - [(set_attr "length" "12") + "ldr\t%2, %0\;ldr\t%3, %1\;subs\t%2, %2, %3\;mov\t%3, 0" + [(set_attr "length" "16") (set_attr "type" "multiple")]) -;; Write Floating-point Control Register. -(define_insn "set_fpcr" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)] - "" - "msr\\tfpcr, %0" - [(set_attr "type" "mrs")]) - -;; Read Floating-point Control Register. -(define_insn "get_fpcr" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))] - "" - "mrs\\t%0, fpcr" - [(set_attr "type" "mrs")]) - -;; Write Floating-point Status Register. -(define_insn "set_fpsr" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)] +;; Write into the Floating-point Status or Control Register. +(define_insn "@aarch64_set_" + [(unspec_volatile [(match_operand:GPI 0 "register_operand" "r")] SET_FPSCR)] "" - "msr\\tfpsr, %0" + "msr\\t, %0" [(set_attr "type" "mrs")]) -;; Read Floating-point Status Register. -(define_insn "get_fpsr" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))] +;; Read into the Floating-point Status or Control Register. +(define_insn "@aarch64_get_" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec_volatile:GPI [(const_int 0)] GET_FPSCR))] "" - "mrs\\t%0, fpsr" + "mrs\\t%0, " [(set_attr "type" "mrs")]) - ;; Define the subtract-one-and-jump insns so loop.c ;; knows what to generate. (define_expand "doloop_end" @@ -6081,11 +7196,324 @@ DONE; }) -;; Helper for aarch64.c code. -(define_expand "set_clobber_cc" - [(parallel [(set (match_operand 0) - (match_operand 1)) - (clobber (reg:CC CC_REGNUM))])]) +;; Track speculation through conditional branches. We assume that +;; SPECULATION_TRACKER_REGNUM is reserved for this purpose when necessary. +(define_insn "speculation_tracker" + [(set (reg:DI SPECULATION_TRACKER_REGNUM) + (unspec:DI [(reg:DI SPECULATION_TRACKER_REGNUM) (match_operand 0)] + UNSPEC_SPECULATION_TRACKER))] + "" + { + operands[1] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn ("csel\\t%1, %1, xzr, %m0", operands); + return ""; + } + [(set_attr "type" "csel")] +) + +;; Like speculation_tracker, but track the inverse condition. 
+(define_insn "speculation_tracker_rev" + [(set (reg:DI SPECULATION_TRACKER_REGNUM) + (unspec:DI [(reg:DI SPECULATION_TRACKER_REGNUM) (match_operand 0)] + UNSPEC_SPECULATION_TRACKER_REV))] + "" + { + operands[1] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn ("csel\\t%1, %1, xzr, %M0", operands); + return ""; + } + [(set_attr "type" "csel")] +) + +;; BTI instructions +(define_insn "bti_noarg" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_NOARG)] + "" + "hint\t32 // bti" + [(set_attr "type" "no_insn")] +) + +(define_insn "bti_c" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_C)] + "" + "hint\t34 // bti c" + [(set_attr "type" "no_insn")] +) + +(define_insn "bti_j" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_J)] + "" + "hint\t36 // bti j" + [(set_attr "type" "no_insn")] +) + +(define_insn "bti_jc" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_JC)] + "" + "hint\t38 // bti jc" + [(set_attr "type" "no_insn")] +) + +;; Hard speculation barrier. +(define_insn "speculation_barrier" + [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] + "" + "isb\;dsb\\tsy" + [(set_attr "length" "8") + (set_attr "type" "block") + (set_attr "speculation_barrier" "true")] +) + +;; Support for __builtin_speculation_safe_value when we have speculation +;; tracking enabled. Use the speculation tracker to decide whether to +;; copy operand 1 to the target, or to copy the fail value (operand 2). +(define_expand "@despeculate_copy" + [(set (match_operand:ALLI_TI 0 "register_operand") + (unspec_volatile:ALLI_TI + [(match_operand:ALLI_TI 1 "register_operand") + (match_operand:ALLI_TI 2 "aarch64_reg_or_zero") + (use (reg:DI SPECULATION_TRACKER_REGNUM)) + (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] + "" + " + { + if (operands[2] == const0_rtx) + { + rtx tracker; + if (mode == TImode) + tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + else + tracker = gen_rtx_REG (mode, SPECULATION_TRACKER_REGNUM); + + emit_insn (gen_despeculate_simple (operands[0], operands[1], + tracker)); + DONE; + } + } + " +) + +;; Patterns to match despeculate_copy. Note that "hint 0x14" is the +;; encoding for CSDB, but will work in older versions of the assembler. 
+(define_insn "*despeculate_copy_insn" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (unspec_volatile:ALLI + [(match_operand:ALLI 1 "register_operand" "r") + (match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ") + (use (reg:DI SPECULATION_TRACKER_REGNUM)) + (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] + "" + { + operands[3] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn ("cmp\\t%3, #0\;csel\\t%0, %1, %2, ne\;hint\t0x14 // csdb", + operands); + return ""; + } + [(set_attr "length" "12") + (set_attr "type" "block") + (set_attr "speculation_barrier" "true")] +) + +;; Pattern to match despeculate_copyti +(define_insn "*despeculate_copyti_insn" + [(set (match_operand:TI 0 "register_operand" "=r") + (unspec_volatile:TI + [(match_operand:TI 1 "register_operand" "r") + (match_operand:TI 2 "aarch64_reg_or_zero" "rZ") + (use (reg:DI SPECULATION_TRACKER_REGNUM)) + (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] + "" + { + operands[3] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn + ("cmp\\t%3, #0\;csel\\t%0, %1, %2, ne\;csel\\t%H0, %H1, %H2, ne\;hint\t0x14 // csdb", + operands); + return ""; + } + [(set_attr "length" "16") + (set_attr "type" "block") + (set_attr "speculation_barrier" "true")] +) + +(define_insn "despeculate_simple" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (unspec_volatile:ALLI + [(match_operand:ALLI 1 "register_operand" "r") + (use (match_operand:ALLI 2 "register_operand" ""))] + UNSPECV_SPECULATION_BARRIER))] + "" + "and\\t%0, %1, %2\;hint\t0x14 // csdb" + [(set_attr "type" "block") + (set_attr "length" "8") + (set_attr "speculation_barrier" "true")] +) + +(define_insn "despeculate_simpleti" + [(set (match_operand:TI 0 "register_operand" "=r") + (unspec_volatile:TI + [(match_operand:TI 1 "register_operand" "r") + (use (match_operand:DI 2 "register_operand" ""))] + UNSPECV_SPECULATION_BARRIER))] + "" + "and\\t%0, %1, %2\;and\\t%H0, %H1, %2\;hint\t0x14 // csdb" + [(set_attr "type" "block") + (set_attr "length" "12") + (set_attr "speculation_barrier" "true")] +) + +(define_insn "aarch64_" + [(set (match_operand:VSFDF 0 "register_operand" "=w") + (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")] + FRINTNZX))] + "TARGET_FRINT && TARGET_FLOAT + && !(VECTOR_MODE_P (mode) && !TARGET_SIMD)" + "\\t%0, %1" + [(set_attr "type" "f_rint")] +) + +;; Transactional Memory Extension (TME) instructions. 
+ +(define_insn "tstart" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_TSTART)) + (clobber (mem:BLK (scratch)))] + "TARGET_TME" + "tstart\\t%0" + [(set_attr "type" "tme")] +) + +(define_insn "ttest" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPEC_TTEST)) + (clobber (mem:BLK (scratch)))] + "TARGET_TME" + "ttest\\t%0" + [(set_attr "type" "tme")] +) + +(define_insn "tcommit" + [(unspec_volatile:BLK [(const_int 0)] UNSPECV_TCOMMIT) + (clobber (mem:BLK (scratch)))] + "TARGET_TME" + "tcommit" + [(set_attr "type" "tme")] +) + +(define_insn "tcancel" + [(unspec_volatile:BLK + [(match_operand 0 "const_int_operand" "n")] UNSPECV_TCANCEL) + (clobber (mem:BLK (scratch)))] + "TARGET_TME && (UINTVAL (operands[0]) <= 65535)" + "tcancel\\t#%0" + [(set_attr "type" "tme")] +) + +(define_insn "aarch64_rndr" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDR)) + (set (reg:CC_Z CC_REGNUM) + (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDR))] + "TARGET_RNG" + "mrs\t%0, RNDR" + [(set_attr "type" "mrs")] +) + +(define_insn "aarch64_rndrrs" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDRRS)) + (set (reg:CC_Z CC_REGNUM) + (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDRRS))] + "TARGET_RNG" + "mrs\t%0, RNDRRS" + [(set_attr "type" "mrs")] +) + +;; Memory Tagging Extension (MTE) instructions. + +(define_insn "irg" + [(set (match_operand:DI 0 "register_operand" "=rk") + (ior:DI + (and:DI (match_operand:DI 1 "register_operand" "rk") + (const_int -1080863910568919041)) ;; 0xf0ff... + (ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")] + UNSPEC_GEN_TAG_RND) + (const_int 56))))] + "TARGET_MEMTAG" + "irg\\t%0, %1, %2" + [(set_attr "type" "memtag")] +) + +(define_insn "gmi" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (ashift:DI + (const_int 1) + (and:QI (lshiftrt:DI + (match_operand:DI 1 "register_operand" "rk") + (const_int 56)) (const_int 15))) + (match_operand:DI 2 "register_operand" "r")))] + "TARGET_MEMTAG" + "gmi\\t%0, %1, %2" + [(set_attr "type" "memtag")] +) + +(define_insn "addg" + [(set (match_operand:DI 0 "register_operand" "=rk") + (ior:DI + (and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk") + (match_operand:DI 2 "aarch64_granule16_uimm6" "i")) + (const_int -1080863910568919041)) ;; 0xf0ff... + (ashift:DI + (unspec:QI + [(and:QI (lshiftrt:DI (match_dup 1) (const_int 56)) (const_int 15)) + (match_operand:QI 3 "aarch64_memtag_tag_offset" "i")] + UNSPEC_GEN_TAG) + (const_int 56))))] + "TARGET_MEMTAG" + "addg\\t%0, %1, #%2, #%3" + [(set_attr "type" "memtag")] +) + +(define_insn "subp" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (and:DI (match_operand:DI 1 "register_operand" "rk") + (const_int 72057594037927935)) ;; 0x00ff... + (and:DI (match_operand:DI 2 "register_operand" "rk") + (const_int 72057594037927935))))] ;; 0x00ff... + "TARGET_MEMTAG" + "subp\\t%0, %1, %2" + [(set_attr "type" "memtag")] +) + +;; LDG will use the 16-byte aligned value of the address. +(define_insn "ldg" + [(set (match_operand:DI 0 "register_operand" "+r") + (ior:DI + (and:DI (match_dup 0) (const_int -1080863910568919041)) ;; 0xf0ff... 
+ (ashift:DI + (mem:QI (unspec:DI + [(and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk") + (match_operand:DI 2 "aarch64_granule16_simm9" "i")) + (const_int -16))] UNSPEC_TAG_SPACE)) + (const_int 56))))] + "TARGET_MEMTAG" + "ldg\\t%0, [%1, #%2]" + [(set_attr "type" "memtag")] +) + +;; STG doesn't align the address but aborts with alignment fault +;; when the address is not 16-byte aligned. +(define_insn "stg" + [(set (mem:QI (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "rk") + (match_operand:DI 2 "aarch64_granule16_simm9" "i"))] + UNSPEC_TAG_SPACE)) + (and:QI (lshiftrt:DI (match_operand:DI 0 "register_operand" "rk") + (const_int 56)) (const_int 15)))] + "TARGET_MEMTAG" + "stg\\t%0, [%1, #%2]" + [(set_attr "type" "memtag")] +) ;; AdvSIMD Stuff (include "aarch64-simd.md") @@ -6098,3 +7526,6 @@ ;; SVE. (include "aarch64-sve.md") + +;; SVE2. +(include "aarch64-sve2.md") diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 52eaf8c6f408f..32191cf1acf43 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -1,5 +1,5 @@ ; Machine description for AArch64 architecture. -; Copyright (C) 2009-2018 Free Software Foundation, Inc. +; Copyright (C) 2009-2021 Free Software Foundation, Inc. ; Contributed by ARM Ltd. ; ; This file is part of GCC. @@ -31,7 +31,10 @@ TargetSave const char *x_aarch64_override_tune_string TargetVariable -unsigned long aarch64_isa_flags = 0 +uint64_t aarch64_isa_flags = 0 + +TargetVariable +unsigned aarch64_enable_bti = 2 ; The TLS dialect names to use with -mtls-dialect. @@ -61,23 +64,27 @@ EnumValue Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE) mbig-endian -Target Report RejectNegative Mask(BIG_END) +Target RejectNegative Mask(BIG_END) Assume target CPU is configured as big endian. mgeneral-regs-only -Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Save +Target RejectNegative Mask(GENERAL_REGS_ONLY) Save Generate code which uses only the general registers. +mharden-sls= +Target RejectNegative Joined Var(aarch64_harden_sls_string) +Generate code to mitigate against straight line speculation. + mfix-cortex-a53-835769 -Target Report Var(aarch64_fix_a53_err835769) Init(2) Save +Target Var(aarch64_fix_a53_err835769) Init(2) Save Workaround for ARM Cortex-A53 Erratum number 835769. mfix-cortex-a53-843419 -Target Report Var(aarch64_fix_a53_err843419) Init(2) Save +Target Var(aarch64_fix_a53_err843419) Init(2) Save Workaround for ARM Cortex-A53 Erratum number 843419. mlittle-endian -Target Report RejectNegative InverseMask(BIG_END) +Target RejectNegative InverseMask(BIG_END) Assume target CPU is configured as little endian. mcmodel= @@ -85,11 +92,11 @@ Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_C Specify the code model. mstrict-align -Target Report RejectNegative Mask(STRICT_ALIGN) Save +Target Mask(STRICT_ALIGN) Save Don't assume that unaligned accesses are handled by the system. momit-leaf-frame-pointer -Target Report Var(flag_omit_leaf_frame_pointer) Init(2) Save +Target Var(flag_omit_leaf_frame_pointer) Init(2) Save Omit the frame pointer in leaf functions. mtls-dialect= @@ -116,24 +123,24 @@ EnumValue Enum(aarch64_tls_size) String(48) Value(48) march= -Target RejectNegative ToLower Joined Var(aarch64_arch_string) --march=ARCH Use features of architecture ARCH. +Target RejectNegative Negative(march=) ToLower Joined Var(aarch64_arch_string) +Use features of architecture ARCH. 
mcpu= -Target RejectNegative ToLower Joined Var(aarch64_cpu_string) --mcpu=CPU Use features of and optimize for CPU. +Target RejectNegative Negative(mcpu=) ToLower Joined Var(aarch64_cpu_string) +Use features of and optimize for CPU. mtune= -Target RejectNegative ToLower Joined Var(aarch64_tune_string) --mtune=CPU Optimize for CPU. +Target RejectNegative Negative(mtune=) ToLower Joined Var(aarch64_tune_string) +Optimize for CPU. mabi= Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI_DEFAULT) --mabi=ABI Generate code that conforms to the specified ABI. +Generate code that conforms to the specified ABI. moverride= Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) --moverride=STRING Power users only! Override CPU optimization parameters. +-moverride= Power users only! Override CPU optimization parameters. Enum Name(aarch64_abi) Type(int) @@ -146,11 +153,15 @@ EnumValue Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64) mpc-relative-literal-loads -Target Report Save Var(pcrelative_literal_loads) Init(2) Save +Target Save Var(pcrelative_literal_loads) Init(2) Save PC relative literal loads. +mbranch-protection= +Target RejectNegative Joined Var(aarch64_branch_protection_string) Save +Use branch-protection features. + msign-return-address= -Target RejectNegative Report Joined Enum(aarch64_ra_sign_scope_t) Var(aarch64_ra_sign_scope) Init(AARCH64_FUNCTION_NONE) Save +Target WarnRemoved RejectNegative Joined Enum(aarch64_ra_sign_scope_t) Var(aarch64_ra_sign_scope) Init(AARCH64_FUNCTION_NONE) Save Select return address signing scope. Enum @@ -167,20 +178,20 @@ EnumValue Enum(aarch64_ra_sign_scope_t) String(all) Value(AARCH64_FUNCTION_ALL) mlow-precision-recip-sqrt -Common Var(flag_mrecip_low_precision_sqrt) Optimization +Target Var(flag_mrecip_low_precision_sqrt) Optimization Enable the reciprocal square root approximation. Enabling this reduces precision of reciprocal square root results to about 16 bits for single precision and to 32 bits for double precision. mlow-precision-sqrt -Common Var(flag_mlow_precision_sqrt) Optimization +Target Var(flag_mlow_precision_sqrt) Optimization Enable the square root approximation. Enabling this reduces precision of square root results to about 16 bits for single precision and to 32 bits for double precision. If enabled, it implies -mlow-precision-recip-sqrt. mlow-precision-div -Common Var(flag_mlow_precision_div) Optimization +Target Var(flag_mlow_precision_div) Optimization Enable the division approximation. Enabling this reduces precision of division results to about 16 bits for single precision and to 32 bits for double precision. @@ -209,8 +220,63 @@ Enum(sve_vector_bits) String(2048) Value(SVE_2048) msve-vector-bits= Target RejectNegative Joined Enum(sve_vector_bits) Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE) --msve-vector-bits=N Set the number of bits in an SVE vector register to N. +-msve-vector-bits= Set the number of bits in an SVE vector register. mverbose-cost-dump -Common Undocumented Var(flag_aarch64_verbose_cost) +Target Undocumented Var(flag_aarch64_verbose_cost) Enables verbose cost model dumping in the debug dump files. + +mtrack-speculation +Target Var(aarch64_track_speculation) +Generate code to track when the CPU might be speculating incorrectly. + +mstack-protector-guard= +Target RejectNegative Joined Enum(stack_protector_guard) Var(aarch64_stack_protector_guard) Init(SSP_GLOBAL) +Use given stack-protector guard. 
+ +Enum +Name(stack_protector_guard) Type(enum stack_protector_guard) +Valid arguments to -mstack-protector-guard=: + +EnumValue +Enum(stack_protector_guard) String(sysreg) Value(SSP_SYSREG) + +EnumValue +Enum(stack_protector_guard) String(global) Value(SSP_GLOBAL) + +mstack-protector-guard-reg= +Target Joined RejectNegative String Var(aarch64_stack_protector_guard_reg_str) +Use the system register specified on the command line as the stack protector +guard register. This option is for use with fstack-protector-strong and +not for use in user-land code. + +mstack-protector-guard-offset= +Target Joined RejectNegative String Var(aarch64_stack_protector_guard_offset_str) +Use an immediate to offset from the stack protector guard register, sp_el0. +This option is for use with fstack-protector-strong and not for use in +user-land code. + +TargetVariable +long aarch64_stack_protector_guard_offset = 0 + +moutline-atomics +Target Var(aarch64_flag_outline_atomics) Init(2) Save +Generate local calls to out-of-line atomic operations. + +-param=aarch64-sve-compare-costs= +Target Joined UInteger Var(aarch64_sve_compare_costs) Init(1) IntegerRange(0, 1) Param +When vectorizing for SVE, consider using unpacked vectors for smaller elements and use the cost model to pick the cheapest approach. Also use the cost model to choose between SVE and Advanced SIMD vectorization. + +-param=aarch64-float-recp-precision= +Target Joined UInteger Var(aarch64_float_recp_precision) Init(1) IntegerRange(1, 5) Param +The number of Newton iterations for calculating the reciprocal for float type. The precision of division is proportional to this param when division approximation is enabled. The default value is 1. + +-param=aarch64-double-recp-precision= +Target Joined UInteger Var(aarch64_double_recp_precision) Init(2) IntegerRange(1, 5) Param +The number of Newton iterations for calculating the reciprocal for double type. The precision of division is proportional to this param when division approximation is enabled. The default value is 2. + +-param=aarch64-autovec-preference= +Target Joined UInteger Var(aarch64_autovec_preference) Init(0) IntegerRange(0, 4) Param + +-param=aarch64-loop-vect-issue-rate-niters= +Target Joined UInteger Var(aarch64_loop_vect_issue_rate_niters) Init(6) IntegerRange(0, 65536) Param diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 8504c3f1d7647..73b29f4b8dc9c 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -1,6 +1,6 @@ /* AArch64 Non-NEON ACLE intrinsics include file. - Copyright (C) 2014-2018 Free Software Foundation, Inc. + Copyright (C) 2014-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. 
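The two reciprocal-precision params above are easier to read with the underlying math in view: for an estimate r of 1/a, one Newton-Raphson step computes r' = r * (2 - a * r), and each step roughly doubles the number of correct bits, which is why the float default is 1 iteration and the double default is 2. A scalar sketch of the refinement (illustrative only; the compiler's division approximation uses FRECPE for the initial estimate and FRECPS for the (2 - a*r) factor):

static inline double
recip_newton (double a, double estimate, int iterations)
{
  double r = estimate;               /* what FRECPE would provide */
  for (int i = 0; i < iterations; i++)
    r = r * (2.0 - a * r);           /* FRECPS supplies the 2 - a*r part */
  return r;
}
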
@@ -29,14 +29,77 @@ #include -#pragma GCC push_options - -#pragma GCC target ("+nothing+crc") - #ifdef __cplusplus extern "C" { #endif +#pragma GCC push_options +#pragma GCC target ("arch=armv8.3-a") +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +__jcvt (double __a) +{ + return __builtin_aarch64_jcvtzs (__a); +} + +#pragma GCC pop_options + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.5-a") +__extension__ static __inline float __attribute__ ((__always_inline__)) +__rint32zf (float __a) +{ + return __builtin_aarch64_frint32zsf (__a); +} + +__extension__ static __inline double __attribute__ ((__always_inline__)) +__rint32z (double __a) +{ + return __builtin_aarch64_frint32zdf (__a); +} + +__extension__ static __inline float __attribute__ ((__always_inline__)) +__rint64zf (float __a) +{ + return __builtin_aarch64_frint64zsf (__a); +} + +__extension__ static __inline double __attribute__ ((__always_inline__)) +__rint64z (double __a) +{ + return __builtin_aarch64_frint64zdf (__a); +} + +__extension__ static __inline float __attribute__ ((__always_inline__)) +__rint32xf (float __a) +{ + return __builtin_aarch64_frint32xsf (__a); +} + +__extension__ static __inline double __attribute__ ((__always_inline__)) +__rint32x (double __a) +{ + return __builtin_aarch64_frint32xdf (__a); +} + +__extension__ static __inline float __attribute__ ((__always_inline__)) +__rint64xf (float __a) +{ + return __builtin_aarch64_frint64xsf (__a); +} + +__extension__ static __inline double __attribute__ ((__always_inline__)) +__rint64x (double __a) +{ + return __builtin_aarch64_frint64xdf (__a); +} + + +#pragma GCC pop_options + +#pragma GCC push_options + +#pragma GCC target ("+nothing+crc") + __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) __crc32b (uint32_t __a, uint8_t __b) { @@ -85,10 +148,92 @@ __crc32d (uint32_t __a, uint64_t __b) return __builtin_aarch64_crc32x (__a, __b); } -#ifdef __cplusplus +#pragma GCC pop_options + +#ifdef __ARM_FEATURE_TME +#pragma GCC push_options +#pragma GCC target ("+nothing+tme") + +#define _TMFAILURE_REASON 0x00007fffu +#define _TMFAILURE_RTRY 0x00008000u +#define _TMFAILURE_CNCL 0x00010000u +#define _TMFAILURE_MEM 0x00020000u +#define _TMFAILURE_IMP 0x00040000u +#define _TMFAILURE_ERR 0x00080000u +#define _TMFAILURE_SIZE 0x00100000u +#define _TMFAILURE_NEST 0x00200000u +#define _TMFAILURE_DBG 0x00400000u +#define _TMFAILURE_INT 0x00800000u +#define _TMFAILURE_TRIVIAL 0x01000000u + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +__tstart (void) +{ + return __builtin_aarch64_tstart (); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +__tcommit (void) +{ + __builtin_aarch64_tcommit (); } + +__extension__ static __inline void __attribute__ ((__always_inline__)) +__tcancel (const uint64_t __reason) +{ + __builtin_aarch64_tcancel (__reason); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +__ttest (void) +{ + return __builtin_aarch64_ttest (); +} + +#pragma GCC pop_options #endif +#pragma GCC push_options +#pragma GCC target ("+nothing+rng") +__extension__ static __inline int __attribute__ ((__always_inline__)) +__rndr (uint64_t *__res) +{ + return __builtin_aarch64_rndr (__res); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +__rndrrs (uint64_t *__res) +{ + return __builtin_aarch64_rndrrs (__res); +} + #pragma GCC pop_options +#pragma GCC push_options +#pragma GCC target ("arch=armv8.5-a+memtag") + 
+#define __arm_mte_create_random_tag(__ptr, __u64_mask) \ + __builtin_aarch64_memtag_irg(__ptr, __u64_mask) + +#define __arm_mte_exclude_tag(__ptr, __u64_excluded) \ + __builtin_aarch64_memtag_gmi(__ptr, __u64_excluded) + +#define __arm_mte_ptrdiff(__ptr_a, __ptr_b) \ + __builtin_aarch64_memtag_subp(__ptr_a, __ptr_b) + +#define __arm_mte_increment_tag(__ptr, __u_offset) \ + __builtin_aarch64_memtag_inc_tag(__ptr, __u_offset) + +#define __arm_mte_set_tag(__tagged_address) \ + __builtin_aarch64_memtag_set_tag(__tagged_address) + +#define __arm_mte_get_tag(__address) \ + __builtin_aarch64_memtag_get_tag(__address) + +#pragma GCC pop_options + +#ifdef __cplusplus +} +#endif + #endif diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h new file mode 100644 index 0000000000000..59ed67fb5cbfe --- /dev/null +++ b/gcc/config/aarch64/arm_bf16.h @@ -0,0 +1,52 @@ +/* Arm BF16 instrinsics include file. + + Copyright (C) 2019-2021 Free Software Foundation, Inc. + Contributed by Arm. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _AARCH64_BF16_H_ +#define _AARCH64_BF16_H_ + +typedef __bf16 bfloat16_t; +typedef float float32_t; + +#pragma GCC push_options +#pragma GCC target ("+nothing+bf16+nosimd") + +__extension__ extern __inline bfloat16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvth_bf16_f32 (float32_t __a) +{ + return __builtin_aarch64_bfcvtbf (__a); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtah_f32_bf16 (bfloat16_t __a) +{ + return __builtin_aarch64_bfcvtsf (__a); +} + +#pragma GCC pop_options + +#endif diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h index d80e3eefc89fc..2afbd1203361b 100644 --- a/gcc/config/aarch64/arm_fp16.h +++ b/gcc/config/aarch64/arm_fp16.h @@ -1,6 +1,6 @@ /* ARM FP16 scalar intrinsics include file. - Copyright (C) 2016-2018 Free Software Foundation, Inc. + Copyright (C) 2016-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 115cedcaa5b20..baa30bd5a9d96 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -1,6 +1,6 @@ /* ARM NEON intrinsics include file. - Copyright (C) 2011-2018 Free Software Foundation, Inc. + Copyright (C) 2011-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. 
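The __arm_mte_* macros added to arm_acle.h above are thin wrappers around the __builtin_aarch64_memtag_* builtins (irg, gmi, subp, inc_tag, set_tag, get_tag). A hedged usage sketch follows; it assumes -march=armv8.5-a+memtag plus an MTE-enabled kernel and allocator, the static buffer stands in for a real heap granule, and the exclusion mask is one bit per excluded tag as described in the ACLE. The function name is invented for illustration.

#include <arm_acle.h>
#include <stdint.h>

static _Alignas (16) char granule[16];       /* one 16-byte MTE granule */

void *
tag_one_granule (void)
{
  /* Choose a random logical tag for the pointer, excluding tag 0.  */
  void *p = __arm_mte_create_random_tag (granule, (uint64_t) 1);

  /* Make the memory's allocation tag match the pointer's tag (STG);
     the address must be 16-byte aligned or the store faults.  */
  __arm_mte_set_tag (p);

  /* Recover a correctly tagged pointer from an untagged address (LDG).  */
  void *q = __arm_mte_get_tag (granule);

  /* Pointer difference that ignores the tag bits (SUBP).  */
  (void) __arm_mte_ptrdiff (q, p);

  return p;
}
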
@@ -73,6 +73,39 @@ typedef __fp16 float16_t; typedef float float32_t; typedef double float64_t; +typedef __Bfloat16x4_t bfloat16x4_t; +typedef __Bfloat16x8_t bfloat16x8_t; + +typedef struct bfloat16x4x2_t +{ + bfloat16x4_t val[2]; +} bfloat16x4x2_t; + +typedef struct bfloat16x8x2_t +{ + bfloat16x8_t val[2]; +} bfloat16x8x2_t; + +typedef struct bfloat16x4x3_t +{ + bfloat16x4_t val[3]; +} bfloat16x4x3_t; + +typedef struct bfloat16x8x3_t +{ + bfloat16x8_t val[3]; +} bfloat16x8x3_t; + +typedef struct bfloat16x4x4_t +{ + bfloat16x4_t val[4]; +} bfloat16x4x4_t; + +typedef struct bfloat16x8x4_t +{ + bfloat16x8_t val[4]; +} bfloat16x8x4_t; + typedef struct int8x8x2_t { int8x8_t val[2]; @@ -6055,6 +6088,20 @@ vreinterpretq_u32_p128 (poly128_t __a) return (uint32x4_t)__a; } +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_f64_p128 (poly128_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_p128_f64 (float64x2_t __a) +{ + return (poly128_t) __a; +} + /* vset_lane */ __extension__ extern __inline float16x4_t @@ -6255,216 +6302,203 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index) return __aarch64_vset_lane_any (__elem, __vec, __index); } -#define __GET_LOW(__TYPE) \ - uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ - uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \ - return vreinterpret_##__TYPE##_u64 (lo); - __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_f16 (float16x8_t __a) { - __GET_LOW (f16); + return __builtin_aarch64_get_lowv8hf (__a); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_f32 (float32x4_t __a) { - __GET_LOW (f32); + return __builtin_aarch64_get_lowv4sf (__a); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_f64 (float64x2_t __a) { - return (float64x1_t) {vgetq_lane_f64 (__a, 0)}; + return (float64x1_t) {__builtin_aarch64_get_lowv2df (__a)}; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_p8 (poly8x16_t __a) { - __GET_LOW (p8); + return (poly8x8_t) __builtin_aarch64_get_lowv16qi ((int8x16_t) __a); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_p16 (poly16x8_t __a) { - __GET_LOW (p16); + return (poly16x4_t) __builtin_aarch64_get_lowv8hi ((int16x8_t) __a); } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_p64 (poly64x2_t __a) { - __GET_LOW (p64); + return (poly64x1_t) __builtin_aarch64_get_lowv2di ((int64x2_t) __a); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s8 (int8x16_t __a) { - __GET_LOW (s8); + return __builtin_aarch64_get_lowv16qi (__a); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s16 (int16x8_t __a) { - __GET_LOW (s16); + return __builtin_aarch64_get_lowv8hi (__a); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s32 (int32x4_t __a) { - __GET_LOW (s32); + return __builtin_aarch64_get_lowv4si (__a); } __extension__ 
extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s64 (int64x2_t __a) { - __GET_LOW (s64); + return (int64x1_t) {__builtin_aarch64_get_lowv2di (__a)}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u8 (uint8x16_t __a) { - __GET_LOW (u8); + return (uint8x8_t) __builtin_aarch64_get_lowv16qi ((int8x16_t) __a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u16 (uint16x8_t __a) { - __GET_LOW (u16); + return (uint16x4_t) __builtin_aarch64_get_lowv8hi ((int16x8_t) __a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u32 (uint32x4_t __a) { - __GET_LOW (u32); + return (uint32x2_t) __builtin_aarch64_get_lowv4si ((int32x4_t) __a); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u64 (uint64x2_t __a) { - return vcreate_u64 (vgetq_lane_u64 (__a, 0)); + return (uint64x1_t) {__builtin_aarch64_get_lowv2di ((int64x2_t) __a)}; } -#undef __GET_LOW - -#define __GET_HIGH(__TYPE) \ - uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ - uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ - return vreinterpret_##__TYPE##_u64 (hi); - __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f16 (float16x8_t __a) { - __GET_HIGH (f16); + return __builtin_aarch64_get_highv8hf (__a); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f32 (float32x4_t __a) { - __GET_HIGH (f32); + return __builtin_aarch64_get_highv4sf (__a); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f64 (float64x2_t __a) { - __GET_HIGH (f64); + return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)}; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p8 (poly8x16_t __a) { - __GET_HIGH (p8); + return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p16 (poly16x8_t __a) { - __GET_HIGH (p16); + return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p64 (poly64x2_t __a) { - __GET_HIGH (p64); + return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s8 (int8x16_t __a) { - __GET_HIGH (s8); + return __builtin_aarch64_get_highv16qi (__a); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s16 (int16x8_t __a) { - __GET_HIGH (s16); + return __builtin_aarch64_get_highv8hi (__a); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s32 (int32x4_t __a) { - __GET_HIGH (s32); + return __builtin_aarch64_get_highv4si (__a); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s64 (int64x2_t __a) { - __GET_HIGH (s64); + return (int64x1_t) 
{__builtin_aarch64_get_highv2di (__a)}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u8 (uint8x16_t __a) { - __GET_HIGH (u8); + return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u16 (uint16x8_t __a) { - __GET_HIGH (u16); + return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u32 (uint32x4_t __a) { - __GET_HIGH (u32); + return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a); } -#undef __GET_HIGH - __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u64 (uint64x2_t __a) { - return vcreate_u64 (vgetq_lane_u64 (__a, 1)); + return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)}; } + __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) @@ -6572,25720 +6606,27930 @@ vcombine_p64 (poly64x1_t __a, poly64x1_t __b) __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) { - int8x8_t result; - __asm__ ("saba %0.8b,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabav8qi (__a, __b, __c); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) { - int16x4_t result; - __asm__ ("saba %0.4h,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabav4hi (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) { - int32x2_t result; - __asm__ ("saba %0.2s,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabav2si (__a, __b, __c); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { - uint8x8_t result; - __asm__ ("uaba %0.8b,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabav8qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { - uint16x4_t result; - __asm__ ("uaba %0.4h,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabav4hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { - 
uint32x2_t result; - __asm__ ("uaba %0.2s,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabav2si_uuuu (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +vabal_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) { - int16x8_t result; - __asm__ ("sabal2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabal2v16qi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +vabal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) { - int32x4_t result; - __asm__ ("sabal2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabal2v8hi (__a, __b, __c); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +vabal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) { - int64x2_t result; - __asm__ ("sabal2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabal2v4si (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +vabal_high_u8 (uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { - uint16x8_t result; - __asm__ ("uabal2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabal2v16qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +vabal_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { - uint32x4_t result; - __asm__ ("uabal2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabal2v8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +vabal_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { - uint64x2_t result; - __asm__ ("uabal2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabal2v4si_uuuu (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) { - int16x8_t result; - __asm__ ("sabal %0.8h,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabalv8qi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) { - int32x4_t result; - __asm__ ("sabal %0.4s,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No 
clobbers */); - return result; + return __builtin_aarch64_sabalv4hi (__a, __b, __c); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) { - int64x2_t result; - __asm__ ("sabal %0.2d,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabalv2si (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { - uint16x8_t result; - __asm__ ("uabal %0.8h,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabalv8qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { - uint32x4_t result; - __asm__ ("uabal %0.4s,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabalv4hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { - uint64x2_t result; - __asm__ ("uabal %0.2d,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabalv2si_uuuu (__a, __b, __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) { - int8x16_t result; - __asm__ ("saba %0.16b,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabav16qi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { - int16x8_t result; - __asm__ ("saba %0.8h,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabav8hi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { - int32x4_t result; - __asm__ ("saba %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabav4si (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { - uint8x16_t result; - __asm__ ("uaba %0.16b,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabav16qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { - uint16x8_t result; - __asm__ ("uaba %0.8h,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabav8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { - uint32x4_t result; - __asm__ ("uaba %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabav4si_uuuu (__a, __b, __c); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_s8 (int8x8_t a, int8x8_t b) +vabd_s8 (int8x8_t __a, int8x8_t __b) { - int8x8_t result; - __asm__ ("sabd %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_s16 (int16x4_t a, int16x4_t b) +vabd_s16 (int16x4_t __a, int16x4_t __b) { - int16x4_t result; - __asm__ ("sabd %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_s32 (int32x2_t a, int32x2_t b) +vabd_s32 (int32x2_t __a, int32x2_t __b) { - int32x2_t result; - __asm__ ("sabd %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdv2si (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_u8 (uint8x8_t a, uint8x8_t b) +vabd_u8 (uint8x8_t __a, uint8x8_t __b) { - uint8x8_t result; - __asm__ ("uabd %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdv8qi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_u16 (uint16x4_t a, uint16x4_t b) +vabd_u16 (uint16x4_t __a, uint16x4_t __b) { - uint16x4_t result; - __asm__ ("uabd %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdv4hi_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_u32 (uint32x2_t a, uint32x2_t b) +vabd_u32 (uint32x2_t __a, uint32x2_t __b) { - uint32x2_t result; - __asm__ ("uabd %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdv2si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_high_s8 (int8x16_t a, int8x16_t b) +vabdl_high_s8 (int8x16_t __a, int8x16_t __b) { - int16x8_t result; - __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdl2v16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_high_s16 (int16x8_t a, int16x8_t b) 
+vabdl_high_s16 (int16x8_t __a, int16x8_t __b) { - int32x4_t result; - __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdl2v8hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_high_s32 (int32x4_t a, int32x4_t b) +vabdl_high_s32 (int32x4_t __a, int32x4_t __b) { - int64x2_t result; - __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdl2v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_high_u8 (uint8x16_t a, uint8x16_t b) +vabdl_high_u8 (uint8x16_t __a, uint8x16_t __b) { - uint16x8_t result; - __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdl2v16qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_high_u16 (uint16x8_t a, uint16x8_t b) +vabdl_high_u16 (uint16x8_t __a, uint16x8_t __b) { - uint32x4_t result; - __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdl2v8hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_high_u32 (uint32x4_t a, uint32x4_t b) +vabdl_high_u32 (uint32x4_t __a, uint32x4_t __b) { - uint64x2_t result; - __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdl2v4si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_s8 (int8x8_t a, int8x8_t b) +vabdl_s8 (int8x8_t __a, int8x8_t __b) { - int16x8_t result; - __asm__ ("sabdl %0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdlv8qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_s16 (int16x4_t a, int16x4_t b) +vabdl_s16 (int16x4_t __a, int16x4_t __b) { - int32x4_t result; - __asm__ ("sabdl %0.4s, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdlv4hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_s32 (int32x2_t a, int32x2_t b) +vabdl_s32 (int32x2_t __a, int32x2_t __b) { - int64x2_t result; - __asm__ ("sabdl %0.2d, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdlv2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_u8 (uint8x8_t a, uint8x8_t b) +vabdl_u8 (uint8x8_t __a, uint8x8_t __b) { - uint16x8_t result; - __asm__ ("uabdl %0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdlv8qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_u16 (uint16x4_t a, uint16x4_t b) +vabdl_u16 (uint16x4_t __a, uint16x4_t __b) { - uint32x4_t result; - __asm__ ("uabdl 
%0.4s, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdlv4hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdl_u32 (uint32x2_t a, uint32x2_t b) +vabdl_u32 (uint32x2_t __a, uint32x2_t __b) { - uint64x2_t result; - __asm__ ("uabdl %0.2d, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdlv2si_uuu (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_s8 (int8x16_t a, int8x16_t b) +vabdq_s8 (int8x16_t __a, int8x16_t __b) { - int8x16_t result; - __asm__ ("sabd %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_s16 (int16x8_t a, int16x8_t b) +vabdq_s16 (int16x8_t __a, int16x8_t __b) { - int16x8_t result; - __asm__ ("sabd %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_s32 (int32x4_t a, int32x4_t b) +vabdq_s32 (int32x4_t __a, int32x4_t __b) { - int32x4_t result; - __asm__ ("sabd %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sabdv4si (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_u8 (uint8x16_t a, uint8x16_t b) +vabdq_u8 (uint8x16_t __a, uint8x16_t __b) { - uint8x16_t result; - __asm__ ("uabd %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdv16qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_u16 (uint16x8_t a, uint16x8_t b) +vabdq_u16 (uint16x8_t __a, uint16x8_t __b) { - uint16x8_t result; - __asm__ ("uabd %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdv8hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_u32 (uint32x4_t a, uint32x4_t b) +vabdq_u32 (uint32x4_t __a, uint32x4_t __b) { - uint32x4_t result; - __asm__ ("uabd %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uabdv4si_uuu (__a, __b); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlv_s8 (int8x8_t a) +vaddlv_s8 (int8x8_t __a) { - int16_t result; - __asm__ ("saddlv %h0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_saddlvv8qi (__a); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlv_s16 (int16x4_t a) +vaddlv_s16 (int16x4_t __a) { - int32_t result; - __asm__ ("saddlv %s0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_saddlvv4hi (__a); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vaddlv_u8 (uint8x8_t a) +vaddlv_u8 (uint8x8_t __a) { - uint16_t result; - __asm__ ("uaddlv %h0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_uaddlvv8qi_uu (__a); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlv_u16 (uint16x4_t a) +vaddlv_u16 (uint16x4_t __a) { - uint32_t result; - __asm__ ("uaddlv %s0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_uaddlvv4hi_uu (__a); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlvq_s8 (int8x16_t a) +vaddlvq_s8 (int8x16_t __a) { - int16_t result; - __asm__ ("saddlv %h0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_saddlvv16qi (__a); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlvq_s16 (int16x8_t a) +vaddlvq_s16 (int16x8_t __a) { - int32_t result; - __asm__ ("saddlv %s0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_saddlvv8hi (__a); } __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlvq_s32 (int32x4_t a) +vaddlvq_s32 (int32x4_t __a) { - int64_t result; - __asm__ ("saddlv %d0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_saddlvv4si (__a); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlvq_u8 (uint8x16_t a) +vaddlvq_u8 (uint8x16_t __a) { - uint16_t result; - __asm__ ("uaddlv %h0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_uaddlvv16qi_uu (__a); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlvq_u16 (uint16x8_t a) +vaddlvq_u16 (uint16x8_t __a) { - uint32_t result; - __asm__ ("uaddlv %s0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_uaddlvv8hi_uu (__a); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlvq_u32 (uint32x4_t a) +vaddlvq_u32 (uint32x4_t __a) { - uint64_t result; - __asm__ ("uaddlv %d0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_uaddlvv4si_uu (__a); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtx_f32_f64 (float64x2_t a) +vcvtx_f32_f64 (float64x2_t __a) { - float32x2_t result; + float32x2_t __result; __asm__ ("fcvtxn %0.2s,%1.2d" - : "=w"(result) - : "w"(a) + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) +vcvtx_high_f32_f64 (float32x2_t __a, float64x2_t __b) { - float32x4_t result; + float32x4_t __result; __asm__ ("fcvtxn2 %0.4s,%1.2d" - : "=w"(result) - : "w" (b), "0"(a) + : "=w"(__result) + : "w" (__b), "0"(__a) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtxd_f32_f64 (float64_t a) +vcvtxd_f32_f64 (float64_t __a) 
{ - float32_t result; + float32_t __result; __asm__ ("fcvtxn %s0,%d1" - : "=w"(result) - : "w"(a) + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - float32x2_t result; - float32x2_t t1; + float32x2_t __result; + float32x2_t __t1; __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) + : "=w"(__result), "=w"(__t1) + : "0"(__a), "w"(__b), "w"(__c) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) +vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) { - int16x4_t result; - __asm__ ("mla %0.4h,%2.4h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mla_nv4hi (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) +vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) { - int32x2_t result; - __asm__ ("mla %0.2s,%2.2s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mla_nv2si (__a, __b, __c); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) { - uint16x4_t result; - __asm__ ("mla %0.4h,%2.4h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return (uint16x4_t) __builtin_aarch64_mla_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, + (int16_t) __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) { - uint32x2_t result; - __asm__ ("mla %0.2s,%2.2s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint32x2_t) __builtin_aarch64_mla_nv2si ((int32x2_t) __a, + (int32x2_t) __b, + (int32_t) __c); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) { - int8x8_t result; - __asm__ ("mla %0.8b, %2.8b, %3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlav8qi (__a, __b, __c); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) { - int16x4_t result; - __asm__ ("mla %0.4h, %2.4h, %3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlav4hi (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) { - 
int32x2_t result; - __asm__ ("mla %0.2s, %2.2s, %3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlav2si (__a, __b, __c); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { - uint8x8_t result; - __asm__ ("mla %0.8b, %2.8b, %3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint8x8_t) __builtin_aarch64_mlav8qi ((int8x8_t) __a, + (int8x8_t) __b, + (int8x8_t) __c); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { - uint16x4_t result; - __asm__ ("mla %0.4h, %2.4h, %3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint16x4_t) __builtin_aarch64_mlav4hi ((int16x4_t) __a, + (int16x4_t) __b, + (int16x4_t) __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { - uint32x2_t result; - __asm__ ("mla %0.2s, %2.2s, %3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint32x2_t) __builtin_aarch64_mlav2si ((int32x2_t) __a, + (int32x2_t) __b, + (int32x2_t) __c); } -#define vmlal_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x4_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x2_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x4_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x2_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_high_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_high_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_high_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - 
uint16x8_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_high_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +vmlal_high_lane_s16(int32x4_t __a, int16x8_t __b, int16x4_t __v, + const int __lane) { - int32x4_t result; - __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hi_lanev8hi (__a, __b, __v, __lane); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +vmlal_high_lane_s32(int64x2_t __a, int32x4_t __b, int32x2_t __v, + const int __lane) { - int64x2_t result; - __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hi_lanev4si (__a, __b, __v, __lane); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +vmlal_high_lane_u16(uint32x4_t __a, uint16x8_t __b, uint16x4_t __v, + const int __lane) { - uint32x4_t result; - __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hi_lanev8hi_uuuus (__a, __b, __v, __lane); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) -{ - uint64x2_t result; - __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +vmlal_high_lane_u32(uint64x2_t __a, uint32x4_t __b, uint32x2_t __v, + const int __lane) { - int16x8_t result; - __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hi_lanev4si_uuuus (__a, __b, __v, __lane); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +vmlal_high_laneq_s16(int32x4_t __a, int16x8_t __b, int16x8_t __v, + const int __lane) { - int32x4_t result; - __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hi_laneqv8hi (__a, __b, __v, __lane); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) -{ - int64x2_t result; - __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return 
result; -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +vmlal_high_laneq_s32(int64x2_t __a, int32x4_t __b, int32x4_t __v, + const int __lane) { - uint16x8_t result; - __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hi_laneqv4si (__a, __b, __v, __lane); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +vmlal_high_laneq_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __v, + const int __lane) { - uint32x4_t result; - __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hi_laneqv8hi_uuuus (__a, __b, __v, __lane); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +vmlal_high_laneq_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __v, + const int __lane) { - uint64x2_t result; - __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hi_laneqv4si_uuuus (__a, __b, __v, __lane); } -#define vmlal_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x4_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x2_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x4_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x2_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlal_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ 
- : /* No clobbers */); \ - result; \ - }) - -#define vmlal_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +vmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) { - int32x4_t result; - __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hi_nv8hi (__a, __b, __c); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +vmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) { - int64x2_t result; - __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hi_nv4si (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +vmlal_high_n_u16 (uint32x4_t __a, uint16x8_t __b, uint16_t __c) { - uint32x4_t result; - __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hi_nv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +vmlal_high_n_u32 (uint64x2_t __a, uint32x4_t __b, uint32_t __c) { - uint64x2_t result; - __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hi_nv4si_uuuu (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +vmlal_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) { - int16x8_t result; - __asm__ ("smlal %0.8h,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hiv16qi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +vmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) { - int32x4_t result; - __asm__ ("smlal %0.4s,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hiv8hi (__a, __b, __c); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +vmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) { - int64x2_t result; - __asm__ ("smlal %0.2d,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_hiv4si (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +vmlal_high_u8 (uint16x8_t __a, 
uint8x16_t __b, uint8x16_t __c) { - uint16x8_t result; - __asm__ ("umlal %0.8h,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hiv16qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +vmlal_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { - uint32x4_t result; - __asm__ ("umlal %0.4s,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hiv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +vmlal_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { - uint64x2_t result; - __asm__ ("umlal %0.2d,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_hiv4si_uuuu (__a, __b, __c); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +vmlal_lane_s16 (int32x4_t __acc, int16x4_t __a, int16x4_t __b, const int __c) { - float32x4_t result; - float32x4_t t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smlal_lane_v4hi (__acc, __a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +vmlal_lane_s32 (int64x2_t __acc, int32x2_t __a, int32x2_t __b, const int __c) { - int16x8_t result; - __asm__ ("mla %0.8h,%2.8h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smlal_lane_v2si (__acc, __a, __b, __c); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +vmlal_lane_u16 (uint32x4_t __acc, uint16x4_t __a, uint16x4_t __b, const int __c) { - int32x4_t result; - __asm__ ("mla %0.4s,%2.4s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlal_lane_v4hi_uuuus (__acc, __a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +vmlal_lane_u32 (uint64x2_t __acc, uint32x2_t __a, uint32x2_t __b, const int __c) { - uint16x8_t result; - __asm__ ("mla %0.8h,%2.8h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlal_lane_v2si_uuuus (__acc, __a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +vmlal_laneq_s16 (int32x4_t __acc, int16x4_t __a, int16x8_t __b, const int __c) { - uint32x4_t result; - __asm__ ("mla %0.4s,%2.4s,%3.s[0]" - : 
"=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smlal_laneq_v4hi (__acc, __a, __b, __c); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +vmlal_laneq_s32 (int64x2_t __acc, int32x2_t __a, int32x4_t __b, const int __c) { - int8x16_t result; - __asm__ ("mla %0.16b, %2.16b, %3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smlal_laneq_v2si (__acc, __a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +vmlal_laneq_u16 (uint32x4_t __acc, uint16x4_t __a, uint16x8_t __b, const int __c) { - int16x8_t result; - __asm__ ("mla %0.8h, %2.8h, %3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlal_laneq_v4hi_uuuus (__acc, __a, __b, __c); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +vmlal_laneq_u32 (uint64x2_t __acc, uint32x2_t __a, uint32x4_t __b, const int __c) { - int32x4_t result; - __asm__ ("mla %0.4s, %2.4s, %3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlal_laneq_v2si_uuuus (__acc, __a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) { - uint8x16_t result; - __asm__ ("mla %0.16b, %2.16b, %3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_nv4hi (__a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) { - uint16x8_t result; - __asm__ ("mla %0.8h, %2.8h, %3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlal_nv2si (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) -{ - uint32x4_t result; - __asm__ ("mla %0.4s, %2.4s, %3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ extern __inline float32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -{ - float32x2_t result; - float32x2_t t1; - __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; -} - -__extension__ extern __inline int16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) +vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) { - int16x4_t result; - __asm__ ("mls 
%0.4h, %2.4h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_nv4hi_uuuu (__a, __b, __c); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) +vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) { - int32x2_t result; - __asm__ ("mls %0.2s, %2.2s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlal_nv2si_uuuu (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) { - uint16x4_t result; - __asm__ ("mls %0.4h, %2.4h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlalv8qi (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) { - uint32x2_t result; - __asm__ ("mls %0.2s, %2.2s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlalv4hi (__a, __b, __c); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) { - int8x8_t result; - __asm__ ("mls %0.8b,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlalv2si (__a, __b, __c); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { - int16x4_t result; - __asm__ ("mls %0.4h,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlalv8qi_uuuu (__a, __b, __c); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { - int32x2_t result; - __asm__ ("mls %0.2s,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlalv4hi_uuuu (__a, __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { - uint8x8_t result; - __asm__ ("mls %0.8b,%2.8b,%3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlalv2si_uuuu (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - uint16x4_t result; - __asm__ ("mls %0.4h,%2.4h,%3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) + float32x4_t __result; + float32x4_t __t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" + : "=w"(__result), "=w"(__t1) + : "0"(__a), "w"(__b), "w"(__c) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) { - uint32x2_t result; - __asm__ ("mls %0.2s,%2.2s,%3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mla_nv8hi (__a, __b, __c); } -#define vmlsl_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x4_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x2_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x4_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x2_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_high_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
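/* Editor's note (illustrative sketch, not part of the patch): the hunks in
   this region replace opaque inline-asm templates with __builtin_aarch64_*
   expansions, so a caller such as the one below is now visible to the
   optimiser (constant folding, scheduling, register allocation) instead of
   being pinned to a fixed SMLAL2/UMLAL2 asm string.  The intrinsic names
   and argument order are unchanged; only the implementation strategy
   differs.

     #include <arm_neon.h>

     int32x4_t
     widen_mla (int32x4_t acc, int16x8_t a, int16x8_t b)
     {
       // acc += (high half of a, widened) * (high half of b, widened)
       return vmlal_high_s16 (acc, a, b);
     }

   Likewise, the vmlsl_high_lane_* statement-expression macros removed just
   above reappear later in the patch as __gnu_inline functions taking a
   `const int' lane, e.g. (hypothetical caller):

     int32x4_t
     mlsl_hi_lane2 (int32x4_t acc, int16x8_t a, int16x4_t v)
     {
       // acc -= (high half of a, widened) * v[2]
       return vmlsl_high_lane_s16 (acc, a, v, 2);
     }

   As builtin-backed functions the operands are type-checked, and an
   out-of-range lane index is expected to be diagnosed by the compiler
   rather than only surfacing as an assembler error from the old "i"
   constraint.  */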
-vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) { - int32x4_t result; - __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mla_nv4si (__a, __b, __c); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) { - int64x2_t result; - __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint16x8_t) __builtin_aarch64_mla_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, + (int16_t) __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) { - uint32x4_t result; - __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return (uint32x4_t) __builtin_aarch64_mla_nv4si ((int32x4_t) __a, + (int32x4_t) __b, + (int32_t) __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) +vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) { - uint64x2_t result; - __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlav16qi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { - int16x8_t result; - __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlav8hi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { - int32x4_t result; - __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlav4si (__a, __b, __c); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { - int64x2_t result; - __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint8x16_t) __builtin_aarch64_mlav16qi ((int8x16_t) __a, + (int8x16_t) __b, + (int8x16_t) __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { - uint16x8_t result; - __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint16x8_t) 
__builtin_aarch64_mlav8hi ((int16x8_t) __a, + (int16x8_t) __b, + (int16x8_t) __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { - uint32x4_t result; - __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint32x4_t) __builtin_aarch64_mlav4si ((int32x4_t) __a, + (int32x4_t) __b, + (int32x4_t) __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - uint64x2_t result; - __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) + float32x2_t __result; + float32x2_t __t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" + : "=w"(__result), "=w"(__t1) + : "0"(__a), "w"(__b), "w"(__c) : /* No clobbers */); - return result; + return __result; } -#define vmlsl_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x4_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x2_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x4_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x2_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmlsl_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ 
- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) { - int32x4_t result; - __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mls_nv4hi (__a, __b, __c); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) { - int64x2_t result; - __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mls_nv2si (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) { - uint32x4_t result; - __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return (uint16x4_t) __builtin_aarch64_mls_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, + (int16_t) __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) { - uint64x2_t result; - __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint32x2_t) __builtin_aarch64_mls_nv2si ((int32x2_t) __a, + (int32x2_t) __b, + (int32_t) __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) { - int16x8_t result; - __asm__ ("smlsl %0.8h, %2.8b, %3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlsv8qi (__a, __b, __c); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) { - int32x4_t result; - __asm__ ("smlsl %0.4s, %2.4h, %3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlsv4hi (__a, __b, __c); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) { - int64x2_t result; - __asm__ ("smlsl %0.2d, %2.2s, %3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlsv2si (__a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { - uint16x8_t result; - __asm__ ("umlsl %0.8h, %2.8b, %3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint8x8_t) __builtin_aarch64_mlsv8qi ((int8x8_t) __a, + (int8x8_t) __b, + (int8x8_t) __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { - uint32x4_t result; - __asm__ ("umlsl %0.4s, %2.4h, %3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint16x4_t) __builtin_aarch64_mlsv4hi ((int16x4_t) __a, + (int16x4_t) __b, + (int16x4_t) __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { - uint64x2_t result; - __asm__ ("umlsl %0.2d, %2.2s, %3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return (uint32x2_t) __builtin_aarch64_mlsv2si ((int32x2_t) __a, + (int32x2_t) __b, + (int32x2_t) __c); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +vmlsl_high_lane_s16(int32x4_t __a, int16x8_t __b, int16x4_t __v, + const int __lane) { - float32x4_t result; - float32x4_t t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hi_lanev8hi (__a, __b, __v, __lane); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +vmlsl_high_lane_s32(int64x2_t __a, int32x4_t __b, int32x2_t __v, + const int __lane) { - int16x8_t result; - __asm__ ("mls %0.8h, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hi_lanev4si (__a, __b, __v, __lane); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +vmlsl_high_lane_u16(uint32x4_t __a, uint16x8_t __b, uint16x4_t __v, + const int __lane) { - int32x4_t result; - __asm__ ("mls %0.4s, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hi_lanev8hi_uuuus (__a, __b, __v, __lane); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +vmlsl_high_lane_u32(uint64x2_t __a, uint32x4_t __b, uint32x2_t __v, + const int __lane) { - uint16x8_t result; - __asm__ ("mls %0.8h, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hi_lanev4si_uuuus (__a, __b, __v, __lane); } -__extension__ extern __inline 
uint32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +vmlsl_high_laneq_s16(int32x4_t __a, int16x8_t __b, int16x8_t __v, + const int __lane) { - uint32x4_t result; - __asm__ ("mls %0.4s, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hi_laneqv8hi (__a, __b, __v, __lane); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +vmlsl_high_laneq_s32(int64x2_t __a, int32x4_t __b, int32x4_t __v, + const int __lane) { - int8x16_t result; - __asm__ ("mls %0.16b,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hi_laneqv4si (__a, __b, __v, __lane); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +vmlsl_high_laneq_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __v, + const int __lane) { - int16x8_t result; - __asm__ ("mls %0.8h,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hi_laneqv8hi_uuuus (__a, __b, __v, __lane); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +vmlsl_high_laneq_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __v, + const int __lane) { - int32x4_t result; - __asm__ ("mls %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hi_laneqv4si_uuuus (__a, __b, __v, __lane); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +vmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) { - uint8x16_t result; - __asm__ ("mls %0.16b,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hi_nv8hi (__a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +vmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) { - uint16x8_t result; - __asm__ ("mls %0.8h,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hi_nv4si (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +vmlsl_high_n_u16 (uint32x4_t __a, uint16x8_t __b, uint16_t __c) { - uint32x4_t result; - __asm__ ("mls %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hi_nv8hi_uuuu (__a, __b, __c); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmlsl_high_n_u32 (uint64x2_t __a, uint32x4_t 
__b, uint32_t __c) +{ + return __builtin_aarch64_umlsl_hi_nv4si_uuuu (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_high_s8 (int8x16_t a) +vmlsl_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) { - int16x8_t result; - __asm__ ("sshll2 %0.8h,%1.16b,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hiv16qi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_high_s16 (int16x8_t a) +vmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) { - int32x4_t result; - __asm__ ("sshll2 %0.4s,%1.8h,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hiv8hi (__a, __b, __c); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_high_s32 (int32x4_t a) +vmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) { - int64x2_t result; - __asm__ ("sshll2 %0.2d,%1.4s,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_hiv4si (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_high_u8 (uint8x16_t a) +vmlsl_high_u8 (uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { - uint16x8_t result; - __asm__ ("ushll2 %0.8h,%1.16b,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hiv16qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_high_u16 (uint16x8_t a) +vmlsl_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { - uint32x4_t result; - __asm__ ("ushll2 %0.4s,%1.8h,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hiv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_high_u32 (uint32x4_t a) +vmlsl_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { - uint64x2_t result; - __asm__ ("ushll2 %0.2d,%1.4s,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlsl_hiv4si_uuuu (__a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_s8 (int8x8_t a) +vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __v, const int __lane) { - int16x8_t result; - __asm__ ("sshll %0.8h,%1.8b,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smlsl_lane_v4hi (__a, __b, __v, __lane); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_s16 (int16x4_t a) +vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __v, const int __lane) { - int32x4_t result; - __asm__ ("sshll %0.4s,%1.4h,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smlsl_lane_v2si (__a, __b, __v, __lane); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_s32 (int32x2_t a) +vmlsl_lane_u16 (uint32x4_t __a, 
uint16x4_t __b, uint16x4_t __v, + const int __lane) { - int64x2_t result; - __asm__ ("sshll %0.2d,%1.2s,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlsl_lane_v4hi_uuuus (__a, __b, __v, __lane); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_u8 (uint8x8_t a) +vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __v, + const int __lane) { - uint16x8_t result; - __asm__ ("ushll %0.8h,%1.8b,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlsl_lane_v2si_uuuus (__a, __b, __v, __lane); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __v, const int __lane) +{ + return __builtin_aarch64_vec_smlsl_laneq_v4hi (__a, __b, __v, __lane); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __v, const int __lane) +{ + return __builtin_aarch64_vec_smlsl_laneq_v2si (__a, __b, __v, __lane); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_u16 (uint16x4_t a) +vmlsl_laneq_u16 (uint32x4_t __a, uint16x4_t __b, uint16x8_t __v, + const int __lane) { - uint32x4_t result; - __asm__ ("ushll %0.4s,%1.4h,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlsl_laneq_v4hi_uuuus (__a, __b, __v, __lane); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovl_u32 (uint32x2_t a) +vmlsl_laneq_u32 (uint64x2_t __a, uint32x2_t __b, uint32x4_t __v, + const int __lane) { - uint64x2_t result; - __asm__ ("ushll %0.2d,%1.2s,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_umlsl_laneq_v2si_uuuus (__a, __b, __v, __lane); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_high_s16 (int8x8_t a, int16x8_t b) +vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.16b,%1.8h" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlsl_nv4hi (__a, __b, __c); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return __builtin_aarch64_smlsl_nv2si (__a, __b, __c); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return __builtin_aarch64_umlsl_nv4hi_uuuu (__a, __b, __c); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return __builtin_aarch64_umlsl_nv2si_uuuu (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_high_s32 (int16x4_t a, int32x4_t b) +vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) { - int16x8_t result = 
vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.8h,%1.4s" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlslv8qi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_high_s64 (int32x2_t a, int64x2_t b) +vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.4s,%1.2d" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlslv4hi (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_high_u16 (uint8x8_t a, uint16x8_t b) +vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.16b,%1.8h" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_smlslv2si (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_high_u32 (uint16x4_t a, uint32x4_t b) +vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.8h,%1.4s" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlslv8qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_high_u64 (uint32x2_t a, uint64x2_t b) +vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.4s,%1.2d" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlslv4hi_uuuu (__a, __b, __c); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_s16 (int16x8_t a) +vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { - int8x8_t result; - __asm__ ("xtn %0.8b,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_umlslv2si_uuuu (__a, __b, __c); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_s32 (int32x4_t a) +vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - int16x4_t result; - __asm__ ("xtn %0.4h,%1.4s" - : "=w"(result) - : "w"(a) + float32x4_t __result; + float32x4_t __t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" + : "=w"(__result), "=w"(__t1) + : "0"(__a), "w"(__b), "w"(__c) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_s64 (int64x2_t a) +vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) { - int32x2_t result; - __asm__ ("xtn %0.2s,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_mls_nv8hi (__a, __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vmovn_u16 (uint16x8_t a) +vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) { - uint8x8_t result; - __asm__ ("xtn %0.8b,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_mls_nv4si (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_u32 (uint32x4_t a) +vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) { - uint16x4_t result; - __asm__ ("xtn %0.4h,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return (uint16x8_t) __builtin_aarch64_mls_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, + (int16_t) __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovn_u64 (uint64x2_t a) +vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) { - uint32x2_t result; - __asm__ ("xtn %0.2s,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return (uint32x4_t) __builtin_aarch64_mls_nv4si ((int32x4_t) __a, + (int32x4_t) __b, + (int32_t) __c); } -#define vmull_high_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x8_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x4_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
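/* Editor's note (illustrative sketch, not part of the patch): several of the
   unsigned forms below are implemented by casting to the corresponding
   signed vector type and reusing the signed builtin (for example vmlsq_u16
   maps onto __builtin_aarch64_mlsv8hi).  That is sound for these operations
   because integer MLA/MLS produce the same bit pattern for signed and
   unsigned operands; callers keep the unsigned interface:

     #include <arm_neon.h>

     uint16x8_t
     fused_sub (uint16x8_t acc, uint16x8_t a, uint16x8_t b)
     {
       // element-wise acc - a * b, modulo 2^16
       return vmlsq_u16 (acc, a, b);
     }
  */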
+vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return __builtin_aarch64_mlsv16qi (__a, __b, __c); +} -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_n_s16 (int16x8_t a, int16_t b) +vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { - int32x4_t result; - __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlsv8hi (__a, __b, __c); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_n_s32 (int32x4_t a, int32_t b) +vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { - int64x2_t result; - __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_mlsv4si (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_n_u16 (uint16x8_t a, uint16_t b) +vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { - uint32x4_t result; - __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; + return (uint8x16_t) __builtin_aarch64_mlsv16qi ((int8x16_t) __a, + (int8x16_t) __b, + (int8x16_t) __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_n_u32 (uint32x4_t a, uint32_t b) +vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { - uint64x2_t result; - __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint16x8_t) __builtin_aarch64_mlsv8hi ((int16x8_t) __a, + (int16x8_t) __b, + (int16x8_t) __c); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_p8 (poly8x16_t a, poly8x16_t b) +vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { - poly16x8_t result; - __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint32x4_t) __builtin_aarch64_mlsv4si ((int32x4_t) __a, + (int32x4_t) __b, + (int32x4_t) __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_s8 (int8x16_t a, int8x16_t b) +vmovl_high_s8 (int8x16_t __a) { - int16x8_t result; - __asm__ ("smull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_unpacks_hi_v16qi (__a); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_s16 (int16x8_t a, int16x8_t b) +vmovl_high_s16 (int16x8_t __a) { - int32x4_t result; - __asm__ ("smull2 %0.4s,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_unpacks_hi_v8hi (__a); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_s32 (int32x4_t a, int32x4_t b) +vmovl_high_s32 (int32x4_t __a) { - int64x2_t result; - __asm__ ("smull2 %0.2d,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No 
clobbers */); - return result; + return __builtin_aarch64_vec_unpacks_hi_v4si (__a); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_u8 (uint8x16_t a, uint8x16_t b) +vmovl_high_u8 (uint8x16_t __a) { - uint16x8_t result; - __asm__ ("umull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_unpacku_hi_v16qi_uu (__a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_u16 (uint16x8_t a, uint16x8_t b) +vmovl_high_u16 (uint16x8_t __a) { - uint32x4_t result; - __asm__ ("umull2 %0.4s,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_unpacku_hi_v8hi_uu (__a); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_u32 (uint32x4_t a, uint32x4_t b) +vmovl_high_u32 (uint32x4_t __a) { - uint64x2_t result; - __asm__ ("umull2 %0.2d,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_unpacku_hi_v4si_uu (__a); } -#define vmull_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmovl_s8 (int8x8_t __a) +{ + return 
__builtin_aarch64_sxtlv8hi (__a); +} __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_n_s16 (int16x4_t a, int16_t b) +vmovl_s16 (int16x4_t __a) { - int32x4_t result; - __asm__ ("smull %0.4s,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sxtlv4si (__a); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_n_s32 (int32x2_t a, int32_t b) +vmovl_s32 (int32x2_t __a) { - int64x2_t result; - __asm__ ("smull %0.2d,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sxtlv2di (__a); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmovl_u8 (uint8x8_t __a) +{ + return __builtin_aarch64_uxtlv8hi_uu (__a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_n_u16 (uint16x4_t a, uint16_t b) +vmovl_u16 (uint16x4_t __a) { - uint32x4_t result; - __asm__ ("umull %0.4s,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uxtlv4si_uu (__a); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_n_u32 (uint32x2_t a, uint32_t b) +vmovl_u32 (uint32x2_t __a) { - uint64x2_t result; - __asm__ ("umull %0.2d,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_uxtlv2di_uu (__a); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_p8 (poly8x8_t a, poly8x8_t b) +vmovn_high_s16 (int8x8_t __a, int16x8_t __b) { - poly16x8_t result; - __asm__ ("pmull %0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_xtn2v8hi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_s8 (int8x8_t a, int8x8_t b) +vmovn_high_s32 (int16x4_t __a, int32x4_t __b) { - int16x8_t result; - __asm__ ("smull %0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_xtn2v4si (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_s16 (int16x4_t a, int16x4_t b) +vmovn_high_s64 (int32x2_t __a, int64x2_t __b) { - int32x4_t result; - __asm__ ("smull %0.4s, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_xtn2v2di (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_s32 (int32x2_t a, int32x2_t b) +vmovn_high_u16 (uint8x8_t __a, uint16x8_t __b) { - int64x2_t result; - __asm__ ("smull %0.2d, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint8x16_t) + __builtin_aarch64_xtn2v8hi ((int8x8_t) __a, (int16x8_t) __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_u8 (uint8x8_t a, uint8x8_t b) +vmovn_high_u32 (uint16x4_t __a, uint32x4_t __b) { - uint16x8_t result; - __asm__ ("umull 
%0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint16x8_t) + __builtin_aarch64_xtn2v4si ((int16x4_t) __a, (int32x4_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_u16 (uint16x4_t a, uint16x4_t b) +vmovn_high_u64 (uint32x2_t __a, uint64x2_t __b) { - uint32x4_t result; - __asm__ ("umull %0.4s, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint32x4_t) + __builtin_aarch64_xtn2v2di ((int32x2_t) __a, (int64x2_t) __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_u32 (uint32x2_t a, uint32x2_t b) +vmovn_s16 (int16x8_t __a) { - uint64x2_t result; - __asm__ ("umull %0.2d, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_xtnv8hi (__a); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadal_s8 (int16x4_t a, int8x8_t b) +vmovn_s32 (int32x4_t __a) { - int16x4_t result; - __asm__ ("sadalp %0.4h,%2.8b" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_xtnv4si (__a); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadal_s16 (int32x2_t a, int16x4_t b) +vmovn_s64 (int64x2_t __a) { - int32x2_t result; - __asm__ ("sadalp %0.2s,%2.4h" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_xtnv2di (__a); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadal_s32 (int64x1_t a, int32x2_t b) +vmovn_u16 (uint16x8_t __a) { - int64x1_t result; - __asm__ ("sadalp %0.1d,%2.2s" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint8x8_t)__builtin_aarch64_xtnv8hi ((int16x8_t) __a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadal_u8 (uint16x4_t a, uint8x8_t b) +vmovn_u32 (uint32x4_t __a) { - uint16x4_t result; - __asm__ ("uadalp %0.4h,%2.8b" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint16x4_t) __builtin_aarch64_xtnv4si ((int32x4_t )__a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadal_u16 (uint32x2_t a, uint16x4_t b) +vmovn_u64 (uint64x2_t __a) { - uint32x2_t result; - __asm__ ("uadalp %0.2s,%2.4h" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint32x2_t) __builtin_aarch64_xtnv2di ((int64x2_t) __a); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadal_u32 (uint64x1_t a, uint32x2_t b) +vshrn_n_s16 (int16x8_t __a, const int __b) { - uint64x1_t result; - __asm__ ("uadalp %0.1d,%2.2s" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_shrnv8hi (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadalq_s8 (int16x8_t a, int8x16_t b) +vshrn_n_s32 (int32x4_t __a, const int __b) { - int16x8_t result; - 
__asm__ ("sadalp %0.8h,%2.16b" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_shrnv4si (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadalq_s16 (int32x4_t a, int16x8_t b) +vshrn_n_s64 (int64x2_t __a, const int __b) { - int32x4_t result; - __asm__ ("sadalp %0.4s,%2.8h" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_shrnv2di (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadalq_s32 (int64x2_t a, int32x4_t b) +vshrn_n_u16 (uint16x8_t __a, const int __b) { - int64x2_t result; - __asm__ ("sadalp %0.2d,%2.4s" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint8x8_t)__builtin_aarch64_shrnv8hi ((int16x8_t)__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadalq_u8 (uint16x8_t a, uint8x16_t b) +vshrn_n_u32 (uint32x4_t __a, const int __b) { - uint16x8_t result; - __asm__ ("uadalp %0.8h,%2.16b" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint16x4_t)__builtin_aarch64_shrnv4si ((int32x4_t)__a, __b); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_aarch64_shrnv2di ((int64x2_t)__a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_high_lane_s16 (int16x8_t __a, int16x4_t __v, const int __lane) +{ + return __builtin_aarch64_smull_hi_lanev8hi (__a, __v, __lane); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_high_lane_s32 (int32x4_t __a, int32x2_t __v, const int __lane) +{ + return __builtin_aarch64_smull_hi_lanev4si (__a, __v, __lane); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadalq_u16 (uint32x4_t a, uint16x8_t b) +vmull_high_lane_u16 (uint16x8_t __a, uint16x4_t __v, const int __lane) { - uint32x4_t result; - __asm__ ("uadalp %0.4s,%2.8h" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_umull_hi_lanev8hi_uuus (__a, __v, __lane); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadalq_u32 (uint64x2_t a, uint32x4_t b) +vmull_high_lane_u32 (uint32x4_t __a, uint32x2_t __v, const int __lane) { - uint64x2_t result; - __asm__ ("uadalp %0.2d,%2.4s" - : "=w"(result) - : "0"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_umull_hi_lanev4si_uuus (__a, __v, __lane); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddl_s8 (int8x8_t a) +vmull_high_laneq_s16 (int16x8_t __a, int16x8_t __v, const int __lane) { - int16x4_t result; - __asm__ ("saddlp %0.4h,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_smull_hi_laneqv8hi (__a, __v, __lane); } -__extension__ extern __inline int32x2_t +__extension__ extern 
__inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddl_s16 (int16x4_t a) +vmull_high_laneq_s32 (int32x4_t __a, int32x4_t __v, const int __lane) { - int32x2_t result; - __asm__ ("saddlp %0.2s,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_smull_hi_laneqv4si (__a, __v, __lane); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddl_s32 (int32x2_t a) +vmull_high_laneq_u16 (uint16x8_t __a, uint16x8_t __v, const int __lane) { - int64x1_t result; - __asm__ ("saddlp %0.1d,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_umull_hi_laneqv8hi_uuus (__a, __v, __lane); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddl_u8 (uint8x8_t a) +vmull_high_laneq_u32 (uint32x4_t __a, uint32x4_t __v, const int __lane) { - uint16x4_t result; - __asm__ ("uaddlp %0.4h,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_umull_hi_laneqv4si_uuus (__a, __v, __lane); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddl_u16 (uint16x4_t a) +vmull_high_n_s16 (int16x8_t __a, int16_t __b) { - uint32x2_t result; - __asm__ ("uaddlp %0.2s,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_smull_hi_nv8hi (__a, __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddl_u32 (uint32x2_t a) +vmull_high_n_s32 (int32x4_t __a, int32_t __b) { - uint64x1_t result; - __asm__ ("uaddlp %0.1d,%1.2s" - : "=w"(result) - : "w"(a) + return __builtin_aarch64_smull_hi_nv4si (__a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_high_n_u16 (uint16x8_t __a, uint16_t __b) +{ + return __builtin_aarch64_umull_hi_nv8hi_uuu (__a, __b); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_high_n_u32 (uint32x4_t __a, uint32_t __b) +{ + return __builtin_aarch64_umull_hi_nv4si_uuu (__a, __b); +} + +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_high_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly16x8_t __result; + __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" + : "=w"(__result) + : "w"(__a), "w"(__b) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddlq_s8 (int8x16_t a) +vmull_high_s8 (int8x16_t __a, int8x16_t __b) { - int16x8_t result; - __asm__ ("saddlp %0.8h,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_widen_smult_hi_v16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddlq_s16 (int16x8_t a) +vmull_high_s16 (int16x8_t __a, int16x8_t __b) { - int32x4_t result; - __asm__ ("saddlp %0.4s,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_widen_smult_hi_v8hi (__a, 
__b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddlq_s32 (int32x4_t a) +vmull_high_s32 (int32x4_t __a, int32x4_t __b) { - int64x2_t result; - __asm__ ("saddlp %0.2d,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_widen_smult_hi_v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddlq_u8 (uint8x16_t a) +vmull_high_u8 (uint8x16_t __a, uint8x16_t __b) { - uint16x8_t result; - __asm__ ("uaddlp %0.8h,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_widen_umult_hi_v16qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddlq_u16 (uint16x8_t a) +vmull_high_u16 (uint16x8_t __a, uint16x8_t __b) { - uint32x4_t result; - __asm__ ("uaddlp %0.4s,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_widen_umult_hi_v8hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddlq_u32 (uint32x4_t a) +vmull_high_u32 (uint32x4_t __a, uint32x4_t __b) { - uint64x2_t result; - __asm__ ("uaddlp %0.2d,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_widen_umult_hi_v4si_uuu (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_s8 (int8x16_t a, int8x16_t b) +vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) { - int8x16_t result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smult_lane_v4hi (__a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_s16 (int16x8_t a, int16x8_t b) +vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) { - int16x8_t result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smult_lane_v2si (__a, __b, __c); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return __builtin_aarch64_vec_umult_lane_v4hi_uuus (__a, __b, __c); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return __builtin_aarch64_vec_umult_lane_v2si_uuus (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_s32 (int32x4_t a, int32x4_t b) +vmull_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) { - int32x4_t result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smult_laneq_v4hi (__a, __b, __c); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_s64 (int64x2_t a, int64x2_t b) +vmull_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) { - 
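/* Illustrative sketch, not part of the patch: the full-width vmull_high_*
   and the by-lane vmull_lane_* forms above now map onto the
   vec_widen_smult_hi_* and vec_smult_lane_* builtins respectively.  A
   hypothetical widening multiply-accumulate over both vector halves:  */
#include <arm_neon.h>

static inline int32x4_t
widen_mul_both_halves (int16x8_t __a, int16x8_t __b, int32x4_t __acc)
{
  /* Low half: lanes 0..3 widened and multiplied (SMULL).  */
  __acc = vaddq_s32 (__acc,
		     vmull_s16 (vget_low_s16 (__a), vget_low_s16 (__b)));
  /* High half: lanes 4..7 without an explicit vget_high (SMULL2).  */
  return vaddq_s32 (__acc, vmull_high_s16 (__a, __b));
}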
int64x2_t result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_vec_smult_laneq_v2si (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_u8 (uint8x16_t a, uint8x16_t b) +vmull_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __c) { - uint8x16_t result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) + return __builtin_aarch64_vec_umult_laneq_v4hi_uuus (__a, __b, __c); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __c) +{ + return __builtin_aarch64_vec_umult_laneq_v2si_uuus (__a, __b, __c); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return __builtin_aarch64_smull_nv4hi (__a, __b); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return __builtin_aarch64_smull_nv2si (__a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return __builtin_aarch64_umull_nv4hi_uuu (__a, __b); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return __builtin_aarch64_umull_nv2si_uuu (__a, __b); +} + +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly16x8_t __result; + __asm__ ("pmull %0.8h, %1.8b, %2.8b" + : "=w"(__result) + : "w"(__a), "w"(__b) : /* No clobbers */); - return result; + return __result; +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_intrinsic_vec_smult_lo_v8qi (__a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_intrinsic_vec_smult_lo_v4hi (__a, __b); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_intrinsic_vec_smult_lo_v2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_u16 (uint16x8_t a, uint16x8_t b) +vmull_u8 (uint8x8_t __a, uint8x8_t __b) { - uint16x8_t result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_intrinsic_vec_umult_lo_v8qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_u32 (uint32x4_t a, uint32x4_t b) +vmull_u16 (uint16x4_t __a, uint16x4_t __b) { - uint32x4_t result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_intrinsic_vec_umult_lo_v4hi_uuu (__a, __b); } __extension__ 
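/* Illustrative sketch, not part of the patch: vmull_n_* (widening multiply
   by a scalar) and the vmull_{s,u}{8,16,32} variants above now call the
   intrinsic_vec_{s,u}mult_lo builtins, while vmull_p8/vmull_high_p8 keep
   their PMULL inline asm in this hunk.  Hypothetical example of the byte
   form:  */
#include <arm_neon.h>

static inline uint16x8_t
scale_bytes (uint8x8_t __pixels, uint8x8_t __weights)
{
  /* Widening 8-bit x 8-bit -> 16-bit multiply of eight lanes (UMULL).  */
  return vmull_u8 (__pixels, __weights);
}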
extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_u64 (uint64x2_t a, uint64x2_t b) +vmull_u32 (uint32x2_t __a, uint32x2_t __b) { - uint64x2_t result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_intrinsic_vec_umult_lo_v2si_uuu (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulh_n_s16 (int16x4_t a, int16_t b) +vpadal_s8 (int16x4_t __a, int8x8_t __b) { - int16x4_t result; - __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; + return __builtin_aarch64_sadalpv8qi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulh_n_s32 (int32x2_t a, int32_t b) +vpadal_s16 (int32x2_t __a, int16x4_t __b) { - int32x2_t result; - __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) + return __builtin_aarch64_sadalpv4hi (__a, __b); +} + +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadal_s32 (int64x1_t __a, int32x2_t __b) +{ + int64x1_t __result; + __asm__ ("sadalp %0.1d,%2.2s" + : "=w"(__result) + : "0"(__a), "w"(__b) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhq_n_s16 (int16x8_t a, int16_t b) +vpadal_u8 (uint16x4_t __a, uint8x8_t __b) { - int16x8_t result; - __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) + return __builtin_aarch64_uadalpv8qi_uuu (__a, __b); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadal_u16 (uint32x2_t __a, uint16x4_t __b) +{ + return __builtin_aarch64_uadalpv4hi_uuu (__a, __b); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadal_u32 (uint64x1_t __a, uint32x2_t __b) +{ + uint64x1_t __result; + __asm__ ("uadalp %0.1d,%2.2s" + : "=w"(__result) + : "0"(__a), "w"(__b) : /* No clobbers */); - return result; + return __result; +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadalq_s8 (int16x8_t __a, int8x16_t __b) +{ + return __builtin_aarch64_sadalpv16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhq_n_s32 (int32x4_t a, int32_t b) +vpadalq_s16 (int32x4_t __a, int16x8_t __b) { - int32x4_t result; - __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) + return __builtin_aarch64_sadalpv8hi (__a, __b); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadalq_s32 (int64x2_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sadalpv4si (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadalq_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return __builtin_aarch64_uadalpv16qi_uuu (__a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadalq_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return __builtin_aarch64_uadalpv8hi_uuu (__a, 
__b); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadalq_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return __builtin_aarch64_uadalpv4si_uuu (__a, __b); +} + +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpaddl_s8 (int8x8_t __a) +{ + int16x4_t __result; + __asm__ ("saddlp %0.4h,%1.8b" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_high_s16 (int8x8_t a, int16x8_t b) +vpaddl_s16 (int16x4_t __a) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.16b, %1.8h" - : "+w"(result) - : "w"(b) + int32x2_t __result; + __asm__ ("saddlp %0.2s,%1.4h" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_high_s32 (int16x4_t a, int32x4_t b) +vpaddl_s32 (int32x2_t __a) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.8h, %1.4s" - : "+w"(result) - : "w"(b) + int64x1_t __result; + __asm__ ("saddlp %0.1d,%1.2s" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_high_s64 (int32x2_t a, int64x2_t b) +vpaddl_u8 (uint8x8_t __a) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.4s, %1.2d" - : "+w"(result) - : "w"(b) + uint16x4_t __result; + __asm__ ("uaddlp %0.4h,%1.8b" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpaddl_u16 (uint16x4_t __a) +{ + uint32x2_t __result; + __asm__ ("uaddlp %0.2s,%1.4h" + : "=w"(__result) + : "w"(__a) + : /* No clobbers */); + return __result; +} + +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) +vpaddl_u32 (uint32x2_t __a) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.16b, %1.8h" - : "+w"(result) - : "w"(b) + uint64x1_t __result; + __asm__ ("uaddlp %0.1d,%1.2s" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) +vpaddlq_s8 (int8x16_t __a) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.8h, %1.4s" - : "+w"(result) - : "w"(b) + int16x8_t __result; + __asm__ ("saddlp %0.8h,%1.16b" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) 
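/* Illustrative sketch, not part of the patch: the pairwise add-accumulate
   intrinsics (vpadal*, UADALP/SADALP) above largely move to builtins, while
   vpaddl* keeps inline asm in this hunk.  Together they give the usual
   widening byte-sum reduction; the helper name sum_16_bytes is
   hypothetical:  */
#include <arm_neon.h>

static inline uint32_t
sum_16_bytes (const uint8_t *__p)
{
  /* Pairwise-add 8 bytes into four 16-bit lanes (UADDLP)...  */
  uint16x4_t __s16 = vpaddl_u8 (vld1_u8 (__p));
  /* ...then accumulate the next 8 bytes on top (UADALP).  */
  __s16 = vpadal_u8 (__s16, vld1_u8 (__p + 8));
  /* Widen the running sums the rest of the way down to one lane.  */
  uint32x2_t __s32 = vpaddl_u16 (__s16);
  uint64x1_t __s64 = vpaddl_u32 (__s32);
  return (uint32_t) vget_lane_u64 (__s64, 0);
}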
+vpaddlq_s16 (int16x8_t __a) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.4s, %1.2d" - : "+w"(result) - : "w"(b) + int32x4_t __result; + __asm__ ("saddlp %0.4s,%1.8h" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovun_high_s16 (uint8x8_t a, int16x8_t b) +vpaddlq_s32 (int32x4_t __a) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.16b, %1.8h" - : "+w"(result) - : "w"(b) + int64x2_t __result; + __asm__ ("saddlp %0.2d,%1.4s" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovun_high_s32 (uint16x4_t a, int32x4_t b) +vpaddlq_u8 (uint8x16_t __a) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.8h, %1.4s" - : "+w"(result) - : "w"(b) + uint16x8_t __result; + __asm__ ("uaddlp %0.8h,%1.16b" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovun_high_s64 (uint32x2_t a, int64x2_t b) +vpaddlq_u16 (uint16x8_t __a) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.4s, %1.2d" - : "+w"(result) - : "w"(b) + uint32x4_t __result; + __asm__ ("uaddlp %0.4s,%1.8h" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulh_n_s16 (int16x4_t a, int16_t b) +vpaddlq_u32 (uint32x4_t __a) { - int16x4_t result; - __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) + uint64x2_t __result; + __asm__ ("uaddlp %0.2d,%1.4s" + : "=w"(__result) + : "w"(__a) : /* No clobbers */); - return result; + return __result; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulh_n_s32 (int32x2_t a, int32_t b) +vpaddq_s8 (int8x16_t __a, int8x16_t __b) { - int32x2_t result; - __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) + int8x16_t __result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" + : "=w"(__result) + : "w"(__a), "w"(__b) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhq_n_s16 (int16x8_t a, int16_t b) +vpaddq_s16 (int16x8_t __a, int16x8_t __b) { - int16x8_t result; - __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) + int16x8_t __result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" + : "=w"(__result) + : "w"(__a), "w"(__b) : /* No clobbers */); - return result; + return __result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhq_n_s32 (int32x4_t a, int32_t b) +vpaddq_s32 (int32x4_t __a, int32x4_t __b) { - int32x4_t result; - __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) + int32x4_t __result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" + 
: "=w"(__result) + : "w"(__a), "w"(__b) : /* No clobbers */); - return result; + return __result; } -#define vqrshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrn_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrn_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrn_high_n_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrn_high_n_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrun_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrun_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqrshrun_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrn_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = 
vcombine_s16 \ - (a_, vcreate_s16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrn_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrn_high_n_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vqshrn_high_n_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + int64x2_t __result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vqshrun_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16_t __result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vqshrun_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8_t __result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vqshrun_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpaddq_u32 (uint32x4_t 
__a, uint32x4_t __b) +{ + uint32x4_t __result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vrshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + uint64x2_t __result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vrshrn_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + int16x4_t __result; + __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" + : "=w"(__result) + : "w"(__a), "x"(__b) + : /* No clobbers */); + return __result; +} -#define vrshrn_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + int32x2_t __result; + __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vrshrn_high_n_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + int16x8_t __result; + __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" + : "=w"(__result) + : "w"(__a), "x"(__b) + : /* No clobbers */); + return __result; +} -#define vrshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + int32x4_t __result; + __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vrshrn_high_n_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("rshrn2 
%0.4s,%1.2d,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqmovn_high_s16 (int8x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqxtn2v8hi (__a, __b); +} -#define vrshrn_n_s16(a, b) \ - __extension__ \ - ({ \ - int16x8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("rshrn %0.8b,%1.8h,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqmovn_high_s32 (int16x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sqxtn2v4si (__a, __b); +} -#define vrshrn_n_s32(a, b) \ - __extension__ \ - ({ \ - int32x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("rshrn %0.4h,%1.4s,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqmovn_high_s64 (int32x2_t __a, int64x2_t __b) +{ + return __builtin_aarch64_sqxtn2v2di (__a, __b); +} -#define vrshrn_n_s64(a, b) \ - __extension__ \ - ({ \ - int64x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("rshrn %0.2s,%1.2d,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqmovn_high_u16 (uint8x8_t __a, uint16x8_t __b) +{ + return __builtin_aarch64_uqxtn2v8hi_uuu (__a, __b); +} -#define vrshrn_n_u16(a, b) \ - __extension__ \ - ({ \ - uint16x8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("rshrn %0.8b,%1.8h,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqmovn_high_u32 (uint16x4_t __a, uint32x4_t __b) +{ + return __builtin_aarch64_uqxtn2v4si_uuu (__a, __b); +} -#define vrshrn_n_u32(a, b) \ - __extension__ \ - ({ \ - uint32x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("rshrn %0.4h,%1.4s,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqmovn_high_u64 (uint32x2_t __a, uint64x2_t __b) +{ + return __builtin_aarch64_uqxtn2v2di_uuu (__a, __b); +} -#define vrshrn_n_u64(a, b) \ - __extension__ \ - ({ \ - uint64x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("rshrn %0.2s,%1.2d,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqmovun_high_s16 (uint8x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqxtun2v8hi_uus (__a, __b); +} -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrte_u32 (uint32x2_t a) +vqmovun_high_s32 (uint16x4_t __a, int32x4_t __b) { - uint32x2_t result; - __asm__ ("ursqrte %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_sqxtun2v4si_uus (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrteq_u32 (uint32x4_t a) +vqmovun_high_s64 (uint32x2_t __a, int64x2_t 
__b) { - uint32x4_t result; - __asm__ ("ursqrte %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; + return __builtin_aarch64_sqxtun2v2di_uus (__a, __b); } -#define vshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_high_n_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_high_n_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_n_s16(a, b) \ - __extension__ \ - ({ \ - int16x8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("shrn %0.8b,%1.8h,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_n_s32(a, b) \ - __extension__ \ - ({ \ - int32x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("shrn %0.4h,%1.4s,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_n_s64(a, b) \ - __extension__ \ - ({ \ - int64x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("shrn %0.2s,%1.2d,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_n_u16(a, b) \ - __extension__ \ - ({ \ - uint16x8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("shrn %0.8b,%1.8h,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_n_u32(a, b) \ - __extension__ \ - ({ \ - uint32x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("shrn %0.4h,%1.4s,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vshrn_n_u64(a, b) \ - __extension__ \ - ({ \ - uint64x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("shrn %0.2s,%1.2d,%2" \ - : "=w"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsli_n_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x8_t b_ = (b); \ - poly8x8_t a_ 
= (a); \ - poly8x8_t result; \ - __asm__ ("sli %0.8b,%2.8b,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsli_n_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x4_t b_ = (b); \ - poly16x4_t a_ = (a); \ - poly16x4_t result; \ - __asm__ ("sli %0.4h,%2.4h,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsliq_n_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x16_t b_ = (b); \ - poly8x16_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("sli %0.16b,%2.16b,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vsliq_n_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x8_t b_ = (b); \ - poly16x8_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("sli %0.8h,%2.8h,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + int16x4_t __result; + __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" + : "=w"(__result) + : "w"(__a), "x"(__b) + : /* No clobbers */); + return __result; +} -#define vsri_n_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x8_t b_ = (b); \ - poly8x8_t a_ = (a); \ - poly8x8_t result; \ - __asm__ ("sri %0.8b,%2.8b,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + int32x2_t __result; + __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vsri_n_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x4_t b_ = (b); \ - poly16x4_t a_ = (a); \ - poly16x4_t result; \ - __asm__ ("sri %0.4h,%2.4h,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + int16x8_t __result; + __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" + : "=w"(__result) + : "w"(__a), "x"(__b) + : /* No clobbers */); + return __result; +} -#define vsri_n_p64(a, b, c) \ - __extension__ \ - ({ \ - poly64x1_t b_ = (b); \ - poly64x1_t a_ = (a); \ - poly64x1_t result; \ - __asm__ ("sri %d0,%d2,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers. 
*/); \ - result; \ - }) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + int32x4_t __result; + __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; +} -#define vsriq_n_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x16_t b_ = (b); \ - poly8x16_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("sri %0.16b,%2.16b,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrn2_nv8hi (__a, __b, __c); +} -#define vsriq_n_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x8_t b_ = (b); \ - poly16x8_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("sri %0.8h,%2.8h,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrn2_nv4si (__a, __b, __c); +} -#define vsriq_n_p64(a, b, c) \ - __extension__ \ - ({ \ - poly64x2_t b_ = (b); \ - poly64x2_t a_ = (a); \ - poly64x2_t result; \ - __asm__ ("sri %0.2d,%2.2d,%3" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "i"(c) \ - : /* No clobbers. */); \ - result; \ - }) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrn2_nv2di (__a, __b, __c); +} -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_p8 (poly8x8_t a, poly8x8_t b) +vqrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) { - return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b)) - != 0); + return __builtin_aarch64_uqrshrn2_nv8hi_uuus (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_p16 (poly16x4_t a, poly16x4_t b) +vqrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) { - return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b)) - != 0); + return __builtin_aarch64_uqrshrn2_nv4si_uuus (__a, __b, __c); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_p64 (poly64x1_t a, poly64x1_t b) +vqrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) { - return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0)); + return __builtin_aarch64_uqrshrn2_nv2di_uuus (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_p8 (poly8x16_t a, poly8x16_t b) +vqrshrun_high_n_s16 (uint8x8_t __a, int16x8_t __b, const int __c) { - return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b)) - != 0); + return __builtin_aarch64_sqrshrun2_nv8hi_uuss (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_p16 (poly16x8_t a, poly16x8_t b) +vqrshrun_high_n_s32 (uint16x4_t __a, int32x4_t 
__b, const int __c) { - return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b)) - != 0); + return __builtin_aarch64_sqrshrun2_nv4si_uuss (__a, __b, __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_p64 (poly64x2_t a, poly64x2_t b) +vqrshrun_high_n_s64 (uint32x2_t __a, int64x2_t __b, const int __c) { - return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b)) - != __AARCH64_INT64_C (0)); + return __builtin_aarch64_sqrshrun2_nv2di_uuss (__a, __b, __c); } -/* End of temporary inline asm implementations. */ - -/* Start of temporary inline asm for vldn, vstn and friends. */ +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqshrn2_nv8hi (__a, __b, __c); +} -/* Create struct element types for duplicating loads. - - Create 2 element structures of: - - +------+----+----+----+----+ - | | 8 | 16 | 32 | 64 | - +------+----+----+----+----+ - |int | Y | Y | N | N | - +------+----+----+----+----+ - |uint | Y | Y | N | N | - +------+----+----+----+----+ - |float | - | Y | N | N | - +------+----+----+----+----+ - |poly | Y | Y | - | - | - +------+----+----+----+----+ +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqshrn2_nv4si (__a, __b, __c); +} - Create 3 element structures of: +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_sqshrn2_nv2di (__a, __b, __c); +} - +------+----+----+----+----+ - | | 8 | 16 | 32 | 64 | - +------+----+----+----+----+ - |int | Y | Y | Y | Y | - +------+----+----+----+----+ - |uint | Y | Y | Y | Y | - +------+----+----+----+----+ - |float | - | Y | Y | Y | - +------+----+----+----+----+ - |poly | Y | Y | - | - | - +------+----+----+----+----+ +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) +{ + return __builtin_aarch64_uqshrn2_nv8hi_uuus (__a, __b, __c); +} - Create 4 element structures of: +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) +{ + return __builtin_aarch64_uqshrn2_nv4si_uuus (__a, __b, __c); +} - +------+----+----+----+----+ - | | 8 | 16 | 32 | 64 | - +------+----+----+----+----+ - |int | Y | N | N | Y | - +------+----+----+----+----+ - |uint | Y | N | N | Y | - +------+----+----+----+----+ - |float | - | N | N | Y | - +------+----+----+----+----+ - |poly | Y | N | - | - | - +------+----+----+----+----+ +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) +{ + return __builtin_aarch64_uqshrn2_nv2di_uuus (__a, __b, __c); +} - This is required for casting memory reference. 
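/* Illustrative sketch, not part of the patch: the saturating narrow-high
   intrinsics above (vqshrn_high_n_*, vqrshrn_high_n_*, vqrshrun_high_n_*)
   move from statement-expression macros over inline asm to inline functions
   calling builtins, so their arguments are now properly type-checked.
   Hypothetical example packing two 32-bit accumulators into one
   int16x8_t:  */
#include <arm_neon.h>

static inline int16x8_t
pack_accumulators (int32x4_t __lo, int32x4_t __hi)
{
  /* Narrow with rounding and saturation, shifting right by 15 (SQRSHRN).  */
  int16x4_t __low_half = vqrshrn_n_s32 (__lo, 15);
  /* SQRSHRN2 places the narrowed __hi into lanes 4..7.  */
  return vqrshrn_high_n_s32 (__low_half, __hi, 15);
}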
*/ -#define __STRUCTN(t, sz, nelem) \ - typedef struct t ## sz ## x ## nelem ## _t { \ - t ## sz ## _t val[nelem]; \ - } t ## sz ## x ## nelem ## _t; +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrun_high_n_s16 (uint8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqshrun2_nv8hi_uuss (__a, __b, __c); +} -/* 2-element structs. */ -__STRUCTN (int, 8, 2) -__STRUCTN (int, 16, 2) -__STRUCTN (uint, 8, 2) -__STRUCTN (uint, 16, 2) -__STRUCTN (float, 16, 2) -__STRUCTN (poly, 8, 2) -__STRUCTN (poly, 16, 2) -/* 3-element structs. */ -__STRUCTN (int, 8, 3) -__STRUCTN (int, 16, 3) -__STRUCTN (int, 32, 3) -__STRUCTN (int, 64, 3) -__STRUCTN (uint, 8, 3) -__STRUCTN (uint, 16, 3) -__STRUCTN (uint, 32, 3) -__STRUCTN (uint, 64, 3) -__STRUCTN (float, 16, 3) -__STRUCTN (float, 32, 3) -__STRUCTN (float, 64, 3) -__STRUCTN (poly, 8, 3) -__STRUCTN (poly, 16, 3) -/* 4-element structs. */ -__STRUCTN (int, 8, 4) -__STRUCTN (int, 64, 4) -__STRUCTN (uint, 8, 4) -__STRUCTN (uint, 64, 4) -__STRUCTN (poly, 8, 4) -__STRUCTN (float, 64, 4) -#undef __STRUCTN +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrun_high_n_s32 (uint16x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqshrun2_nv4si_uuss (__a, __b, __c); +} +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrun_high_n_s64 (uint32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_sqshrun2_nv2di_uuss (__a, __b, __c); +} -#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ - qmode, ptr_mode, funcsuffix, signedtype) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_oi __o; \ - largetype __temp; \ - __temp.val[0] \ - = vcombine_##funcsuffix (__b.val[0], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[1] \ - = vcombine_##funcsuffix (__b.val[1], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __o = __builtin_aarch64_set_qregoi##qmode (__o, \ - (signedtype) __temp.val[0], 0); \ - __o = __builtin_aarch64_set_qregoi##qmode (__o, \ - (signedtype) __temp.val[1], 1); \ - __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __o, __c); \ +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_rshrn2v8hi (__a, __b, __c); } -__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST2_LANE_FUNC 
(int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, - int64x2_t) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_rshrn2v4si (__a, __b, __c); +} -#undef __ST2_LANE_FUNC -#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - union { intype __i; \ - __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ - __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __temp.__o, __c); \ +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_rshrn2v2di (__a, __b, __c); } -__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16) -__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) -__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) -__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) -__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) -__ST2_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64) -__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) -__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) -__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) -__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) -__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) -__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) -__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) -__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_rshrn2v8hi ((int8x8_t) __a, + (int16x8_t) __b, __c); +} -#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ - qmode, ptr_mode, funcsuffix, signedtype) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_ci __o; \ - largetype __temp; \ - __temp.val[0] \ - = vcombine_##funcsuffix (__b.val[0], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[1] \ - = vcombine_##funcsuffix (__b.val[1], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[2] \ - = vcombine_##funcsuffix (__b.val[2], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[0], 0); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[1], 1); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[2], 2); \ - __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __o, __c); \ +__extension__ extern __inline uint16x8_t +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_rshrn2v4si ((int16x4_t) __a, + (int32x4_t) __b, __c); } -__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, - int64x2_t) +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_rshrn2v2di ((int32x2_t)__a, + (int64x2_t)__b, __c); +} -#undef __ST3_LANE_FUNC -#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - union { intype __i; \ - __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ - __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __temp.__o, __c); \ +__extension__ extern __inline int8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return __builtin_aarch64_rshrnv8hi (__a, __b); } -__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16) -__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) -__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) -__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) -__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) -__ST3_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64) -__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) -__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) -__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) -__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) -__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) -__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) -__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) -__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return __builtin_aarch64_rshrnv4si (__a, __b); +} -#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ - qmode, ptr_mode, funcsuffix, signedtype) \ 
-__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - largetype __temp; \ - __temp.val[0] \ - = vcombine_##funcsuffix (__b.val[0], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[1] \ - = vcombine_##funcsuffix (__b.val[1], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[2] \ - = vcombine_##funcsuffix (__b.val[2], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[3] \ - = vcombine_##funcsuffix (__b.val[3], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[0], 0); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[1], 1); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[2], 2); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[3], 3); \ - __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __o, __c); \ -} - -__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, - int64x2_t) - -#undef __ST4_LANE_FUNC -#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - union { intype __i; \ - __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ - __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __temp.__o, __c); \ -} - -__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16) -__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) -__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) -__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) -__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) -__ST4_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64) -__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) -__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) -__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) -__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) -__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) -__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, 
hi, u16) -__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) -__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlv_s32 (int32x2_t a) -{ - int64_t result; - __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ extern __inline uint64_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddlv_u32 (uint32x2_t a) +vrshrn_n_s64 (int64x2_t __a, const int __b) { - uint64_t result; - __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); - return result; + return __builtin_aarch64_rshrnv2di (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +vrshrn_n_u16 (uint16x8_t __a, const int __b) { - return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); + return (uint8x8_t) __builtin_aarch64_rshrnv8hi ((int16x8_t) __a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +vrshrn_n_u32 (uint32x4_t __a, const int __b) { - return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); + return (uint16x4_t) __builtin_aarch64_rshrnv4si ((int32x4_t) __a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +vrshrn_n_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); + return (uint32x2_t) __builtin_aarch64_rshrnv2di ((int64x2_t) __a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +vrsqrte_u32 (uint32x2_t __a) { - return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); + return __builtin_aarch64_ursqrtev2si_uu (__a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +vrsqrteq_u32 (uint32x4_t __a) { - return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); + return __builtin_aarch64_ursqrtev4si_uu (__a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +vshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c) { - return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); + return __builtin_aarch64_shrn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +vshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); + return __builtin_aarch64_shrn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
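/* Illustrative sketch (editor's example, not part of the patch): the rounding
   narrowing shifts rewritten above as builtins pack a wide vector into a
   narrow one, and the "high" forms fill the upper half of an existing narrow
   vector.  Assumes <arm_neon.h> on an AArch64 target; the helper name and
   shift count below are hypothetical.  */
static int8x16_t
example_round_narrow_pair (int16x8_t lo, int16x8_t hi)
{
  int8x8_t low_half = vrshrn_n_s16 (lo, 2);      /* lanes 0..7, rounded >> 2 */
  return vrshrn_high_n_s16 (low_half, hi, 2);    /* lanes 8..15 */
}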
-vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +vshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c) { - return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); + return __builtin_aarch64_shrn2v2di (__a, __b, __c); } -/* Table intrinsics. */ - -__extension__ extern __inline poly8x8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1_p8 (poly8x16_t a, uint8x8_t b) +vshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) { - poly8x8_t result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint8x16_t) + __builtin_aarch64_shrn2v8hi ((int8x8_t) __a, (int16x8_t) __b, __c); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1_s8 (int8x16_t a, uint8x8_t b) +vshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) { - int8x8_t result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint16x8_t) + __builtin_aarch64_shrn2v4si ((int16x4_t) __a, (int32x4_t) __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1_u8 (uint8x16_t a, uint8x8_t b) +vshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) { - uint8x8_t result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint32x4_t) + __builtin_aarch64_shrn2v2di ((int32x2_t) __a, (int64x2_t) __b, __c); } -__extension__ extern __inline poly8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) -{ - poly8x16_t result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} +#define vsli_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("sli %0.8b,%2.8b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1q_s8 (int8x16_t a, uint8x16_t b) -{ - int8x16_t result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} +#define vsli_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("sli %0.4h,%2.4h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} +#define vsliq_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("sli %0.16b,%2.16b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ extern __inline int8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx1_s8 (int8x8_t r, int8x16_t tab, 
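/* Illustrative sketch, not part of the patch: the vsli_n macros added above
   (and the vsri_n forms that follow) shift the second operand and insert the
   shifted bits into the first operand, leaving the remaining destination bits
   untouched.  Assumes <arm_neon.h>; the helper and shift count are
   hypothetical.  */
static poly8x8_t
example_pack_nibbles (poly8x8_t lo, poly8x8_t hi)
{
  /* Per lane: bits 7..4 come from hi, bits 3..0 are kept from lo.  */
  return vsli_n_p8 (lo, hi, 4);
}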
uint8x8_t idx) -{ - int8x8_t result = r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; -} +#define vsliq_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("sli %0.8h,%2.8h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ extern __inline uint8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) -{ - uint8x8_t result = r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; -} +#define vsri_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("sri %0.8b,%2.8b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ extern __inline poly8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) -{ - poly8x8_t result = r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; -} +#define vsri_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("sri %0.4h,%2.4h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) -{ - int8x16_t result = r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; -} +#define vsri_n_p64(a, b, c) \ + __extension__ \ + ({ \ + poly64x1_t b_ = (b); \ + poly64x1_t a_ = (a); \ + poly64x1_t result; \ + __asm__ ("sri %d0,%d2,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers. */); \ + result; \ + }) -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) -{ - uint8x16_t result = r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; -} - -__extension__ extern __inline poly8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) -{ - poly8x16_t result = r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(result) - : "w"(tab), "w"(idx) - : /* No clobbers */); - return result; -} +#define vsriq_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("sri %0.16b,%2.16b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -/* V7 legacy table intrinsics. 
*/ +#define vsriq_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("sri %0.8h,%2.8h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) -__extension__ extern __inline int8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl1_s8 (int8x8_t tab, int8x8_t idx) -{ - int8x8_t result; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; -} +#define vsriq_n_p64(a, b, c) \ + __extension__ \ + ({ \ + poly64x2_t b_ = (b); \ + poly64x2_t a_ = (a); \ + poly64x2_t result; \ + __asm__ ("sri %0.2d,%2.2d,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers. */); \ + result; \ + }) __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) -{ - uint8x8_t result; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; -} - -__extension__ extern __inline poly8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) +vtst_p8 (poly8x8_t __a, poly8x8_t __b) { - poly8x8_t result; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + return (uint8x8_t) ((((uint8x8_t) __a) & ((uint8x8_t) __b)) + != 0); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) +vtst_p16 (poly16x4_t __a, poly16x4_t __b) { - int8x8_t result; - int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + return (uint16x4_t) ((((uint16x4_t) __a) & ((uint16x4_t) __b)) + != 0); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) +vtst_p64 (poly64x1_t __a, poly64x1_t __b) { - uint8x8_t result; - uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) +vtstq_p8 (poly8x16_t __a, poly8x16_t __b) { - poly8x8_t result; - poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; + return (uint8x16_t) ((((uint8x16_t) __a) & ((uint8x16_t) __b)) + != 0); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) +vtstq_p16 (poly16x8_t __a, poly16x8_t __b) { - int8x8_t result; - int8x16x2_t 
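/* Illustrative sketch, not part of the patch: the vtst_p8/vtst_p16 intrinsics
   added above return an all-ones lane wherever (a & b) is non-zero, which is
   convenient for testing flag bits lane by lane.  Assumes <arm_neon.h>; the
   helper name is hypothetical.  */
static uint8x8_t
example_lanes_with_flag (poly8x8_t flags, poly8x8_t mask)
{
  /* 0xff in lanes where any masked bit is set, 0x00 otherwise.  */
  return vtst_p8 (flags, mask);
}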
temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = __builtin_aarch64_tbl3v8qi (__o, idx); - return result; + return (uint16x8_t) ((((uint16x8_t) __a) & ((uint16x8_t) __b)) + != 0); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) +vtstq_p64 (poly64x2_t __a, poly64x2_t __b) { - uint8x8_t result; - uint8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); - return result; + return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b)) + != __AARCH64_INT64_C (0)); } -__extension__ extern __inline poly8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) -{ - poly8x8_t result; - poly8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); - return result; -} +/* End of temporary inline asm implementations. */ -__extension__ extern __inline int8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) -{ - int8x8_t result; - int8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = __builtin_aarch64_tbl3v8qi (__o, idx); - return result; -} +/* Start of temporary inline asm for vldn, vstn and friends. */ -__extension__ extern __inline uint8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) -{ - uint8x8_t result; - uint8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); - return result; -} +/* Create struct element types for duplicating loads. 
-__extension__ extern __inline poly8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) -{ - poly8x8_t result; - poly8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); - return result; -} + Create 2 element structures of: -__extension__ extern __inline int8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) -{ - int8x8_t result = r; - int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; -} + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | Y | N | N | + +------+----+----+----+----+ + |uint | Y | Y | N | N | + +------+----+----+----+----+ + |float | - | Y | N | N | + +------+----+----+----+----+ + |poly | Y | Y | - | - | + +------+----+----+----+----+ -__extension__ extern __inline uint8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) -{ - uint8x8_t result = r; - uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; -} + Create 3 element structures of: -__extension__ extern __inline poly8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) -{ - poly8x8_t result = r; - poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(result) - : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; -} + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | Y | Y | Y | + +------+----+----+----+----+ + |uint | Y | Y | Y | Y | + +------+----+----+----+----+ + |float | - | Y | Y | Y | + +------+----+----+----+----+ + |poly | Y | Y | - | - | + +------+----+----+----+----+ -/* End of temporary inline asm. */ + Create 4 element structures of: -/* Start of optimal implementations in approved order. */ + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | N | N | Y | + +------+----+----+----+----+ + |uint | Y | N | N | Y | + +------+----+----+----+----+ + |float | - | N | N | Y | + +------+----+----+----+----+ + |poly | Y | N | - | - | + +------+----+----+----+----+ -/* vabd. */ + This is required for casting memory reference. */ +#define __STRUCTN(t, sz, nelem) \ + typedef struct t ## sz ## x ## nelem ## _t { \ + t ## sz ## _t val[nelem]; \ + } t ## sz ## x ## nelem ## _t; -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabds_f32 (float32_t __a, float32_t __b) -{ - return __builtin_aarch64_fabdsf (__a, __b); -} +/* 2-element structs. */ +__STRUCTN (int, 8, 2) +__STRUCTN (int, 16, 2) +__STRUCTN (uint, 8, 2) +__STRUCTN (uint, 16, 2) +__STRUCTN (float, 16, 2) +__STRUCTN (poly, 8, 2) +__STRUCTN (poly, 16, 2) +/* 3-element structs. 
*/ +__STRUCTN (int, 8, 3) +__STRUCTN (int, 16, 3) +__STRUCTN (int, 32, 3) +__STRUCTN (int, 64, 3) +__STRUCTN (uint, 8, 3) +__STRUCTN (uint, 16, 3) +__STRUCTN (uint, 32, 3) +__STRUCTN (uint, 64, 3) +__STRUCTN (float, 16, 3) +__STRUCTN (float, 32, 3) +__STRUCTN (float, 64, 3) +__STRUCTN (poly, 8, 3) +__STRUCTN (poly, 16, 3) +/* 4-element structs. */ +__STRUCTN (int, 8, 4) +__STRUCTN (int, 64, 4) +__STRUCTN (uint, 8, 4) +__STRUCTN (uint, 64, 4) +__STRUCTN (poly, 8, 4) +__STRUCTN (float, 64, 4) +#undef __STRUCTN -__extension__ extern __inline float64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdd_f64 (float64_t __a, float64_t __b) -{ - return __builtin_aarch64_fabddf (__a, __b); -} -__extension__ extern __inline float32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_f32 (float32x2_t __a, float32x2_t __b) -{ - return __builtin_aarch64_fabdv2sf (__a, __b); +#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ + qmode, ptr_mode, funcsuffix, signedtype) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_oi __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregoi##qmode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregoi##qmode (__o, \ + (signedtype) __temp.val[1], 1); \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ } -__extension__ extern __inline float64x1_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_f64 (float64x1_t __a, float64x1_t __b) -{ - return (float64x1_t) {vabdd_f64 (vget_lane_f64 (__a, 0), - vget_lane_f64 (__b, 0))}; +__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16, + float16x8_t) +__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, + float64x2_t) +__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, + int16x8_t) +__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64, + poly64x2_t) +__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, + int64x2_t) +__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, + int16x8_t) +__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, + int32x4_t) +__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, + int64x2_t) + +#define __ST2Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union 
{ intype __i; \ + __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_f32 (float32x4_t __a, float32x4_t __b) -{ - return __builtin_aarch64_fabdv4sf (__a, __b); +__ST2Q_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16) +__ST2Q_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) +__ST2Q_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) +__ST2Q_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) +__ST2Q_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) +__ST2Q_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64) +__ST2Q_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) +__ST2Q_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) +__ST2Q_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) +__ST2Q_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) +__ST2Q_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) +__ST2Q_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) +__ST2Q_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) +__ST2Q_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) + +#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ + qmode, ptr_mode, funcsuffix, signedtype) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_ci __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregci##qmode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregci##qmode (__o, \ + (signedtype) __temp.val[1], 1); \ + __o = __builtin_aarch64_set_qregci##qmode (__o, \ + (signedtype) __temp.val[2], 2); \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ } -__extension__ extern __inline float64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_f64 (float64x2_t __a, float64x2_t __b) -{ - return __builtin_aarch64_fabdv2df (__a, __b); +__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16, + float16x8_t) +__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, + float64x2_t) +__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, + int16x8_t) +__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64, + poly64x2_t) +__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, + int64x2_t) +__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, + int16x8_t) +__ST3_LANE_FUNC 
(uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, + int32x4_t) +__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, + int64x2_t) + +#define __ST3Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ } -/* vabs */ +__ST3Q_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16) +__ST3Q_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) +__ST3Q_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) +__ST3Q_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) +__ST3Q_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) +__ST3Q_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64) +__ST3Q_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) +__ST3Q_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) +__ST3Q_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) +__ST3Q_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) +__ST3Q_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) +__ST3Q_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) +__ST3Q_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) +__ST3Q_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) -__extension__ extern __inline float32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabs_f32 (float32x2_t __a) -{ - return __builtin_aarch64_absv2sf (__a); +#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ + qmode, ptr_mode, funcsuffix, signedtype) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_xi __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[3] \ + = vcombine_##funcsuffix (__b.val[3], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[1], 1); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[2], 2); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[3], 3); \ + __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ } -__extension__ extern __inline float64x1_t +__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16, + float16x8_t) +__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, + float32x4_t) +__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, + float64x2_t) +__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, + int16x8_t) +__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64, + poly64x2_t) +__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) 
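/* Illustrative sketch, not part of the patch: the __ST2/__ST3/__ST4 lane
   macros above expand to intrinsics such as vst3q_lane_f32, which store one
   chosen lane from each vector of a multi-vector structure to consecutive
   memory.  Assumes <arm_neon.h>; the helper and lane index are hypothetical.  */
static void
example_store_lane0 (float32_t *out, float32x4x3_t rgb)
{
  /* Writes rgb.val[0][0], rgb.val[1][0], rgb.val[2][0] to out[0..2].  */
  vst3q_lane_f32 (out, rgb, 0);
}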
+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, + int64x2_t) +__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, + int16x8_t) +__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, + int32x4_t) +__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, + int64x2_t) + +#define __ST4Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ + __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ +} + +__ST4Q_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16) +__ST4Q_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) +__ST4Q_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) +__ST4Q_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) +__ST4Q_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) +__ST4Q_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64) +__ST4Q_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) +__ST4Q_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) +__ST4Q_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) +__ST4Q_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) +__ST4Q_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) +__ST4Q_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) +__ST4Q_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) +__ST4Q_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabs_f64 (float64x1_t __a) +vaddlv_s32 (int32x2_t __a) { - return (float64x1_t) {__builtin_fabs (__a[0])}; + return __builtin_aarch64_saddlvv2si (__a); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabs_s8 (int8x8_t __a) +vaddlv_u32 (uint32x2_t __a) { - return __builtin_aarch64_absv8qi (__a); + return __builtin_aarch64_uaddlvv2si_uu (__a); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabs_s16 (int16x4_t __a) +vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) { - return __builtin_aarch64_absv4hi (__a); + return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabs_s32 (int32x2_t __a) +vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_absv2si (__a); + return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabs_s64 (int64x1_t __a) +vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) { - return (int64x1_t) {__builtin_aarch64_absdi (__a[0])}; + return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
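/* Illustrative sketch, not part of the patch: vaddlv_s32/vaddlv_u32, now
   implemented with builtins instead of inline asm above, add the two lanes of
   a 64-bit vector into a widened 64-bit scalar.  Assumes <arm_neon.h>; the
   helper name is hypothetical.  */
static int64_t
example_sum_two_lanes (int32x2_t v)
{
  /* Same value as (int64_t) vget_lane_s32 (v, 0) + vget_lane_s32 (v, 1).  */
  return vaddlv_s32 (v);
}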
__artificial__)) -vabsq_f32 (float32x4_t __a) +vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_absv4sf (__a); + return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabsq_f64 (float64x2_t __a) +vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) { - return __builtin_aarch64_absv2df (__a); + return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabsq_s8 (int8x16_t __a) +vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_absv16qi (__a); + return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabsq_s16 (int16x8_t __a) +vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) { - return __builtin_aarch64_absv8hi (__a); + return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabsq_s32 (int32x4_t __a) +vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_absv4si (__a); + return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); } -__extension__ extern __inline int64x2_t +/* Table intrinsics. */ + +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabsq_s64 (int64x2_t __a) +vqtbl1_p8 (poly8x16_t __a, uint8x8_t __b) { - return __builtin_aarch64_absv2di (__a); + poly8x8_t __result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; } -/* vadd */ - -__extension__ extern __inline int64_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddd_s64 (int64_t __a, int64_t __b) +vqtbl1_s8 (int8x16_t __a, uint8x8_t __b) { - return __a + __b; + int8x8_t __result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddd_u64 (uint64_t __a, uint64_t __b) +vqtbl1_u8 (uint8x16_t __a, uint8x8_t __b) { - return __a + __b; + uint8x8_t __result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; } -/* vaddv */ - -__extension__ extern __inline int8_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddv_s8 (int8x8_t __a) +vqtbl1q_p8 (poly8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_reduc_plus_scal_v8qi (__a); + poly8x16_t __result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int16_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddv_s16 (int16x4_t __a) +vqtbl1q_s8 (int8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_reduc_plus_scal_v4hi (__a); + int8x16_t __result; + __asm__ 
("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int32_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddv_s32 (int32x2_t __a) +vqtbl1q_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_reduc_plus_scal_v2si (__a); + uint8x16_t __result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(__result) + : "w"(__a), "w"(__b) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint8_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddv_u8 (uint8x8_t __a) +vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx) { - return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a); + int8x8_t __result = __r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(__result) + : "w"(__tab), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint16_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddv_u16 (uint16x4_t __a) +vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx) { - return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a); + uint8x8_t __result = __r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(__result) + : "w"(__tab), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint32_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddv_u32 (uint32x2_t __a) +vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, uint8x8_t __idx) { - return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a); + poly8x8_t __result = __r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(__result) + : "w"(__tab), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_s8 (int8x16_t __a) +vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx) { - return __builtin_aarch64_reduc_plus_scal_v16qi (__a); + int8x16_t __result = __r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(__result) + : "w"(__tab), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int16_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_s16 (int16x8_t __a) +vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx) { - return __builtin_aarch64_reduc_plus_scal_v8hi (__a); + uint8x16_t __result = __r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(__result) + : "w"(__tab), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int32_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_s32 (int32x4_t __a) +vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx) { - return __builtin_aarch64_reduc_plus_scal_v4si (__a); + poly8x16_t __result = __r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(__result) + : "w"(__tab), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int64_t +/* V7 legacy table intrinsics. 
*/ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_s64 (int64x2_t __a) +vtbl1_s8 (int8x8_t __tab, int8x8_t __idx) { - return __builtin_aarch64_reduc_plus_scal_v2di (__a); + int8x8_t __result; + int8x16_t __temp = vcombine_s8 (__tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_u8 (uint8x16_t __a) +vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx) { - return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a); + uint8x8_t __result; + uint8x16_t __temp = vcombine_u8 (__tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint16_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_u16 (uint16x8_t __a) +vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx) { - return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a); + poly8x8_t __result; + poly8x16_t __temp = vcombine_p8 (__tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint32_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_u32 (uint32x4_t __a) +vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx) { - return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a); + int8x8_t __result; + int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_u64 (uint64x2_t __a) +vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx) { - return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a); + uint8x8_t __result; + uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline float32_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddv_f32 (float32x2_t __a) +vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_reduc_plus_scal_v2sf (__a); + poly8x8_t __result; + poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline float32_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_f32 (float32x4_t __a) +vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx) { - return __builtin_aarch64_reduc_plus_scal_v4sf (__a); + int8x8_t __result; + int8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_s8 (__tab.val[2], 
vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = __builtin_aarch64_tbl3v8qi (__o, __idx); + return __result; } -__extension__ extern __inline float64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddvq_f64 (float64x2_t __a) +vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_reduc_plus_scal_v2df (__a); + uint8x8_t __result; + uint8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_u8 (__tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __result; } -/* vbsl */ - -__extension__ extern __inline float16x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) +vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_simd_bslv4hf_suss (__a, __b, __c); + poly8x8_t __result; + poly8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_p8 (__tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __result; } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) +vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx) { - return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c); + int8x8_t __result; + int8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = __builtin_aarch64_tbl3v8qi (__o, __idx); + return __result; } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c) +vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx) { - return (float64x1_t) - { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) }; + uint8x8_t __result; + uint8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __result; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_p8 (uint8x8_t __a, 
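/* Illustrative sketch, not part of the patch: the vqtbl1 and vtbl table
   intrinsics above perform byte-wise lookups; an out-of-range index yields
   zero for the tbl forms, while the tbx forms leave that destination byte
   unchanged.  Assumes <arm_neon.h>; the helper name is hypothetical.  */
static uint8x16_t
example_permute_bytes (uint8x16_t table, uint8x16_t index)
{
  /* Result byte i is table[index[i]], or 0 when index[i] >= 16.  */
  return vqtbl1q_u8 (table, index);
}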
poly8x8_t __b, poly8x8_t __c) +vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c); + poly8x8_t __result; + poly8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __result; } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) +vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx) { - return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c); + int8x8_t __result = __r; + int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline poly64x1_t + +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) +vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx) { - return (poly64x1_t) - {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])}; + uint8x8_t __result = __r; + uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int8x8_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) +vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c); + poly8x8_t __result = __r; + poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(__result) + : "w"(__temp), "w"(__idx) + : /* No clobbers */); + return __result; } -__extension__ extern __inline int16x4_t +/* End of temporary inline asm. */ + +/* Start of optimal implementations in approved order. */ + +/* vabd. 
*/ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) +vabds_f32 (float32_t __a, float32_t __b) { - return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c); + return __builtin_aarch64_fabdsf (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) +vabdd_f64 (float64_t __a, float64_t __b) { - return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c); + return __builtin_aarch64_fabddf (__a, __b); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) +vabd_f32 (float32x2_t __a, float32x2_t __b) { - return (int64x1_t) - {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])}; + return __builtin_aarch64_fabdv2sf (__a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +vabd_f64 (float64x1_t __a, float64x1_t __b) { - return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c); + return (float64x1_t) {vabdd_f64 (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0))}; } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +vabdq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c); + return __builtin_aarch64_fabdv4sf (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +vabdq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c); + return __builtin_aarch64_fabdv2df (__a, __b); } -__extension__ extern __inline uint64x1_t +/* vabs */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) +vabs_f32 (float32x2_t __a) { - return (uint64x1_t) - {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])}; + return __builtin_aarch64_absv2sf (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) +vabs_f64 (float64x1_t __a) { - return __builtin_aarch64_simd_bslv8hf_suss (__a, __b, __c); + return (float64x1_t) {__builtin_fabs (__a[0])}; } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) +vabs_s8 (int8x8_t __a) { - return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c); + return __builtin_aarch64_absv8qi (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c) +vabs_s16 (int16x4_t 
__a) { - return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c); + return __builtin_aarch64_absv4hi (__a); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) +vabs_s32 (int32x2_t __a) { - return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c); + return __builtin_aarch64_absv2si (__a); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) +vabs_s64 (int64x1_t __a) { - return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c); + return (int64x1_t) {__builtin_aarch64_absdi (__a[0])}; } -__extension__ extern __inline int8x16_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) +vabsq_f32 (float32x4_t __a) { - return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c); + return __builtin_aarch64_absv4sf (__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) +vabsq_f64 (float64x2_t __a) { - return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c); + return __builtin_aarch64_absv2df (__a); } -__extension__ extern __inline poly64x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) +vabsq_s8 (int8x16_t __a) { - return __builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c); + return __builtin_aarch64_absv16qi (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) +vabsq_s16 (int16x8_t __a) { - return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c); + return __builtin_aarch64_absv8hi (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) +vabsq_s32 (int32x4_t __a) { - return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c); + return __builtin_aarch64_absv4si (__a); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +vabsq_s64 (int64x2_t __a) { - return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c); + return __builtin_aarch64_absv2di (__a); } -__extension__ extern __inline uint16x8_t +/* Try to avoid moving between integer and vector registers. + For why the cast to unsigned is needed check the vnegd_s64 intrinsic. + There is a testcase related to this issue: + gcc.target/aarch64/vabsd_s64.c. */ + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +vabsd_s64 (int64_t __a) { - return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c); + return __a < 0 ? 
- (uint64_t) __a : __a; } -__extension__ extern __inline uint32x4_t +/* vadd */ + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +vaddd_s64 (int64_t __a, int64_t __b) { - return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c); + return __a + __b; } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +vaddd_u64 (uint64_t __a, uint64_t __b) { - return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); + return __a + __b; } -/* ARMv8.1-A instrinsics. */ -#pragma GCC push_options -#pragma GCC target ("+nothing+rdma") +/* vaddv */ -__extension__ extern __inline int16x4_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +vaddv_s8 (int8x8_t __a) { - return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c); + return __builtin_aarch64_reduc_plus_scal_v8qi (__a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +vaddv_s16 (int16x4_t __a) { - return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c); + return __builtin_aarch64_reduc_plus_scal_v4hi (__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +vaddv_s32 (int32x2_t __a) { - return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c); + return __builtin_aarch64_reduc_plus_scal_v2si (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +vaddv_u8 (uint8x8_t __a) { - return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c); + return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +vaddv_u16 (uint16x4_t __a) { - return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c); + return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +vaddv_u32 (uint32x2_t __a) { - return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c); + return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +vaddvq_s8 (int8x16_t __a) { - return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c); + return __builtin_aarch64_reduc_plus_scal_v16qi (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) 
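The comment on vabsd_s64 just above spells out why the negation goes through an unsigned cast: negating INT64_MIN as a signed value would overflow, while the unsigned negation wraps and so matches what the AArch64 ABS instruction produces. A minimal sketch of that behaviour, assuming an AArch64 target; the test harness (main, printf) is illustrative only and not part of the patch:

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* Ordinary values: plain absolute value.  */
  printf ("%lld\n", (long long) vabsd_s64 (-42));        /* prints 42 */

  /* INT64_MIN: the negation is done on an unsigned value, so it wraps
     back to INT64_MIN, matching the AArch64 ABS instruction and
     avoiding signed-overflow undefined behaviour.  */
  printf ("%lld\n", (long long) vabsd_s64 (INT64_MIN));  /* prints -9223372036854775808 */

  return 0;
}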
+vaddvq_s16 (int16x8_t __a) { - return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c); + return __builtin_aarch64_reduc_plus_scal_v8hi (__a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +vaddvq_s32 (int32x4_t __a) { - return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); + return __builtin_aarch64_reduc_plus_scal_v4si (__a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +vaddvq_s64 (int64x2_t __a) { - return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); + return __builtin_aarch64_reduc_plus_scal_v2di (__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +vaddvq_u8 (uint8x16_t __a) { - return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); + return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +vaddvq_u16 (uint16x8_t __a) { - return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); + return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +vaddvq_u32 (uint32x4_t __a) { - return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); + return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +vaddvq_u64 (uint64x2_t __a) { - return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); + return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +vaddv_f32 (float32x2_t __a) { - return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); + return __builtin_aarch64_reduc_plus_scal_v2sf (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +vaddvq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); + return __builtin_aarch64_reduc_plus_scal_v4sf (__a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) 
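The vaddv/vaddvq definitions added around here map the across-lane additions onto __builtin_aarch64_reduc_plus_scal_*, returning a single scalar that is the sum of all lanes. A small usage sketch under the same assumption of an AArch64 target; vdup_n_u8 and the driver code are standard arm_neon.h and libc pieces used only for illustration:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8x8_t v = vdup_n_u8 (3);                 /* eight lanes of 3 */
  uint32x4_t w = {1, 2, 3, 4};

  /* Across-lane additions: every lane is summed into one scalar.  */
  printf ("%u\n", (unsigned) vaddv_u8 (v));    /* prints 24 */
  printf ("%u\n", (unsigned) vaddvq_u32 (w));  /* prints 10 */

  return 0;
}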
+vaddvq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); + return __builtin_aarch64_reduc_plus_scal_v2df (__a); } -__extension__ extern __inline int32x2_t +/* vbsl */ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) { - return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv4hf_suss (__a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) { - return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c) { - return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); + return (float64x1_t) + { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) }; } -__extension__ extern __inline int16_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) +vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) { - return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c); + return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) { - return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c); } - -__extension__ extern __inline int16_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) { - return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); + return (poly64x1_t) + {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])}; } -__extension__ extern __inline int32_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) +vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) { - return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c); + return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) { - return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c); } -__extension__ extern 
__inline int32_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) +vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) { - return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) { - return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d); + return (int64x1_t) + {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])}; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { - return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { - return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { - return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c) +vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) { - return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c); + return (uint64x1_t) + {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])}; } -__extension__ extern __inline int16_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv8hf_suss (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshs_s32 (int32_t __a, int32_t __b, 
int32_t __c) +vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c) { - return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c); + return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) { - return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) +vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { - return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d); + return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c); } -#pragma GCC pop_options - -#pragma GCC push_options -#pragma GCC target ("+nothing+crypto") -/* vaes */ -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaeseq_u8 (uint8x16_t data, uint8x16_t key) +vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) { - return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); + return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaesdq_u8 (uint8x16_t data, uint8x16_t key) +vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) { - return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); + return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaesmcq_u8 (uint8x16_t data) +vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) { - return __builtin_aarch64_crypto_aesmcv16qi_uu (data); + return __builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaesimcq_u8 (uint8x16_t data) +vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) { - return __builtin_aarch64_crypto_aesimcv16qi_uu (data); + return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c); } -#pragma GCC pop_options - -/* vcage */ -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcage_f64 (float64x1_t __a, float64x1_t __b) +vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) { - return vabs_f64 (__a) >= vabs_f64 (__b); + return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c); } -__extension__ extern __inline uint32_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcages_f32 (float32_t __a, float32_t __b) +vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { - return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? 
-1 : 0; + return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcage_f32 (float32x2_t __a, float32x2_t __b) +vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { - return vabs_f32 (__a) >= vabs_f32 (__b); + return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcageq_f32 (float32x4_t __a, float32x4_t __b) +vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { - return vabsq_f32 (__a) >= vabsq_f32 (__b); + return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaged_f64 (float64_t __a, float64_t __b) +vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { - return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0; + return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); } -__extension__ extern __inline uint64x2_t +/* ARMv8.1-A instrinsics. */ +#pragma GCC push_options +#pragma GCC target ("+nothing+rdma") + +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcageq_f64 (float64x2_t __a, float64x2_t __b) +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) { - return vabsq_f64 (__a) >= vabsq_f64 (__b); + return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c); } -/* vcagt */ - -__extension__ extern __inline uint32_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagts_f32 (float32_t __a, float32_t __b) +vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) { - return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0; + return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagt_f32 (float32x2_t __a, float32x2_t __b) +vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { - return vabs_f32 (__a) > vabs_f32 (__b); + return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagt_f64 (float64x1_t __a, float64x1_t __b) +vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { - return vabs_f64 (__a) > vabs_f64 (__b); + return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagtq_f32 (float32x4_t __a, float32x4_t __b) +vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) { - return vabsq_f32 (__a) > vabsq_f32 (__b); + return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c); } -__extension__ extern __inline uint64_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagtd_f64 (float64_t __a, float64_t __b) +vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) { - return __builtin_fabs (__a) > __builtin_fabs (__b) ? 
-1 : 0; + return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagtq_f64 (float64x2_t __a, float64x2_t __b) +vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { - return vabsq_f64 (__a) > vabsq_f64 (__b); + return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c); } -/* vcale */ - -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcale_f32 (float32x2_t __a, float32x2_t __b) +vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { - return vabs_f32 (__a) <= vabs_f32 (__b); + return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcale_f64 (float64x1_t __a, float64x1_t __b) +vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) { - return vabs_f64 (__a) <= vabs_f64 (__b); + return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); } -__extension__ extern __inline uint64_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaled_f64 (float64_t __a, float64_t __b) +vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) { - return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0; + return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcales_f32 (float32_t __a, float32_t __b) +vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) { - return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? 
-1 : 0; + return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaleq_f32 (float32x4_t __a, float32x4_t __b) +vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) { - return vabsq_f32 (__a) <= vabsq_f32 (__b); + return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaleq_f64 (float64x2_t __a, float64x2_t __b) +vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) { - return vabsq_f64 (__a) <= vabsq_f64 (__b); + return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); } -/* vcalt */ - -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcalt_f32 (float32x2_t __a, float32x2_t __b) +vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) { - return vabs_f32 (__a) < vabs_f32 (__b); + return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcalt_f64 (float64x1_t __a, float64x1_t __b) +vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) { - return vabs_f64 (__a) < vabs_f64 (__b); + return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); } -__extension__ extern __inline uint64_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaltd_f64 (float64_t __a, float64_t __b) +vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) { - return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0; + return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaltq_f32 (float32x4_t __a, float32x4_t __b) +vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) { - return vabsq_f32 (__a) < vabsq_f32 (__b); + return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaltq_f64 (float64x2_t __a, float64x2_t __b) +vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) { - return vabsq_f64 (__a) < vabsq_f64 (__b); + return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcalts_f32 (float32_t __a, float32_t __b) +vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) { - return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0; + return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); } -/* vceq - vector. 
*/ - -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_f32 (float32x2_t __a, float32x2_t __b) +vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) { - return (uint32x2_t) (__a == __b); + return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_f64 (float64x1_t __a, float64x1_t __b) +vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) { - return (uint64x1_t) (__a == __b); + return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_p8 (poly8x8_t __a, poly8x8_t __b) +vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) { - return (uint8x8_t) (__a == __b); + return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_p64 (poly64x1_t __a, poly64x1_t __b) +vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) { - return (uint64x1_t) (__a == __b); + return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_s8 (int8x8_t __a, int8x8_t __b) +vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) { - return (uint8x8_t) (__a == __b); + return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_s16 (int16x4_t __a, int16x4_t __b) +vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) { - return (uint16x4_t) (__a == __b); + return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_s32 (int32x2_t __a, int32x2_t __b) +vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) { - return (uint32x2_t) (__a == __b); + return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_s64 (int64x1_t __a, int64x1_t __b) +vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) { - return (uint64x1_t) (__a == __b); + return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_u8 (uint8x8_t __a, uint8x8_t __b) +vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) { - return (__a == __b); + return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_u16 (uint16x4_t __a, uint16x4_t __b) +vqrdmlshq_lane_s16 (int16x8_t __a, 
int16x8_t __b, int16x4_t __c, const int __d) { - return (__a == __b); + return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_u32 (uint32x2_t __a, uint32x2_t __b) +vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) { - return (__a == __b); + return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_u64 (uint64x1_t __a, uint64x1_t __b) +vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c) { - return (__a == __b); + return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_f32 (float32x4_t __a, float32x4_t __b) +vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) { - return (uint32x4_t) (__a == __b); + return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_f64 (float64x2_t __a, float64x2_t __b) +vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) { - return (uint64x2_t) (__a == __b); + return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_p8 (poly8x16_t __a, poly8x16_t __b) +vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c) { - return (uint8x16_t) (__a == __b); + return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_s8 (int8x16_t __a, int8x16_t __b) +vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) { - return (uint8x16_t) (__a == __b); + return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_s16 (int16x8_t __a, int16x8_t __b) +vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) { - return (uint16x8_t) (__a == __b); + return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d); } +#pragma GCC pop_options -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_s32 (int32x4_t __a, int32x4_t __b) -{ - return (uint32x4_t) (__a == __b); -} +#pragma GCC push_options +#pragma GCC target ("+nothing+crypto") +/* vaes */ -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_s64 (int64x2_t __a, int64x2_t __b) +vaeseq_u8 (uint8x16_t data, uint8x16_t key) { - return (uint64x2_t) (__a == __b); + return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_u8 (uint8x16_t __a, uint8x16_t __b) +vaesdq_u8 (uint8x16_t data, uint8x16_t key) { - return (__a 
== __b); + return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_u16 (uint16x8_t __a, uint16x8_t __b) +vaesmcq_u8 (uint8x16_t data) { - return (__a == __b); + return __builtin_aarch64_crypto_aesmcv16qi_uu (data); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +vaesimcq_u8 (uint8x16_t data) { - return (__a == __b); + return __builtin_aarch64_crypto_aesimcv16qi_uu (data); } +#pragma GCC pop_options -__extension__ extern __inline uint64x2_t +/* vcage */ + +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_u64 (uint64x2_t __a, uint64x2_t __b) +vcage_f64 (float64x1_t __a, float64x1_t __b) { - return (__a == __b); + return vabs_f64 (__a) >= vabs_f64 (__b); } -/* vceq - scalar. */ - __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqs_f32 (float32_t __a, float32_t __b) +vcages_f32 (float32_t __a, float32_t __b) { - return __a == __b ? -1 : 0; + return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0; } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqd_s64 (int64_t __a, int64_t __b) +vcage_f32 (float32x2_t __a, float32x2_t __b) { - return __a == __b ? -1ll : 0ll; + return vabs_f32 (__a) >= vabs_f32 (__b); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqd_u64 (uint64_t __a, uint64_t __b) +vcageq_f32 (float32x4_t __a, float32x4_t __b) { - return __a == __b ? -1ll : 0ll; + return vabsq_f32 (__a) >= vabsq_f32 (__b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqd_f64 (float64_t __a, float64_t __b) +vcaged_f64 (float64_t __a, float64_t __b) { - return __a == __b ? -1ll : 0ll; + return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0; } -/* vceqz - vector. */ - -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_f32 (float32x2_t __a) +vcageq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint32x2_t) (__a == 0.0f); + return vabsq_f64 (__a) >= vabsq_f64 (__b); } -__extension__ extern __inline uint64x1_t +/* vcagt */ + +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_f64 (float64x1_t __a) +vcagts_f32 (float32_t __a, float32_t __b) { - return (uint64x1_t) (__a == (float64x1_t) {0.0}); + return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? 
-1 : 0; } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_p8 (poly8x8_t __a) +vcagt_f32 (float32x2_t __a, float32x2_t __b) { - return (uint8x8_t) (__a == 0); + return vabs_f32 (__a) > vabs_f32 (__b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_s8 (int8x8_t __a) +vcagt_f64 (float64x1_t __a, float64x1_t __b) { - return (uint8x8_t) (__a == 0); + return vabs_f64 (__a) > vabs_f64 (__b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_s16 (int16x4_t __a) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint16x4_t) (__a == 0); + return vabsq_f32 (__a) > vabsq_f32 (__b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_s32 (int32x2_t __a) +vcagtd_f64 (float64_t __a, float64_t __b) { - return (uint32x2_t) (__a == 0); + return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0; } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_s64 (int64x1_t __a) +vcagtq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); + return vabsq_f64 (__a) > vabsq_f64 (__b); } -__extension__ extern __inline uint8x8_t +/* vcale */ + +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_u8 (uint8x8_t __a) +vcale_f32 (float32x2_t __a, float32x2_t __b) { - return (__a == 0); + return vabs_f32 (__a) <= vabs_f32 (__b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_u16 (uint16x4_t __a) +vcale_f64 (float64x1_t __a, float64x1_t __b) { - return (__a == 0); + return vabs_f64 (__a) <= vabs_f64 (__b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_u32 (uint32x2_t __a) +vcaled_f64 (float64_t __a, float64_t __b) { - return (__a == 0); + return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0; } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_u64 (uint64x1_t __a) +vcales_f32 (float32_t __a, float32_t __b) { - return (__a == __AARCH64_UINT64_C (0)); + return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? 
-1 : 0; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_f32 (float32x4_t __a) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) (__a == 0.0f); + return vabsq_f32 (__a) <= vabsq_f32 (__b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_f64 (float64x2_t __a) +vcaleq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) (__a == 0.0f); + return vabsq_f64 (__a) <= vabsq_f64 (__b); } -__extension__ extern __inline uint8x16_t +/* vcalt */ + +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_p8 (poly8x16_t __a) +vcalt_f32 (float32x2_t __a, float32x2_t __b) { - return (uint8x16_t) (__a == 0); + return vabs_f32 (__a) < vabs_f32 (__b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_s8 (int8x16_t __a) +vcalt_f64 (float64x1_t __a, float64x1_t __b) { - return (uint8x16_t) (__a == 0); + return vabs_f64 (__a) < vabs_f64 (__b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_s16 (int16x8_t __a) +vcaltd_f64 (float64_t __a, float64_t __b) { - return (uint16x8_t) (__a == 0); + return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_s32 (int32x4_t __a) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) (__a == 0); + return vabsq_f32 (__a) < vabsq_f32 (__b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_s64 (int64x2_t __a) +vcaltq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) (__a == __AARCH64_INT64_C (0)); + return vabsq_f64 (__a) < vabsq_f64 (__b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_u8 (uint8x16_t __a) +vcalts_f32 (float32_t __a, float32_t __b) { - return (__a == 0); + return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0; } -__extension__ extern __inline uint16x8_t +/* vceq - vector. */ + +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_u16 (uint16x8_t __a) +vceq_f32 (float32x2_t __a, float32x2_t __b) { - return (__a == 0); + return (uint32x2_t) (__a == __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_u32 (uint32x4_t __a) +vceq_f64 (float64x1_t __a, float64x1_t __b) { - return (__a == 0); + return (uint64x1_t) (__a == __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_u64 (uint64x2_t __a) +vceq_p8 (poly8x8_t __a, poly8x8_t __b) { - return (__a == __AARCH64_UINT64_C (0)); + return (uint8x8_t) (__a == __b); } -/* vceqz - scalar. */ - -__extension__ extern __inline uint32_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzs_f32 (float32_t __a) +vceq_p64 (poly64x1_t __a, poly64x1_t __b) { - return __a == 0.0f ? 
-1 : 0; + return (uint64x1_t) (__a == __b); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzd_s64 (int64_t __a) -{ - return __a == 0 ? -1ll : 0ll; -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzd_u64 (uint64_t __a) +vceq_s8 (int8x8_t __a, int8x8_t __b) { - return __a == 0 ? -1ll : 0ll; + return (uint8x8_t) (__a == __b); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzd_f64 (float64_t __a) +vceq_s16 (int16x4_t __a, int16x4_t __b) { - return __a == 0.0 ? -1ll : 0ll; + return (uint16x4_t) (__a == __b); } -/* vcge - vector. */ - __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_f32 (float32x2_t __a, float32x2_t __b) +vceq_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t) (__a >= __b); + return (uint32x2_t) (__a == __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_f64 (float64x1_t __a, float64x1_t __b) +vceq_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) (__a >= __b); + return (uint64x1_t) (__a == __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_s8 (int8x8_t __a, int8x8_t __b) +vceq_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) (__a >= __b); + return (__a == __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_s16 (int16x4_t __a, int16x4_t __b) +vceq_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) (__a >= __b); + return (__a == __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_s32 (int32x2_t __a, int32x2_t __b) +vceq_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) (__a >= __b); + return (__a == __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_s64 (int64x1_t __a, int64x1_t __b) +vceq_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) (__a >= __b); + return (__a == __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_u8 (uint8x8_t __a, uint8x8_t __b) +vceqq_f32 (float32x4_t __a, float32x4_t __b) { - return (__a >= __b); + return (uint32x4_t) (__a == __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_u16 (uint16x4_t __a, uint16x4_t __b) +vceqq_f64 (float64x2_t __a, float64x2_t __b) { - return (__a >= __b); + return (uint64x2_t) (__a == __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_u32 (uint32x2_t __a, uint32x2_t __b) +vceqq_p8 (poly8x16_t __a, poly8x16_t __b) { - return (__a >= __b); + return (uint8x16_t) (__a == __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_u64 (uint64x1_t __a, uint64x1_t __b) +vceqq_s8 (int8x16_t __a, int8x16_t __b) { - return (__a >= __b); + 
return (uint8x16_t) (__a == __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) (__a == __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_f32 (float32x4_t __a, float32x4_t __b) +vceqq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t) (__a >= __b); + return (uint32x4_t) (__a == __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_f64 (float64x2_t __a, float64x2_t __b) +vceqq_s64 (int64x2_t __a, int64x2_t __b) { - return (uint64x2_t) (__a >= __b); + return (uint64x2_t) (__a == __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_s8 (int8x16_t __a, int8x16_t __b) +vceqq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) (__a >= __b); + return (__a == __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_s16 (int16x8_t __a, int16x8_t __b) +vceqq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) (__a >= __b); + return (__a == __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_s32 (int32x4_t __a, int32x4_t __b) +vceqq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) (__a >= __b); + return (__a == __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_s64 (int64x2_t __a, int64x2_t __b) +vceqq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t) (__a >= __b); + return (__a == __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) +vceqq_p64 (poly64x2_t __a, poly64x2_t __b) { - return (__a >= __b); + return (__a == __b); } -__extension__ extern __inline uint16x8_t +/* vceq - scalar. */ + +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) +vceqs_f32 (float32_t __a, float32_t __b) { - return (__a >= __b); + return __a == __b ? -1 : 0; } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) +vceqd_s64 (int64_t __a, int64_t __b) { - return (__a >= __b); + return __a == __b ? -1ll : 0ll; } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) +vceqd_u64 (uint64_t __a, uint64_t __b) { - return (__a >= __b); + return __a == __b ? -1ll : 0ll; } -/* vcge - scalar. */ +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqd_f64 (float64_t __a, float64_t __b) +{ + return __a == __b ? -1ll : 0ll; +} -__extension__ extern __inline uint32_t +/* vceqz - vector. */ + +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcges_f32 (float32_t __a, float32_t __b) +vceqz_f32 (float32x2_t __a) { - return __a >= __b ? 
-1 : 0; + return (uint32x2_t) (__a == 0.0f); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcged_s64 (int64_t __a, int64_t __b) +vceqz_f64 (float64x1_t __a) { - return __a >= __b ? -1ll : 0ll; + return (uint64x1_t) (__a == (float64x1_t) {0.0}); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcged_u64 (uint64_t __a, uint64_t __b) +vceqz_p8 (poly8x8_t __a) { - return __a >= __b ? -1ll : 0ll; + return (uint8x8_t) (__a == 0); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcged_f64 (float64_t __a, float64_t __b) +vceqz_s8 (int8x8_t __a) { - return __a >= __b ? -1ll : 0ll; + return (uint8x8_t) (__a == 0); } -/* vcgez - vector. */ +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqz_s16 (int16x4_t __a) +{ + return (uint16x4_t) (__a == 0); +} __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgez_f32 (float32x2_t __a) +vceqz_s32 (int32x2_t __a) { - return (uint32x2_t) (__a >= 0.0f); + return (uint32x2_t) (__a == 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgez_f64 (float64x1_t __a) +vceqz_s64 (int64x1_t __a) { - return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0}); + return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgez_s8 (int8x8_t __a) +vceqz_u8 (uint8x8_t __a) { - return (uint8x8_t) (__a >= 0); + return (__a == 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgez_s16 (int16x4_t __a) +vceqz_u16 (uint16x4_t __a) { - return (uint16x4_t) (__a >= 0); + return (__a == 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgez_s32 (int32x2_t __a) +vceqz_u32 (uint32x2_t __a) { - return (uint32x2_t) (__a >= 0); + return (__a == 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgez_s64 (int64x1_t __a) +vceqz_u64 (uint64x1_t __a) { - return (uint64x1_t) (__a >= __AARCH64_INT64_C (0)); + return (__a == __AARCH64_UINT64_C (0)); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqz_p64 (poly64x1_t __a) +{ + return (__a == __AARCH64_UINT64_C (0)); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezq_f32 (float32x4_t __a) +vceqzq_f32 (float32x4_t __a) { - return (uint32x4_t) (__a >= 0.0f); + return (uint32x4_t) (__a == 0.0f); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezq_f64 (float64x2_t __a) +vceqzq_f64 (float64x2_t __a) { - return (uint64x2_t) (__a >= 0.0); + return (uint64x2_t) (__a == 0.0f); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezq_s8 (int8x16_t __a) +vceqzq_p8 (poly8x16_t __a) { - return (uint8x16_t) (__a >= 0); + return (uint8x16_t) (__a == 0); +} + +__extension__ extern __inline uint8x16_t +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vceqzq_s8 (int8x16_t __a) +{ + return (uint8x16_t) (__a == 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezq_s16 (int16x8_t __a) +vceqzq_s16 (int16x8_t __a) { - return (uint16x8_t) (__a >= 0); + return (uint16x8_t) (__a == 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezq_s32 (int32x4_t __a) +vceqzq_s32 (int32x4_t __a) { - return (uint32x4_t) (__a >= 0); + return (uint32x4_t) (__a == 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezq_s64 (int64x2_t __a) +vceqzq_s64 (int64x2_t __a) { - return (uint64x2_t) (__a >= __AARCH64_INT64_C (0)); + return (uint64x2_t) (__a == __AARCH64_INT64_C (0)); } -/* vcgez - scalar. */ - -__extension__ extern __inline uint32_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezs_f32 (float32_t __a) +vceqzq_u8 (uint8x16_t __a) { - return __a >= 0.0f ? -1 : 0; + return (__a == 0); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezd_s64 (int64_t __a) +vceqzq_u16 (uint16x8_t __a) { - return __a >= 0 ? -1ll : 0ll; + return (__a == 0); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezd_f64 (float64_t __a) +vceqzq_u32 (uint32x4_t __a) { - return __a >= 0.0 ? -1ll : 0ll; + return (__a == 0); } -/* vcgt - vector. */ - -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_f32 (float32x2_t __a, float32x2_t __b) +vceqzq_u64 (uint64x2_t __a) { - return (uint32x2_t) (__a > __b); + return (__a == __AARCH64_UINT64_C (0)); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_f64 (float64x1_t __a, float64x1_t __b) +vceqzq_p64 (poly64x2_t __a) { - return (uint64x1_t) (__a > __b); + return (__a == __AARCH64_UINT64_C (0)); +} + +/* vceqz - scalar. */ + +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqzs_f32 (float32_t __a) +{ + return __a == 0.0f ? -1 : 0; +} + +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqzd_s64 (int64_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} + +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqzd_u64 (uint64_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} + +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqzd_f64 (float64_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} + +/* vcge - vector. 
*/ + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) (__a >= __b); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcge_f64 (float64x1_t __a, float64x1_t __b) +{ + return (uint64x1_t) (__a >= __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_s8 (int8x8_t __a, int8x8_t __b) +vcge_s8 (int8x8_t __a, int8x8_t __b) { - return (uint8x8_t) (__a > __b); + return (uint8x8_t) (__a >= __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_s16 (int16x4_t __a, int16x4_t __b) +vcge_s16 (int16x4_t __a, int16x4_t __b) { - return (uint16x4_t) (__a > __b); + return (uint16x4_t) (__a >= __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_s32 (int32x2_t __a, int32x2_t __b) +vcge_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t) (__a > __b); + return (uint32x2_t) (__a >= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_s64 (int64x1_t __a, int64x1_t __b) +vcge_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) (__a > __b); + return (uint64x1_t) (__a >= __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_u8 (uint8x8_t __a, uint8x8_t __b) +vcge_u8 (uint8x8_t __a, uint8x8_t __b) { - return (__a > __b); + return (__a >= __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_u16 (uint16x4_t __a, uint16x4_t __b) +vcge_u16 (uint16x4_t __a, uint16x4_t __b) { - return (__a > __b); + return (__a >= __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_u32 (uint32x2_t __a, uint32x2_t __b) +vcge_u32 (uint32x2_t __a, uint32x2_t __b) { - return (__a > __b); + return (__a >= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_u64 (uint64x1_t __a, uint64x1_t __b) +vcge_u64 (uint64x1_t __a, uint64x1_t __b) { - return (__a > __b); + return (__a >= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_f32 (float32x4_t __a, float32x4_t __b) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) (__a > __b); + return (uint32x4_t) (__a >= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_f64 (float64x2_t __a, float64x2_t __b) +vcgeq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) (__a > __b); + return (uint64x2_t) (__a >= __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_s8 (int8x16_t __a, int8x16_t __b) +vcgeq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x16_t) (__a > __b); + return (uint8x16_t) (__a >= __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_s16 (int16x8_t __a, int16x8_t __b) +vcgeq_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x8_t) (__a > __b); + return (uint16x8_t) (__a >= __b); } __extension__ extern __inline uint32x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_s32 (int32x4_t __a, int32x4_t __b) +vcgeq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t) (__a > __b); + return (uint32x4_t) (__a >= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_s64 (int64x2_t __a, int64x2_t __b) +vcgeq_s64 (int64x2_t __a, int64x2_t __b) { - return (uint64x2_t) (__a > __b); + return (uint64x2_t) (__a >= __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) +vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (__a > __b); + return (__a >= __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) +vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (__a > __b); + return (__a >= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) +vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (__a > __b); + return (__a >= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) +vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (__a > __b); + return (__a >= __b); } -/* vcgt - scalar. */ +/* vcge - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgts_f32 (float32_t __a, float32_t __b) +vcges_f32 (float32_t __a, float32_t __b) { - return __a > __b ? -1 : 0; + return __a >= __b ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtd_s64 (int64_t __a, int64_t __b) +vcged_s64 (int64_t __a, int64_t __b) { - return __a > __b ? -1ll : 0ll; + return __a >= __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtd_u64 (uint64_t __a, uint64_t __b) +vcged_u64 (uint64_t __a, uint64_t __b) { - return __a > __b ? -1ll : 0ll; + return __a >= __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtd_f64 (float64_t __a, float64_t __b) +vcged_f64 (float64_t __a, float64_t __b) { - return __a > __b ? -1ll : 0ll; + return __a >= __b ? -1ll : 0ll; } -/* vcgtz - vector. */ +/* vcgez - vector. 
*/ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtz_f32 (float32x2_t __a) +vcgez_f32 (float32x2_t __a) { - return (uint32x2_t) (__a > 0.0f); + return (uint32x2_t) (__a >= 0.0f); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtz_f64 (float64x1_t __a) +vcgez_f64 (float64x1_t __a) { - return (uint64x1_t) (__a > (float64x1_t) {0.0}); + return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0}); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtz_s8 (int8x8_t __a) +vcgez_s8 (int8x8_t __a) { - return (uint8x8_t) (__a > 0); + return (uint8x8_t) (__a >= 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtz_s16 (int16x4_t __a) +vcgez_s16 (int16x4_t __a) { - return (uint16x4_t) (__a > 0); + return (uint16x4_t) (__a >= 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtz_s32 (int32x2_t __a) +vcgez_s32 (int32x2_t __a) { - return (uint32x2_t) (__a > 0); + return (uint32x2_t) (__a >= 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtz_s64 (int64x1_t __a) +vcgez_s64 (int64x1_t __a) { - return (uint64x1_t) (__a > __AARCH64_INT64_C (0)); + return (uint64x1_t) (__a >= __AARCH64_INT64_C (0)); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzq_f32 (float32x4_t __a) +vcgezq_f32 (float32x4_t __a) { - return (uint32x4_t) (__a > 0.0f); + return (uint32x4_t) (__a >= 0.0f); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzq_f64 (float64x2_t __a) +vcgezq_f64 (float64x2_t __a) { - return (uint64x2_t) (__a > 0.0); + return (uint64x2_t) (__a >= 0.0); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzq_s8 (int8x16_t __a) +vcgezq_s8 (int8x16_t __a) { - return (uint8x16_t) (__a > 0); + return (uint8x16_t) (__a >= 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzq_s16 (int16x8_t __a) +vcgezq_s16 (int16x8_t __a) { - return (uint16x8_t) (__a > 0); + return (uint16x8_t) (__a >= 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzq_s32 (int32x4_t __a) +vcgezq_s32 (int32x4_t __a) { - return (uint32x4_t) (__a > 0); + return (uint32x4_t) (__a >= 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzq_s64 (int64x2_t __a) +vcgezq_s64 (int64x2_t __a) { - return (uint64x2_t) (__a > __AARCH64_INT64_C (0)); + return (uint64x2_t) (__a >= __AARCH64_INT64_C (0)); } -/* vcgtz - scalar. */ +/* vcgez - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzs_f32 (float32_t __a) +vcgezs_f32 (float32_t __a) { - return __a > 0.0f ? -1 : 0; + return __a >= 0.0f ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzd_s64 (int64_t __a) +vcgezd_s64 (int64_t __a) { - return __a > 0 ? -1ll : 0ll; + return __a >= 0 ? 
-1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzd_f64 (float64_t __a) +vcgezd_f64 (float64_t __a) { - return __a > 0.0 ? -1ll : 0ll; + return __a >= 0.0 ? -1ll : 0ll; } -/* vcle - vector. */ +/* vcgt - vector. */ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_f32 (float32x2_t __a, float32x2_t __b) +vcgt_f32 (float32x2_t __a, float32x2_t __b) { - return (uint32x2_t) (__a <= __b); + return (uint32x2_t) (__a > __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_f64 (float64x1_t __a, float64x1_t __b) +vcgt_f64 (float64x1_t __a, float64x1_t __b) { - return (uint64x1_t) (__a <= __b); + return (uint64x1_t) (__a > __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_s8 (int8x8_t __a, int8x8_t __b) +vcgt_s8 (int8x8_t __a, int8x8_t __b) { - return (uint8x8_t) (__a <= __b); + return (uint8x8_t) (__a > __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_s16 (int16x4_t __a, int16x4_t __b) +vcgt_s16 (int16x4_t __a, int16x4_t __b) { - return (uint16x4_t) (__a <= __b); + return (uint16x4_t) (__a > __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_s32 (int32x2_t __a, int32x2_t __b) +vcgt_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t) (__a <= __b); + return (uint32x2_t) (__a > __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_s64 (int64x1_t __a, int64x1_t __b) +vcgt_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) (__a <= __b); + return (uint64x1_t) (__a > __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_u8 (uint8x8_t __a, uint8x8_t __b) +vcgt_u8 (uint8x8_t __a, uint8x8_t __b) { - return (__a <= __b); + return (__a > __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_u16 (uint16x4_t __a, uint16x4_t __b) +vcgt_u16 (uint16x4_t __a, uint16x4_t __b) { - return (__a <= __b); + return (__a > __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_u32 (uint32x2_t __a, uint32x2_t __b) +vcgt_u32 (uint32x2_t __a, uint32x2_t __b) { - return (__a <= __b); + return (__a > __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_u64 (uint64x1_t __a, uint64x1_t __b) +vcgt_u64 (uint64x1_t __a, uint64x1_t __b) { - return (__a <= __b); + return (__a > __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_f32 (float32x4_t __a, float32x4_t __b) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) (__a <= __b); + return (uint32x4_t) (__a > __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_f64 (float64x2_t __a, float64x2_t __b) +vcgtq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) (__a <= __b); + return (uint64x2_t) (__a > __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_s8 (int8x16_t __a, 
int8x16_t __b) +vcgtq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x16_t) (__a <= __b); + return (uint8x16_t) (__a > __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_s16 (int16x8_t __a, int16x8_t __b) +vcgtq_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x8_t) (__a <= __b); + return (uint16x8_t) (__a > __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_s32 (int32x4_t __a, int32x4_t __b) +vcgtq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t) (__a <= __b); + return (uint32x4_t) (__a > __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_s64 (int64x2_t __a, int64x2_t __b) +vcgtq_s64 (int64x2_t __a, int64x2_t __b) { - return (uint64x2_t) (__a <= __b); + return (uint64x2_t) (__a > __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_u8 (uint8x16_t __a, uint8x16_t __b) +vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (__a <= __b); + return (__a > __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_u16 (uint16x8_t __a, uint16x8_t __b) +vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (__a <= __b); + return (__a > __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_u32 (uint32x4_t __a, uint32x4_t __b) +vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (__a <= __b); + return (__a > __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_u64 (uint64x2_t __a, uint64x2_t __b) +vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (__a <= __b); + return (__a > __b); } -/* vcle - scalar. */ +/* vcgt - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcles_f32 (float32_t __a, float32_t __b) +vcgts_f32 (float32_t __a, float32_t __b) { - return __a <= __b ? -1 : 0; + return __a > __b ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcled_s64 (int64_t __a, int64_t __b) +vcgtd_s64 (int64_t __a, int64_t __b) { - return __a <= __b ? -1ll : 0ll; + return __a > __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcled_u64 (uint64_t __a, uint64_t __b) +vcgtd_u64 (uint64_t __a, uint64_t __b) { - return __a <= __b ? -1ll : 0ll; + return __a > __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcled_f64 (float64_t __a, float64_t __b) +vcgtd_f64 (float64_t __a, float64_t __b) { - return __a <= __b ? -1ll : 0ll; + return __a > __b ? -1ll : 0ll; } -/* vclez - vector. */ +/* vcgtz - vector. 
*/ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclez_f32 (float32x2_t __a) +vcgtz_f32 (float32x2_t __a) { - return (uint32x2_t) (__a <= 0.0f); + return (uint32x2_t) (__a > 0.0f); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclez_f64 (float64x1_t __a) +vcgtz_f64 (float64x1_t __a) { - return (uint64x1_t) (__a <= (float64x1_t) {0.0}); + return (uint64x1_t) (__a > (float64x1_t) {0.0}); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclez_s8 (int8x8_t __a) +vcgtz_s8 (int8x8_t __a) { - return (uint8x8_t) (__a <= 0); + return (uint8x8_t) (__a > 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclez_s16 (int16x4_t __a) +vcgtz_s16 (int16x4_t __a) { - return (uint16x4_t) (__a <= 0); + return (uint16x4_t) (__a > 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclez_s32 (int32x2_t __a) +vcgtz_s32 (int32x2_t __a) { - return (uint32x2_t) (__a <= 0); + return (uint32x2_t) (__a > 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclez_s64 (int64x1_t __a) +vcgtz_s64 (int64x1_t __a) { - return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); + return (uint64x1_t) (__a > __AARCH64_INT64_C (0)); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezq_f32 (float32x4_t __a) +vcgtzq_f32 (float32x4_t __a) { - return (uint32x4_t) (__a <= 0.0f); + return (uint32x4_t) (__a > 0.0f); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezq_f64 (float64x2_t __a) +vcgtzq_f64 (float64x2_t __a) { - return (uint64x2_t) (__a <= 0.0); + return (uint64x2_t) (__a > 0.0); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezq_s8 (int8x16_t __a) +vcgtzq_s8 (int8x16_t __a) { - return (uint8x16_t) (__a <= 0); + return (uint8x16_t) (__a > 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezq_s16 (int16x8_t __a) +vcgtzq_s16 (int16x8_t __a) { - return (uint16x8_t) (__a <= 0); + return (uint16x8_t) (__a > 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezq_s32 (int32x4_t __a) +vcgtzq_s32 (int32x4_t __a) { - return (uint32x4_t) (__a <= 0); + return (uint32x4_t) (__a > 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezq_s64 (int64x2_t __a) +vcgtzq_s64 (int64x2_t __a) { - return (uint64x2_t) (__a <= __AARCH64_INT64_C (0)); + return (uint64x2_t) (__a > __AARCH64_INT64_C (0)); } -/* vclez - scalar. */ +/* vcgtz - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezs_f32 (float32_t __a) +vcgtzs_f32 (float32_t __a) { - return __a <= 0.0f ? -1 : 0; + return __a > 0.0f ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezd_s64 (int64_t __a) +vcgtzd_s64 (int64_t __a) { - return __a <= 0 ? -1ll : 0ll; + return __a > 0 ? 
-1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezd_f64 (float64_t __a) +vcgtzd_f64 (float64_t __a) { - return __a <= 0.0 ? -1ll : 0ll; + return __a > 0.0 ? -1ll : 0ll; } -/* vclt - vector. */ +/* vcle - vector. */ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_f32 (float32x2_t __a, float32x2_t __b) +vcle_f32 (float32x2_t __a, float32x2_t __b) { - return (uint32x2_t) (__a < __b); + return (uint32x2_t) (__a <= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_f64 (float64x1_t __a, float64x1_t __b) +vcle_f64 (float64x1_t __a, float64x1_t __b) { - return (uint64x1_t) (__a < __b); + return (uint64x1_t) (__a <= __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_s8 (int8x8_t __a, int8x8_t __b) +vcle_s8 (int8x8_t __a, int8x8_t __b) { - return (uint8x8_t) (__a < __b); + return (uint8x8_t) (__a <= __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_s16 (int16x4_t __a, int16x4_t __b) +vcle_s16 (int16x4_t __a, int16x4_t __b) { - return (uint16x4_t) (__a < __b); + return (uint16x4_t) (__a <= __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_s32 (int32x2_t __a, int32x2_t __b) +vcle_s32 (int32x2_t __a, int32x2_t __b) { - return (uint32x2_t) (__a < __b); + return (uint32x2_t) (__a <= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_s64 (int64x1_t __a, int64x1_t __b) +vcle_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) (__a < __b); + return (uint64x1_t) (__a <= __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_u8 (uint8x8_t __a, uint8x8_t __b) +vcle_u8 (uint8x8_t __a, uint8x8_t __b) { - return (__a < __b); + return (__a <= __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_u16 (uint16x4_t __a, uint16x4_t __b) +vcle_u16 (uint16x4_t __a, uint16x4_t __b) { - return (__a < __b); + return (__a <= __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_u32 (uint32x2_t __a, uint32x2_t __b) +vcle_u32 (uint32x2_t __a, uint32x2_t __b) { - return (__a < __b); + return (__a <= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_u64 (uint64x1_t __a, uint64x1_t __b) +vcle_u64 (uint64x1_t __a, uint64x1_t __b) { - return (__a < __b); + return (__a <= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_f32 (float32x4_t __a, float32x4_t __b) +vcleq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) (__a < __b); + return (uint32x4_t) (__a <= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_f64 (float64x2_t __a, float64x2_t __b) +vcleq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) (__a < __b); + return (uint64x2_t) (__a <= __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_s8 (int8x16_t __a, 
int8x16_t __b) +vcleq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint8x16_t) (__a < __b); + return (uint8x16_t) (__a <= __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_s16 (int16x8_t __a, int16x8_t __b) +vcleq_s16 (int16x8_t __a, int16x8_t __b) { - return (uint16x8_t) (__a < __b); + return (uint16x8_t) (__a <= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_s32 (int32x4_t __a, int32x4_t __b) +vcleq_s32 (int32x4_t __a, int32x4_t __b) { - return (uint32x4_t) (__a < __b); + return (uint32x4_t) (__a <= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_s64 (int64x2_t __a, int64x2_t __b) +vcleq_s64 (int64x2_t __a, int64x2_t __b) { - return (uint64x2_t) (__a < __b); + return (uint64x2_t) (__a <= __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_u8 (uint8x16_t __a, uint8x16_t __b) +vcleq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (__a < __b); + return (__a <= __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_u16 (uint16x8_t __a, uint16x8_t __b) +vcleq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (__a < __b); + return (__a <= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_u32 (uint32x4_t __a, uint32x4_t __b) +vcleq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (__a < __b); + return (__a <= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_u64 (uint64x2_t __a, uint64x2_t __b) +vcleq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (__a < __b); + return (__a <= __b); } -/* vclt - scalar. */ +/* vcle - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclts_f32 (float32_t __a, float32_t __b) +vcles_f32 (float32_t __a, float32_t __b) { - return __a < __b ? -1 : 0; + return __a <= __b ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltd_s64 (int64_t __a, int64_t __b) +vcled_s64 (int64_t __a, int64_t __b) { - return __a < __b ? -1ll : 0ll; + return __a <= __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltd_u64 (uint64_t __a, uint64_t __b) +vcled_u64 (uint64_t __a, uint64_t __b) { - return __a < __b ? -1ll : 0ll; + return __a <= __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltd_f64 (float64_t __a, float64_t __b) +vcled_f64 (float64_t __a, float64_t __b) { - return __a < __b ? -1ll : 0ll; + return __a <= __b ? -1ll : 0ll; } -/* vcltz - vector. */ +/* vclez - vector. 
*/ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltz_f32 (float32x2_t __a) +vclez_f32 (float32x2_t __a) { - return (uint32x2_t) (__a < 0.0f); + return (uint32x2_t) (__a <= 0.0f); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltz_f64 (float64x1_t __a) +vclez_f64 (float64x1_t __a) { - return (uint64x1_t) (__a < (float64x1_t) {0.0}); + return (uint64x1_t) (__a <= (float64x1_t) {0.0}); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltz_s8 (int8x8_t __a) +vclez_s8 (int8x8_t __a) { - return (uint8x8_t) (__a < 0); + return (uint8x8_t) (__a <= 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltz_s16 (int16x4_t __a) +vclez_s16 (int16x4_t __a) { - return (uint16x4_t) (__a < 0); + return (uint16x4_t) (__a <= 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltz_s32 (int32x2_t __a) +vclez_s32 (int32x2_t __a) { - return (uint32x2_t) (__a < 0); + return (uint32x2_t) (__a <= 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltz_s64 (int64x1_t __a) +vclez_s64 (int64x1_t __a) { - return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); + return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzq_f32 (float32x4_t __a) +vclezq_f32 (float32x4_t __a) { - return (uint32x4_t) (__a < 0.0f); + return (uint32x4_t) (__a <= 0.0f); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzq_f64 (float64x2_t __a) +vclezq_f64 (float64x2_t __a) { - return (uint64x2_t) (__a < 0.0); + return (uint64x2_t) (__a <= 0.0); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzq_s8 (int8x16_t __a) +vclezq_s8 (int8x16_t __a) { - return (uint8x16_t) (__a < 0); + return (uint8x16_t) (__a <= 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzq_s16 (int16x8_t __a) +vclezq_s16 (int16x8_t __a) { - return (uint16x8_t) (__a < 0); + return (uint16x8_t) (__a <= 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzq_s32 (int32x4_t __a) +vclezq_s32 (int32x4_t __a) { - return (uint32x4_t) (__a < 0); + return (uint32x4_t) (__a <= 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzq_s64 (int64x2_t __a) +vclezq_s64 (int64x2_t __a) { - return (uint64x2_t) (__a < __AARCH64_INT64_C (0)); + return (uint64x2_t) (__a <= __AARCH64_INT64_C (0)); } -/* vcltz - scalar. */ +/* vclez - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzs_f32 (float32_t __a) +vclezs_f32 (float32_t __a) { - return __a < 0.0f ? -1 : 0; + return __a <= 0.0f ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzd_s64 (int64_t __a) +vclezd_s64 (int64_t __a) { - return __a < 0 ? -1ll : 0ll; + return __a <= 0 ? 
-1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzd_f64 (float64_t __a) +vclezd_f64 (float64_t __a) { - return __a < 0.0 ? -1ll : 0ll; + return __a <= 0.0 ? -1ll : 0ll; } -/* vcls. */ +/* vclt - vector. */ -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcls_s8 (int8x8_t __a) +vclt_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_clrsbv8qi (__a); + return (uint32x2_t) (__a < __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcls_s16 (int16x4_t __a) +vclt_f64 (float64x1_t __a, float64x1_t __b) { - return __builtin_aarch64_clrsbv4hi (__a); + return (uint64x1_t) (__a < __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcls_s32 (int32x2_t __a) +vclt_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_clrsbv2si (__a); + return (uint8x8_t) (__a < __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclsq_s8 (int8x16_t __a) +vclt_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_clrsbv16qi (__a); + return (uint16x4_t) (__a < __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclsq_s16 (int16x8_t __a) +vclt_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_clrsbv8hi (__a); + return (uint32x2_t) (__a < __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclsq_s32 (int32x4_t __a) +vclt_s64 (int64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_clrsbv4si (__a); + return (uint64x1_t) (__a < __b); } -/* vclz. 
*/ +__extension__ extern __inline uint8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vclt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (__a < __b); +} -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclz_s8 (int8x8_t __a) +vclt_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_clzv8qi (__a); + return (__a < __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclz_s16 (int16x4_t __a) +vclt_u32 (uint32x2_t __a, uint32x2_t __b) { - return __builtin_aarch64_clzv4hi (__a); + return (__a < __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclz_s32 (int32x2_t __a) +vclt_u64 (uint64x1_t __a, uint64x1_t __b) { - return __builtin_aarch64_clzv2si (__a); + return (__a < __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclz_u8 (uint8x8_t __a) +vcltq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); + return (uint32x4_t) (__a < __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclz_u16 (uint16x4_t __a) +vcltq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); + return (uint64x2_t) (__a < __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclz_u32 (uint32x2_t __a) +vcltq_s8 (int8x16_t __a, int8x16_t __b) { - return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); + return (uint8x16_t) (__a < __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclzq_s8 (int8x16_t __a) +vcltq_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_aarch64_clzv16qi (__a); + return (uint16x8_t) (__a < __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclzq_s16 (int16x8_t __a) +vcltq_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_aarch64_clzv8hi (__a); + return (uint32x4_t) (__a < __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclzq_s32 (int32x4_t __a) +vcltq_s64 (int64x2_t __a, int64x2_t __b) { - return __builtin_aarch64_clzv4si (__a); + return (uint64x2_t) (__a < __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclzq_u8 (uint8x16_t __a) +vcltq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); + return (__a < __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclzq_u16 (uint16x8_t __a) +vcltq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); + return (__a < __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vclzq_u32 (uint32x4_t __a) +vcltq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); + return (__a < __b); } -/* vcnt. */ - -__extension__ extern __inline poly8x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcnt_p8 (poly8x8_t __a) +vcltq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); + return (__a < __b); } -__extension__ extern __inline int8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcnt_s8 (int8x8_t __a) -{ - return __builtin_aarch64_popcountv8qi (__a); -} +/* vclt - scalar. */ -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcnt_u8 (uint8x8_t __a) +vclts_f32 (float32_t __a, float32_t __b) { - return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); + return __a < __b ? -1 : 0; } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcntq_p8 (poly8x16_t __a) +vcltd_s64 (int64_t __a, int64_t __b) { - return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); + return __a < __b ? -1ll : 0ll; } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcntq_s8 (int8x16_t __a) +vcltd_u64 (uint64_t __a, uint64_t __b) { - return __builtin_aarch64_popcountv16qi (__a); + return __a < __b ? -1ll : 0ll; } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcntq_u8 (uint8x16_t __a) +vcltd_f64 (float64_t __a, float64_t __b) { - return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); + return __a < __b ? -1ll : 0ll; } -/* vcopy_lane. */ +/* vcltz - vector. 
*/ -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_f32 (float32x2_t __a, const int __lane1, - float32x2_t __b, const int __lane2) +vcltz_f32 (float32x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint32x2_t) (__a < 0.0f); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_f64 (float64x1_t __a, const int __lane1, - float64x1_t __b, const int __lane2) +vcltz_f64 (float64x1_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint64x1_t) (__a < (float64x1_t) {0.0}); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_p8 (poly8x8_t __a, const int __lane1, - poly8x8_t __b, const int __lane2) +vcltz_s8 (int8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint8x8_t) (__a < 0); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_p16 (poly16x4_t __a, const int __lane1, - poly16x4_t __b, const int __lane2) +vcltz_s16 (int16x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint16x4_t) (__a < 0); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_p64 (poly64x1_t __a, const int __lane1, - poly64x1_t __b, const int __lane2) +vcltz_s32 (int32x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint32x2_t) (__a < 0); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_s8 (int8x8_t __a, const int __lane1, - int8x8_t __b, const int __lane2) +vcltz_s64 (int64x1_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_s16 (int16x4_t __a, const int __lane1, - int16x4_t __b, const int __lane2) +vcltzq_f32 (float32x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint32x4_t) (__a < 0.0f); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_s32 (int32x2_t __a, const int __lane1, - int32x2_t __b, const int __lane2) +vcltzq_f64 (float64x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint64x2_t) (__a < 0.0); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_s64 (int64x1_t __a, const int __lane1, - int64x1_t __b, const int __lane2) +vcltzq_s8 (int8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - 
__a, __lane1); + return (uint8x16_t) (__a < 0); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_u8 (uint8x8_t __a, const int __lane1, - uint8x8_t __b, const int __lane2) +vcltzq_s16 (int16x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint16x8_t) (__a < 0); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_u16 (uint16x4_t __a, const int __lane1, - uint16x4_t __b, const int __lane2) +vcltzq_s32 (int32x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint32x4_t) (__a < 0); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_u32 (uint32x2_t __a, const int __lane1, - uint32x2_t __b, const int __lane2) +vcltzq_s64 (int64x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint64x2_t) (__a < __AARCH64_INT64_C (0)); } -__extension__ extern __inline uint64x1_t +/* vcltz - scalar. */ + +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_lane_u64 (uint64x1_t __a, const int __lane1, - uint64x1_t __b, const int __lane2) +vcltzs_f32 (float32_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __a < 0.0f ? -1 : 0; } -/* vcopy_laneq. */ +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcltzd_s64 (int64_t __a) +{ + return __a < 0 ? -1ll : 0ll; +} -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_f32 (float32x2_t __a, const int __lane1, - float32x4_t __b, const int __lane2) +vcltzd_f64 (float64_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __a < 0.0 ? -1ll : 0ll; } -__extension__ extern __inline float64x1_t +/* vcls. 
*/ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_f64 (float64x1_t __a, const int __lane1, - float64x2_t __b, const int __lane2) +vcls_s8 (int8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv8qi (__a); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_p8 (poly8x8_t __a, const int __lane1, - poly8x16_t __b, const int __lane2) +vcls_s16 (int16x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv4hi (__a); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_p16 (poly16x4_t __a, const int __lane1, - poly16x8_t __b, const int __lane2) +vcls_s32 (int32x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv2si (__a); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_p64 (poly64x1_t __a, const int __lane1, - poly64x2_t __b, const int __lane2) +vclsq_s8 (int8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv16qi (__a); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_s8 (int8x8_t __a, const int __lane1, - int8x16_t __b, const int __lane2) +vclsq_s16 (int16x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv8hi (__a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_s16 (int16x4_t __a, const int __lane1, - int16x8_t __b, const int __lane2) +vclsq_s32 (int32x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv4si (__a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_s32 (int32x2_t __a, const int __lane1, - int32x4_t __b, const int __lane2) +vcls_u8 (uint8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv8qi ((int8x8_t) __a); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_s64 (int64x1_t __a, const int __lane1, - int64x2_t __b, const int __lane2) +vcls_u16 (uint16x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv4hi ((int16x4_t) __a); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_u8 (uint8x8_t __a, const int __lane1, - uint8x16_t __b, const int __lane2) +vcls_u32 (uint32x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, 
__lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv2si ((int32x2_t) __a); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_u16 (uint16x4_t __a, const int __lane1, - uint16x8_t __b, const int __lane2) +vclsq_u8 (uint8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv16qi ((int8x16_t) __a); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_u32 (uint32x2_t __a, const int __lane1, - uint32x4_t __b, const int __lane2) +vclsq_u16 (uint16x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv8hi ((int16x8_t) __a); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopy_laneq_u64 (uint64x1_t __a, const int __lane1, - uint64x2_t __b, const int __lane2) +vclsq_u32 (uint32x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clrsbv4si ((int32x4_t) __a); } -/* vcopyq_lane. */ +/* vclz. */ -__extension__ extern __inline float32x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_f32 (float32x4_t __a, const int __lane1, - float32x2_t __b, const int __lane2) +vclz_s8 (int8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clzv8qi (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_f64 (float64x2_t __a, const int __lane1, - float64x1_t __b, const int __lane2) +vclz_s16 (int16x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clzv4hi (__a); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_p8 (poly8x16_t __a, const int __lane1, - poly8x8_t __b, const int __lane2) +vclz_s32 (int32x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clzv2si (__a); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_p16 (poly16x8_t __a, const int __lane1, - poly16x4_t __b, const int __lane2) +vclz_u8 (uint8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); } -__extension__ extern __inline poly64x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_p64 (poly64x2_t __a, const int __lane1, - poly64x1_t __b, const int __lane2) +vclz_u16 (uint16x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint32x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_s8 (int8x16_t __a, const int __lane1, - int8x8_t __b, const int __lane2) +vclz_u32 (uint32x2_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_s16 (int16x8_t __a, const int __lane1, - int16x4_t __b, const int __lane2) +vclzq_s8 (int8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clzv16qi (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_s32 (int32x4_t __a, const int __lane1, - int32x2_t __b, const int __lane2) +vclzq_s16 (int16x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clzv8hi (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_s64 (int64x2_t __a, const int __lane1, - int64x1_t __b, const int __lane2) +vclzq_s32 (int32x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_clzv4si (__a); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_u8 (uint8x16_t __a, const int __lane1, - uint8x8_t __b, const int __lane2) +vclzq_u8 (uint8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_u16 (uint16x8_t __a, const int __lane1, - uint16x4_t __b, const int __lane2) +vclzq_u16 (uint16x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_u32 (uint32x4_t __a, const int __lane1, - uint32x2_t __b, const int __lane2) +vclzq_u32 (uint32x4_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); } -__extension__ extern __inline uint64x2_t +/* vcnt. */ + +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_lane_u64 (uint64x2_t __a, const int __lane1, - uint64x1_t __b, const int __lane2) +vcnt_p8 (poly8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); } -/* vcopyq_laneq. 
*/ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_f32 (float32x4_t __a, const int __lane1, - float32x4_t __b, const int __lane2) +vcnt_s8 (int8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_popcountv8qi (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_f64 (float64x2_t __a, const int __lane1, - float64x2_t __b, const int __lane2) +vcnt_u8 (uint8x8_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_p8 (poly8x16_t __a, const int __lane1, - poly8x16_t __b, const int __lane2) +vcntq_p8 (poly8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_p16 (poly16x8_t __a, const int __lane1, - poly16x8_t __b, const int __lane2) +vcntq_s8 (int8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return __builtin_aarch64_popcountv16qi (__a); } -__extension__ extern __inline poly64x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_p64 (poly64x2_t __a, const int __lane1, - poly64x2_t __b, const int __lane2) +vcntq_u8 (uint8x16_t __a) { - return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); } -__extension__ extern __inline int8x16_t +/* vcopy_lane. 
*/ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_s8 (int8x16_t __a, const int __lane1, - int8x16_t __b, const int __lane2) +vcopy_lane_f32 (float32x2_t __a, const int __lane1, + float32x2_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), __a, __lane1); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_s16 (int16x8_t __a, const int __lane1, - int16x8_t __b, const int __lane2) +vcopy_lane_f64 (float64x1_t __a, const int __lane1, + float64x1_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + __a, __lane1); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_s32 (int32x4_t __a, const int __lane1, - int32x4_t __b, const int __lane2) +vcopy_lane_p8 (poly8x8_t __a, const int __lane1, + poly8x8_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + __a, __lane1); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_s64 (int64x2_t __a, const int __lane1, - int64x2_t __b, const int __lane2) +vcopy_lane_p16 (poly16x4_t __a, const int __lane1, + poly16x4_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + __a, __lane1); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_u8 (uint8x16_t __a, const int __lane1, - uint8x16_t __b, const int __lane2) +vcopy_lane_p64 (poly64x1_t __a, const int __lane1, + poly64x1_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), __a, __lane1); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_u16 (uint16x8_t __a, const int __lane1, - uint16x8_t __b, const int __lane2) +vcopy_lane_s8 (int8x8_t __a, const int __lane1, + int8x8_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + __a, __lane1); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_u32 (uint32x4_t __a, const int __lane1, - uint32x4_t __b, const int __lane2) +vcopy_lane_s16 (int16x4_t __a, const int __lane1, + int16x4_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + __a, __lane1); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcopyq_laneq_u64 (uint64x2_t __a, const int __lane1, - uint64x2_t __b, const int __lane2) +vcopy_lane_s32 (int32x2_t __a, const int __lane1, + int32x2_t __b, const int __lane2) { return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), - __a, __lane1); + __a, __lane1); } -/* vcvt (double -> float). 
*/ +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcopy_lane_s64 (int64x1_t __a, const int __lane1, + int64x1_t __b, const int __lane2) +{ + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); +} -__extension__ extern __inline float16x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f16_f32 (float32x4_t __a) +vcopy_lane_u8 (uint8x8_t __a, const int __lane1, + uint8x8_t __b, const int __lane2) { - return __builtin_aarch64_float_truncate_lo_v4hf (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b) +vcopy_lane_u16 (uint16x4_t __a, const int __lane1, + uint16x4_t __b, const int __lane2) { - return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f32_f64 (float64x2_t __a) +vcopy_lane_u32 (uint32x2_t __a, const int __lane1, + uint32x2_t __b, const int __lane2) { - return __builtin_aarch64_float_truncate_lo_v2sf (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) +vcopy_lane_u64 (uint64x1_t __a, const int __lane1, + uint64x1_t __b, const int __lane2) { - return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -/* vcvt (float -> double). */ +/* vcopy_laneq. 
*/ -__extension__ extern __inline float32x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f32_f16 (float16x4_t __a) +vcopy_laneq_f32 (float32x2_t __a, const int __lane1, + float32x4_t __b, const int __lane2) { - return __builtin_aarch64_float_extend_lo_v4sf (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f64_f32 (float32x2_t __a) +vcopy_laneq_f64 (float64x1_t __a, const int __lane1, + float64x2_t __b, const int __lane2) { - - return __builtin_aarch64_float_extend_lo_v2df (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_high_f32_f16 (float16x8_t __a) +vcopy_laneq_p8 (poly8x8_t __a, const int __lane1, + poly8x16_t __b, const int __lane2) { - return __builtin_aarch64_vec_unpacks_hi_v8hf (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_high_f64_f32 (float32x4_t __a) +vcopy_laneq_p16 (poly16x4_t __a, const int __lane1, + poly16x8_t __b, const int __lane2) { - return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -/* vcvt (fixed-point -> float). */ - -__extension__ extern __inline float64_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_n_f64_s64 (int64_t __a, const int __b) +vcopy_laneq_p64 (poly64x1_t __a, const int __lane1, + poly64x2_t __b, const int __lane2) { - return __builtin_aarch64_scvtfdi (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_n_f64_u64 (uint64_t __a, const int __b) +vcopy_laneq_s8 (int8x8_t __a, const int __lane1, + int8x16_t __b, const int __lane2) { - return __builtin_aarch64_ucvtfdi_sus (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_n_f32_s32 (int32_t __a, const int __b) +vcopy_laneq_s16 (int16x4_t __a, const int __lane1, + int16x8_t __b, const int __lane2) { - return __builtin_aarch64_scvtfsi (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_n_f32_u32 (uint32_t __a, const int __b) +vcopy_laneq_s32 (int32x2_t __a, const int __lane1, + int32x4_t __b, const int __lane2) { - return __builtin_aarch64_ucvtfsi_sus (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int64x1_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_f32_s32 (int32x2_t __a, const int __b) +vcopy_laneq_s64 (int64x1_t __a, const int __lane1, + int64x2_t __b, const int __lane2) { - return __builtin_aarch64_scvtfv2si (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_f32_u32 (uint32x2_t __a, const int __b) +vcopy_laneq_u8 (uint8x8_t __a, const int __lane1, + uint8x16_t __b, const int __lane2) { - return __builtin_aarch64_ucvtfv2si_sus (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_f64_s64 (int64x1_t __a, const int __b) +vcopy_laneq_u16 (uint16x4_t __a, const int __lane1, + uint16x8_t __b, const int __lane2) { - return (float64x1_t) - { __builtin_aarch64_scvtfdi (vget_lane_s64 (__a, 0), __b) }; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_f64_u64 (uint64x1_t __a, const int __b) +vcopy_laneq_u32 (uint32x2_t __a, const int __lane1, + uint32x4_t __b, const int __lane2) { - return (float64x1_t) - { __builtin_aarch64_ucvtfdi_sus (vget_lane_u64 (__a, 0), __b) }; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_f32_s32 (int32x4_t __a, const int __b) +vcopy_laneq_u64 (uint64x1_t __a, const int __lane1, + uint64x2_t __b, const int __lane2) { - return __builtin_aarch64_scvtfv4si (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } +/* vcopyq_lane. */ + __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_f32_u32 (uint32x4_t __a, const int __b) +vcopyq_lane_f32 (float32x4_t __a, const int __lane1, + float32x2_t __b, const int __lane2) { - return __builtin_aarch64_ucvtfv4si_sus (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_f64_s64 (int64x2_t __a, const int __b) +vcopyq_lane_f64 (float64x2_t __a, const int __lane1, + float64x1_t __b, const int __lane2) { - return __builtin_aarch64_scvtfv2di (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_f64_u64 (uint64x2_t __a, const int __b) +vcopyq_lane_p8 (poly8x16_t __a, const int __lane1, + poly8x8_t __b, const int __lane2) { - return __builtin_aarch64_ucvtfv2di_sus (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -/* vcvt (float -> fixed-point). 
*/ - -__extension__ extern __inline int64_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_n_s64_f64 (float64_t __a, const int __b) +vcopyq_lane_p16 (poly16x8_t __a, const int __lane1, + poly16x4_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzsdf (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline uint64_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_n_u64_f64 (float64_t __a, const int __b) +vcopyq_lane_p64 (poly64x2_t __a, const int __lane1, + poly64x1_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzudf_uss (__a, __b); -} - -__extension__ extern __inline int32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_n_s32_f32 (float32_t __a, const int __b) -{ - return __builtin_aarch64_fcvtzssf (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_n_u32_f32 (float32_t __a, const int __b) +vcopyq_lane_s8 (int8x16_t __a, const int __lane1, + int8x8_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzusf_uss (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_s32_f32 (float32x2_t __a, const int __b) +vcopyq_lane_s16 (int16x8_t __a, const int __lane1, + int16x4_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzsv2sf (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_u32_f32 (float32x2_t __a, const int __b) +vcopyq_lane_s32 (int32x4_t __a, const int __lane1, + int32x2_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_s64_f64 (float64x1_t __a, const int __b) +vcopyq_lane_s64 (int64x2_t __a, const int __lane1, + int64x1_t __b, const int __lane2) { - return (int64x1_t) - { __builtin_aarch64_fcvtzsdf (vget_lane_f64 (__a, 0), __b) }; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_u64_f64 (float64x1_t __a, const int __b) +vcopyq_lane_u8 (uint8x16_t __a, const int __lane1, + uint8x8_t __b, const int __lane2) { - return (uint64x1_t) - { __builtin_aarch64_fcvtzudf_uss (vget_lane_f64 (__a, 0), __b) }; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_s32_f32 (float32x4_t __a, const int __b) +vcopyq_lane_u16 (uint16x8_t 
__a, const int __lane1, + uint16x4_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzsv4sf (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_u32_f32 (float32x4_t __a, const int __b) +vcopyq_lane_u32 (uint32x4_t __a, const int __lane1, + uint32x2_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzuv4sf_uss (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_s64_f64 (float64x2_t __a, const int __b) +vcopyq_lane_u64 (uint64x2_t __a, const int __lane1, + uint64x1_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzsv2df (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline uint64x2_t +/* vcopyq_laneq. */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_u64_f64 (float64x2_t __a, const int __b) +vcopyq_laneq_f32 (float32x4_t __a, const int __lane1, + float32x4_t __b, const int __lane2) { - return __builtin_aarch64_fcvtzuv2df_uss (__a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -/* vcvt (int -> float) */ - -__extension__ extern __inline float64_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_f64_s64 (int64_t __a) +vcopyq_laneq_f64 (float64x2_t __a, const int __lane1, + float64x2_t __b, const int __lane2) { - return (float64_t) __a; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_f64_u64 (uint64_t __a) +vcopyq_laneq_p8 (poly8x16_t __a, const int __lane1, + poly8x16_t __b, const int __lane2) { - return (float64_t) __a; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_f32_s32 (int32_t __a) +vcopyq_laneq_p16 (poly16x8_t __a, const int __lane1, + poly16x8_t __b, const int __lane2) { - return (float32_t) __a; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_f32_u32 (uint32_t __a) +vcopyq_laneq_p64 (poly64x2_t __a, const int __lane1, + poly64x2_t __b, const int __lane2) { - return (float32_t) __a; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f32_s32 (int32x2_t __a) +vcopyq_laneq_s8 (int8x16_t __a, const int __lane1, + int8x16_t __b, const int __lane2) { - return __builtin_aarch64_floatv2siv2sf (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline 
float32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f32_u32 (uint32x2_t __a) +vcopyq_laneq_s16 (int16x8_t __a, const int __lane1, + int16x8_t __b, const int __lane2) { - return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f64_s64 (int64x1_t __a) +vcopyq_laneq_s32 (int32x4_t __a, const int __lane1, + int32x4_t __b, const int __lane2) { - return (float64x1_t) { vget_lane_s64 (__a, 0) }; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f64_u64 (uint64x1_t __a) +vcopyq_laneq_s64 (int64x2_t __a, const int __lane1, + int64x2_t __b, const int __lane2) { - return (float64x1_t) { vget_lane_u64 (__a, 0) }; + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_f32_s32 (int32x4_t __a) +vcopyq_laneq_u8 (uint8x16_t __a, const int __lane1, + uint8x16_t __b, const int __lane2) { - return __builtin_aarch64_floatv4siv4sf (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_f32_u32 (uint32x4_t __a) +vcopyq_laneq_u16 (uint16x8_t __a, const int __lane1, + uint16x8_t __b, const int __lane2) { - return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_f64_s64 (int64x2_t __a) +vcopyq_laneq_u32 (uint32x4_t __a, const int __lane1, + uint32x4_t __b, const int __lane2) { - return __builtin_aarch64_floatv2div2df (__a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_f64_u64 (uint64x2_t __a) +vcopyq_laneq_u64 (uint64x2_t __a, const int __lane1, + uint64x2_t __b, const int __lane2) { - return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -/* vcvt (float -> int) */ +/* vcvt (double -> float). 
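
For the vcopy_lane/vcopyq_lane/vcopy_laneq/vcopyq_laneq family rewritten above, a minimal sketch of the lane-insert semantics — assuming an AArch64 target with <arm_neon.h>; the wrapper name is invented for illustration:

#include <arm_neon.h>

/* Hypothetical wrapper: overwrite lane 3 of a 128-bit vector with lane 0
   of a 64-bit vector.  Both lane indices must be constant expressions.  */
float32x4_t copy_lane_example (float32x4_t dst, float32x2_t src)
{
  return vcopyq_lane_f32 (dst, 3, src, 0);
}
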
*/ -__extension__ extern __inline int64_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_s64_f64 (float64_t __a) +vcvt_f16_f32 (float32x4_t __a) { - return (int64_t) __a; + return __builtin_aarch64_float_truncate_lo_v4hf (__a); } -__extension__ extern __inline uint64_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtd_u64_f64 (float64_t __a) +vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b) { - return (uint64_t) __a; + return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_s32_f32 (float32_t __a) +vcvt_f32_f64 (float64x2_t __a) { - return (int32_t) __a; + return __builtin_aarch64_float_truncate_lo_v2sf (__a); } -__extension__ extern __inline uint32_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvts_u32_f32 (float32_t __a) +vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) { - return (uint32_t) __a; + return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); } -__extension__ extern __inline int32x2_t +/* vcvt (float -> double). */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_s32_f32 (float32x2_t __a) +vcvt_f32_f16 (float16x4_t __a) { - return __builtin_aarch64_lbtruncv2sfv2si (__a); + return __builtin_aarch64_float_extend_lo_v4sf (__a); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_u32_f32 (float32x2_t __a) +vcvt_f64_f32 (float32x2_t __a) { - return __builtin_aarch64_lbtruncuv2sfv2si_us (__a); + + return __builtin_aarch64_float_extend_lo_v2df (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_s32_f32 (float32x4_t __a) +vcvt_high_f32_f16 (float16x8_t __a) { - return __builtin_aarch64_lbtruncv4sfv4si (__a); + return __builtin_aarch64_vec_unpacks_hi_v8hf (__a); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_u32_f32 (float32x4_t __a) +vcvt_high_f64_f32 (float32x4_t __a) { - return __builtin_aarch64_lbtruncuv4sfv4si_us (__a); + return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); } -__extension__ extern __inline int64x1_t +/* vcvt (fixed-point -> float). 
*/ + +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_s64_f64 (float64x1_t __a) +vcvtd_n_f64_s64 (int64_t __a, const int __b) { - return (int64x1_t) {vcvtd_s64_f64 (__a[0])}; + return __builtin_aarch64_scvtfdi (__a, __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_u64_f64 (float64x1_t __a) +vcvtd_n_f64_u64 (uint64_t __a, const int __b) { - return (uint64x1_t) {vcvtd_u64_f64 (__a[0])}; + return __builtin_aarch64_ucvtfdi_sus (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_s64_f64 (float64x2_t __a) +vcvts_n_f32_s32 (int32_t __a, const int __b) { - return __builtin_aarch64_lbtruncv2dfv2di (__a); + return __builtin_aarch64_scvtfsi (__a, __b); } -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_u64_f64 (float64x2_t __a) -{ - return __builtin_aarch64_lbtruncuv2dfv2di_us (__a); -} - -/* vcvta */ - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtad_s64_f64 (float64_t __a) -{ - return __builtin_aarch64_lrounddfdi (__a); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtad_u64_f64 (float64_t __a) -{ - return __builtin_aarch64_lroundudfdi_us (__a); -} - -__extension__ extern __inline int32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtas_s32_f32 (float32_t __a) -{ - return __builtin_aarch64_lroundsfsi (__a); -} - -__extension__ extern __inline uint32_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtas_u32_f32 (float32_t __a) +vcvts_n_f32_u32 (uint32_t __a, const int __b) { - return __builtin_aarch64_lroundusfsi_us (__a); + return __builtin_aarch64_ucvtfsi_sus (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvta_s32_f32 (float32x2_t __a) +vcvt_n_f32_s32 (int32x2_t __a, const int __b) { - return __builtin_aarch64_lroundv2sfv2si (__a); + return __builtin_aarch64_scvtfv2si (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvta_u32_f32 (float32x2_t __a) +vcvt_n_f32_u32 (uint32x2_t __a, const int __b) { - return __builtin_aarch64_lrounduv2sfv2si_us (__a); + return __builtin_aarch64_ucvtfv2si_sus (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtaq_s32_f32 (float32x4_t __a) +vcvt_n_f64_s64 (int64x1_t __a, const int __b) { - return __builtin_aarch64_lroundv4sfv4si (__a); + return (float64x1_t) + { __builtin_aarch64_scvtfdi (vget_lane_s64 (__a, 0), __b) }; } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtaq_u32_f32 (float32x4_t __a) +vcvt_n_f64_u64 (uint64x1_t __a, const int __b) { - return __builtin_aarch64_lrounduv4sfv4si_us (__a); + return (float64x1_t) + { __builtin_aarch64_ucvtfdi_sus (vget_lane_u64 (__a, 0), __b) }; } -__extension__ extern __inline 
int64x1_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvta_s64_f64 (float64x1_t __a) +vcvtq_n_f32_s32 (int32x4_t __a, const int __b) { - return (int64x1_t) {vcvtad_s64_f64 (__a[0])}; + return __builtin_aarch64_scvtfv4si (__a, __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvta_u64_f64 (float64x1_t __a) +vcvtq_n_f32_u32 (uint32x4_t __a, const int __b) { - return (uint64x1_t) {vcvtad_u64_f64 (__a[0])}; + return __builtin_aarch64_ucvtfv4si_sus (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtaq_s64_f64 (float64x2_t __a) +vcvtq_n_f64_s64 (int64x2_t __a, const int __b) { - return __builtin_aarch64_lroundv2dfv2di (__a); + return __builtin_aarch64_scvtfv2di (__a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtaq_u64_f64 (float64x2_t __a) +vcvtq_n_f64_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_lrounduv2dfv2di_us (__a); + return __builtin_aarch64_ucvtfv2di_sus (__a, __b); } -/* vcvtm */ +/* vcvt (float -> fixed-point). */ __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmd_s64_f64 (float64_t __a) +vcvtd_n_s64_f64 (float64_t __a, const int __b) { - return __builtin_llfloor (__a); + return __builtin_aarch64_fcvtzsdf (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmd_u64_f64 (float64_t __a) +vcvtd_n_u64_f64 (float64_t __a, const int __b) { - return __builtin_aarch64_lfloorudfdi_us (__a); + return __builtin_aarch64_fcvtzudf_uss (__a, __b); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtms_s32_f32 (float32_t __a) +vcvts_n_s32_f32 (float32_t __a, const int __b) { - return __builtin_ifloorf (__a); + return __builtin_aarch64_fcvtzssf (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtms_u32_f32 (float32_t __a) +vcvts_n_u32_f32 (float32_t __a, const int __b) { - return __builtin_aarch64_lfloorusfsi_us (__a); + return __builtin_aarch64_fcvtzusf_uss (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtm_s32_f32 (float32x2_t __a) +vcvt_n_s32_f32 (float32x2_t __a, const int __b) { - return __builtin_aarch64_lfloorv2sfv2si (__a); + return __builtin_aarch64_fcvtzsv2sf (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtm_u32_f32 (float32x2_t __a) +vcvt_n_u32_f32 (float32x2_t __a, const int __b) { - return __builtin_aarch64_lflooruv2sfv2si_us (__a); + return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmq_s32_f32 (float32x4_t __a) +vcvt_n_s64_f64 (float64x1_t __a, const int __b) { - return __builtin_aarch64_lfloorv4sfv4si (__a); + return (int64x1_t) + { __builtin_aarch64_fcvtzsdf (vget_lane_f64 (__a, 0), __b) }; } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint64x1_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmq_u32_f32 (float32x4_t __a) +vcvt_n_u64_f64 (float64x1_t __a, const int __b) { - return __builtin_aarch64_lflooruv4sfv4si_us (__a); + return (uint64x1_t) + { __builtin_aarch64_fcvtzudf_uss (vget_lane_f64 (__a, 0), __b) }; } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtm_s64_f64 (float64x1_t __a) +vcvtq_n_s32_f32 (float32x4_t __a, const int __b) { - return (int64x1_t) {vcvtmd_s64_f64 (__a[0])}; + return __builtin_aarch64_fcvtzsv4sf (__a, __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtm_u64_f64 (float64x1_t __a) +vcvtq_n_u32_f32 (float32x4_t __a, const int __b) { - return (uint64x1_t) {vcvtmd_u64_f64 (__a[0])}; + return __builtin_aarch64_fcvtzuv4sf_uss (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmq_s64_f64 (float64x2_t __a) +vcvtq_n_s64_f64 (float64x2_t __a, const int __b) { - return __builtin_aarch64_lfloorv2dfv2di (__a); + return __builtin_aarch64_fcvtzsv2df (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmq_u64_f64 (float64x2_t __a) +vcvtq_n_u64_f64 (float64x2_t __a, const int __b) { - return __builtin_aarch64_lflooruv2dfv2di_us (__a); + return __builtin_aarch64_fcvtzuv2df_uss (__a, __b); } -/* vcvtn */ +/* vcvt (int -> float) */ -__extension__ extern __inline int64_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtnd_s64_f64 (float64_t __a) +vcvtd_f64_s64 (int64_t __a) { - return __builtin_aarch64_lfrintndfdi (__a); + return (float64_t) __a; } -__extension__ extern __inline uint64_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtnd_u64_f64 (float64_t __a) +vcvtd_f64_u64 (uint64_t __a) { - return __builtin_aarch64_lfrintnudfdi_us (__a); + return (float64_t) __a; } -__extension__ extern __inline int32_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtns_s32_f32 (float32_t __a) +vcvts_f32_s32 (int32_t __a) { - return __builtin_aarch64_lfrintnsfsi (__a); + return (float32_t) __a; } -__extension__ extern __inline uint32_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtns_u32_f32 (float32_t __a) +vcvts_f32_u32 (uint32_t __a) { - return __builtin_aarch64_lfrintnusfsi_us (__a); + return (float32_t) __a; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtn_s32_f32 (float32x2_t __a) +vcvt_f32_s32 (int32x2_t __a) { - return __builtin_aarch64_lfrintnv2sfv2si (__a); + return __builtin_aarch64_floatv2siv2sf (__a); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtn_u32_f32 (float32x2_t __a) +vcvt_f32_u32 (uint32x2_t __a) { - return __builtin_aarch64_lfrintnuv2sfv2si_us (__a); + return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, 
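
A minimal sketch of the fixed-point and integer conversion forms in the hunks above — AArch64 and <arm_neon.h> assumed; the Q16.16 interpretation and helper names are illustrative only:

#include <arm_neon.h>

/* Hypothetical helper: treat each int32 lane as Q16.16 fixed point. */
float32x2_t q16_16_to_float (int32x2_t q)
{
  return vcvt_n_f32_s32 (q, 16);   /* per lane: (float) lane / 65536.0f */
}

/* Plain integer -> float conversion, no fractional bits. */
float32x2_t int_to_float (int32x2_t v)
{
  return vcvt_f32_s32 (v);
}
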
__gnu_inline__, __artificial__)) -vcvtnq_s32_f32 (float32x4_t __a) +vcvt_f64_s64 (int64x1_t __a) { - return __builtin_aarch64_lfrintnv4sfv4si (__a); + return (float64x1_t) { vget_lane_s64 (__a, 0) }; } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtnq_u32_f32 (float32x4_t __a) +vcvt_f64_u64 (uint64x1_t __a) { - return __builtin_aarch64_lfrintnuv4sfv4si_us (__a); + return (float64x1_t) { vget_lane_u64 (__a, 0) }; } -__extension__ extern __inline int64x1_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtn_s64_f64 (float64x1_t __a) +vcvtq_f32_s32 (int32x4_t __a) { - return (int64x1_t) {vcvtnd_s64_f64 (__a[0])}; + return __builtin_aarch64_floatv4siv4sf (__a); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtn_u64_f64 (float64x1_t __a) +vcvtq_f32_u32 (uint32x4_t __a) { - return (uint64x1_t) {vcvtnd_u64_f64 (__a[0])}; + return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtnq_s64_f64 (float64x2_t __a) +vcvtq_f64_s64 (int64x2_t __a) { - return __builtin_aarch64_lfrintnv2dfv2di (__a); + return __builtin_aarch64_floatv2div2df (__a); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtnq_u64_f64 (float64x2_t __a) +vcvtq_f64_u64 (uint64x2_t __a) { - return __builtin_aarch64_lfrintnuv2dfv2di_us (__a); + return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); } -/* vcvtp */ +/* vcvt (float -> int) */ __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpd_s64_f64 (float64_t __a) +vcvtd_s64_f64 (float64_t __a) { - return __builtin_llceil (__a); + return (int64_t) __a; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpd_u64_f64 (float64_t __a) +vcvtd_u64_f64 (float64_t __a) { - return __builtin_aarch64_lceiludfdi_us (__a); + return (uint64_t) __a; } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtps_s32_f32 (float32_t __a) +vcvts_s32_f32 (float32_t __a) { - return __builtin_iceilf (__a); + return (int32_t) __a; } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtps_u32_f32 (float32_t __a) +vcvts_u32_f32 (float32_t __a) { - return __builtin_aarch64_lceilusfsi_us (__a); + return (uint32_t) __a; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtp_s32_f32 (float32x2_t __a) +vcvt_s32_f32 (float32x2_t __a) { - return __builtin_aarch64_lceilv2sfv2si (__a); + return __builtin_aarch64_lbtruncv2sfv2si (__a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtp_u32_f32 (float32x2_t __a) +vcvt_u32_f32 (float32x2_t __a) { - return __builtin_aarch64_lceiluv2sfv2si_us (__a); + return __builtin_aarch64_lbtruncuv2sfv2si_us (__a); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpq_s32_f32 (float32x4_t __a) +vcvtq_s32_f32 
(float32x4_t __a) { - return __builtin_aarch64_lceilv4sfv4si (__a); + return __builtin_aarch64_lbtruncv4sfv4si (__a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpq_u32_f32 (float32x4_t __a) +vcvtq_u32_f32 (float32x4_t __a) { - return __builtin_aarch64_lceiluv4sfv4si_us (__a); + return __builtin_aarch64_lbtruncuv4sfv4si_us (__a); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtp_s64_f64 (float64x1_t __a) +vcvt_s64_f64 (float64x1_t __a) { - return (int64x1_t) {vcvtpd_s64_f64 (__a[0])}; + return (int64x1_t) {vcvtd_s64_f64 (__a[0])}; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtp_u64_f64 (float64x1_t __a) +vcvt_u64_f64 (float64x1_t __a) { - return (uint64x1_t) {vcvtpd_u64_f64 (__a[0])}; + return (uint64x1_t) {vcvtd_u64_f64 (__a[0])}; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpq_s64_f64 (float64x2_t __a) +vcvtq_s64_f64 (float64x2_t __a) { - return __builtin_aarch64_lceilv2dfv2di (__a); + return __builtin_aarch64_lbtruncv2dfv2di (__a); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpq_u64_f64 (float64x2_t __a) +vcvtq_u64_f64 (float64x2_t __a) { - return __builtin_aarch64_lceiluv2dfv2di_us (__a); + return __builtin_aarch64_lbtruncuv2dfv2di_us (__a); } -/* vdup_n */ - -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_f16 (float16_t __a) -{ - return (float16x4_t) {__a, __a, __a, __a}; -} +/* vcvta */ -__extension__ extern __inline float32x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_f32 (float32_t __a) +vcvtad_s64_f64 (float64_t __a) { - return (float32x2_t) {__a, __a}; + return __builtin_aarch64_lrounddfdi (__a); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_f64 (float64_t __a) +vcvtad_u64_f64 (float64_t __a) { - return (float64x1_t) {__a}; + return __builtin_aarch64_lroundudfdi_us (__a); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_p8 (poly8_t __a) +vcvtas_s32_f32 (float32_t __a) { - return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; + return __builtin_aarch64_lroundsfsi (__a); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_p16 (poly16_t __a) +vcvtas_u32_f32 (float32_t __a) { - return (poly16x4_t) {__a, __a, __a, __a}; + return __builtin_aarch64_lroundusfsi_us (__a); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_p64 (poly64_t __a) +vcvta_s32_f32 (float32x2_t __a) { - return (poly64x1_t) {__a}; + return __builtin_aarch64_lroundv2sfv2si (__a); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_s8 (int8_t __a) +vcvta_u32_f32 (float32x2_t __a) { - return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; + return 
__builtin_aarch64_lrounduv2sfv2si_us (__a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_s16 (int16_t __a) +vcvtaq_s32_f32 (float32x4_t __a) { - return (int16x4_t) {__a, __a, __a, __a}; + return __builtin_aarch64_lroundv4sfv4si (__a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_s32 (int32_t __a) +vcvtaq_u32_f32 (float32x4_t __a) { - return (int32x2_t) {__a, __a}; + return __builtin_aarch64_lrounduv4sfv4si_us (__a); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_s64 (int64_t __a) +vcvta_s64_f64 (float64x1_t __a) { - return (int64x1_t) {__a}; + return (int64x1_t) {vcvtad_s64_f64 (__a[0])}; } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_u8 (uint8_t __a) +vcvta_u64_f64 (float64x1_t __a) { - return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; + return (uint64x1_t) {vcvtad_u64_f64 (__a[0])}; } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_u16 (uint16_t __a) +vcvtaq_s64_f64 (float64x2_t __a) { - return (uint16x4_t) {__a, __a, __a, __a}; + return __builtin_aarch64_lroundv2dfv2di (__a); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_u32 (uint32_t __a) +vcvtaq_u64_f64 (float64x2_t __a) { - return (uint32x2_t) {__a, __a}; + return __builtin_aarch64_lrounduv2dfv2di_us (__a); } -__extension__ extern __inline uint64x1_t +/* vcvtm */ + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_n_u64 (uint64_t __a) +vcvtmd_s64_f64 (float64_t __a) { - return (uint64x1_t) {__a}; + return __builtin_llfloor (__a); } -/* vdupq_n */ - -__extension__ extern __inline float16x8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_f16 (float16_t __a) +vcvtmd_u64_f64 (float64_t __a) { - return (float16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; + return __builtin_aarch64_lfloorudfdi_us (__a); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_f32 (float32_t __a) +vcvtms_s32_f32 (float32_t __a) { - return (float32x4_t) {__a, __a, __a, __a}; + return __builtin_ifloorf (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_f64 (float64_t __a) +vcvtms_u32_f32 (float32_t __a) { - return (float64x2_t) {__a, __a}; + return __builtin_aarch64_lfloorusfsi_us (__a); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_p8 (uint32_t __a) +vcvtm_s32_f32 (float32x2_t __a) { - return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, - __a, __a, __a, __a, __a, __a, __a, __a}; + return __builtin_aarch64_lfloorv2sfv2si (__a); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline uint32x2_t __attribute__ 
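
For the vcvta block above (round to nearest, ties away from zero), a small hedged sketch — AArch64 assumed; the helper name is invented:

#include <arm_neon.h>

/* Hypothetical helper: FCVTAS-style rounding, ties away from zero. */
int32x2_t round_ties_away (float32x2_t v)
{
  return vcvta_s32_f32 (v);        /* {2.5f, -2.5f} -> {3, -3} */
}
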
((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_p16 (uint32_t __a) +vcvtm_u32_f32 (float32x2_t __a) { - return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; + return __builtin_aarch64_lflooruv2sfv2si_us (__a); } -__extension__ extern __inline poly64x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_p64 (uint64_t __a) +vcvtmq_s32_f32 (float32x4_t __a) { - return (poly64x2_t) {__a, __a}; + return __builtin_aarch64_lfloorv4sfv4si (__a); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_s8 (int32_t __a) +vcvtmq_u32_f32 (float32x4_t __a) { - return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, - __a, __a, __a, __a, __a, __a, __a, __a}; + return __builtin_aarch64_lflooruv4sfv4si_us (__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_s16 (int32_t __a) +vcvtm_s64_f64 (float64x1_t __a) { - return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; + return (int64x1_t) {vcvtmd_s64_f64 (__a[0])}; } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_s32 (int32_t __a) +vcvtm_u64_f64 (float64x1_t __a) { - return (int32x4_t) {__a, __a, __a, __a}; + return (uint64x1_t) {vcvtmd_u64_f64 (__a[0])}; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_s64 (int64_t __a) +vcvtmq_s64_f64 (float64x2_t __a) { - return (int64x2_t) {__a, __a}; + return __builtin_aarch64_lfloorv2dfv2di (__a); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_u8 (uint32_t __a) +vcvtmq_u64_f64 (float64x2_t __a) { - return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, - __a, __a, __a, __a, __a, __a, __a, __a}; + return __builtin_aarch64_lflooruv2dfv2di_us (__a); } -__extension__ extern __inline uint16x8_t +/* vcvtn */ + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_u16 (uint32_t __a) +vcvtnd_s64_f64 (float64_t __a) { - return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; + return __builtin_aarch64_lfrintndfdi (__a); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_u32 (uint32_t __a) +vcvtnd_u64_f64 (float64_t __a) { - return (uint32x4_t) {__a, __a, __a, __a}; + return __builtin_aarch64_lfrintnudfdi_us (__a); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_u64 (uint64_t __a) +vcvtns_s32_f32 (float32_t __a) { - return (uint64x2_t) {__a, __a}; + return __builtin_aarch64_lfrintnsfsi (__a); } -/* vdup_lane */ - -__extension__ extern __inline float16x4_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_f16 (float16x4_t __a, const int __b) +vcvtns_u32_f32 (float32_t __a) { - return __aarch64_vdup_lane_f16 (__a, __b); + return __builtin_aarch64_lfrintnusfsi_us (__a); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x2_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_f32 (float32x2_t __a, const int __b) +vcvtn_s32_f32 (float32x2_t __a) { - return __aarch64_vdup_lane_f32 (__a, __b); + return __builtin_aarch64_lfrintnv2sfv2si (__a); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_f64 (float64x1_t __a, const int __b) +vcvtn_u32_f32 (float32x2_t __a) { - return __aarch64_vdup_lane_f64 (__a, __b); + return __builtin_aarch64_lfrintnuv2sfv2si_us (__a); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_p8 (poly8x8_t __a, const int __b) +vcvtnq_s32_f32 (float32x4_t __a) { - return __aarch64_vdup_lane_p8 (__a, __b); + return __builtin_aarch64_lfrintnv4sfv4si (__a); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_p16 (poly16x4_t __a, const int __b) +vcvtnq_u32_f32 (float32x4_t __a) { - return __aarch64_vdup_lane_p16 (__a, __b); + return __builtin_aarch64_lfrintnuv4sfv4si_us (__a); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_p64 (poly64x1_t __a, const int __b) +vcvtn_s64_f64 (float64x1_t __a) { - return __aarch64_vdup_lane_p64 (__a, __b); + return (int64x1_t) {vcvtnd_s64_f64 (__a[0])}; } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_s8 (int8x8_t __a, const int __b) +vcvtn_u64_f64 (float64x1_t __a) { - return __aarch64_vdup_lane_s8 (__a, __b); + return (uint64x1_t) {vcvtnd_u64_f64 (__a[0])}; } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_s16 (int16x4_t __a, const int __b) +vcvtnq_s64_f64 (float64x2_t __a) { - return __aarch64_vdup_lane_s16 (__a, __b); + return __builtin_aarch64_lfrintnv2dfv2di (__a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_s32 (int32x2_t __a, const int __b) +vcvtnq_u64_f64 (float64x2_t __a) { - return __aarch64_vdup_lane_s32 (__a, __b); + return __builtin_aarch64_lfrintnuv2dfv2di_us (__a); } -__extension__ extern __inline int64x1_t +/* vcvtp */ + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_s64 (int64x1_t __a, const int __b) +vcvtpd_s64_f64 (float64_t __a) { - return __aarch64_vdup_lane_s64 (__a, __b); + return __builtin_llceil (__a); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_u8 (uint8x8_t __a, const int __b) +vcvtpd_u64_f64 (float64_t __a) { - return __aarch64_vdup_lane_u8 (__a, __b); + return __builtin_aarch64_lceiludfdi_us (__a); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_u16 (uint16x4_t __a, const int __b) +vcvtps_s32_f32 (float32_t __a) { - return __aarch64_vdup_lane_u16 (__a, __b); + return __builtin_iceilf (__a); +} + +__extension__ extern __inline 
uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtps_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lceilusfsi_us (__a); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtp_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lceilv2sfv2si (__a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_u32 (uint32x2_t __a, const int __b) +vcvtp_u32_f32 (float32x2_t __a) { - return __aarch64_vdup_lane_u32 (__a, __b); + return __builtin_aarch64_lceiluv2sfv2si_us (__a); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtpq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lceilv4sfv4si (__a); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtpq_u32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lceiluv4sfv4si_us (__a); +} + +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtp_s64_f64 (float64x1_t __a) +{ + return (int64x1_t) {vcvtpd_s64_f64 (__a[0])}; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_lane_u64 (uint64x1_t __a, const int __b) +vcvtp_u64_f64 (float64x1_t __a) { - return __aarch64_vdup_lane_u64 (__a, __b); + return (uint64x1_t) {vcvtpd_u64_f64 (__a[0])}; } -/* vdup_laneq */ +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtpq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lceilv2dfv2di (__a); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtpq_u64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lceiluv2dfv2di_us (__a); +} + +/* vdup_n */ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_f16 (float16x8_t __a, const int __b) +vdup_n_f16 (float16_t __a) { - return __aarch64_vdup_laneq_f16 (__a, __b); + return (float16x4_t) {__a, __a, __a, __a}; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_f32 (float32x4_t __a, const int __b) +vdup_n_f32 (float32_t __a) { - return __aarch64_vdup_laneq_f32 (__a, __b); + return (float32x2_t) {__a, __a}; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_f64 (float64x2_t __a, const int __b) +vdup_n_f64 (float64_t __a) { - return __aarch64_vdup_laneq_f64 (__a, __b); + return (float64x1_t) {__a}; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_p8 (poly8x16_t __a, const int __b) +vdup_n_p8 (poly8_t __a) { - return __aarch64_vdup_laneq_p8 (__a, __b); + return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_p16 (poly16x8_t __a, const int __b) +vdup_n_p16 (poly16_t __a) { - return __aarch64_vdup_laneq_p16 (__a, __b); + return (poly16x4_t) {__a, __a, __a, __a}; } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_p64 (poly64x2_t __a, const int __b) +vdup_n_p64 (poly64_t __a) 
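
The vcvtm/vcvtn/vcvtp hunks above differ only in rounding direction; a minimal comparison sketch, AArch64 assumed, with invented helper and parameter names:

#include <arm_neon.h>

/* Hypothetical helper comparing the three directed-rounding conversions. */
void directed_rounding (float32x2_t v, int32x2_t *down,
                        int32x2_t *nearest, int32x2_t *up)
{
  *down    = vcvtm_s32_f32 (v);    /* toward -infinity (floor)   */
  *nearest = vcvtn_s32_f32 (v);    /* to nearest, ties to even   */
  *up      = vcvtp_s32_f32 (v);    /* toward +infinity (ceiling) */
}
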
{ - return __aarch64_vdup_laneq_p64 (__a, __b); + return (poly64x1_t) {__a}; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_s8 (int8x16_t __a, const int __b) +vdup_n_s8 (int8_t __a) { - return __aarch64_vdup_laneq_s8 (__a, __b); + return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_s16 (int16x8_t __a, const int __b) +vdup_n_s16 (int16_t __a) { - return __aarch64_vdup_laneq_s16 (__a, __b); + return (int16x4_t) {__a, __a, __a, __a}; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_s32 (int32x4_t __a, const int __b) +vdup_n_s32 (int32_t __a) { - return __aarch64_vdup_laneq_s32 (__a, __b); + return (int32x2_t) {__a, __a}; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_s64 (int64x2_t __a, const int __b) +vdup_n_s64 (int64_t __a) { - return __aarch64_vdup_laneq_s64 (__a, __b); + return (int64x1_t) {__a}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_u8 (uint8x16_t __a, const int __b) +vdup_n_u8 (uint8_t __a) { - return __aarch64_vdup_laneq_u8 (__a, __b); + return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_u16 (uint16x8_t __a, const int __b) +vdup_n_u16 (uint16_t __a) { - return __aarch64_vdup_laneq_u16 (__a, __b); + return (uint16x4_t) {__a, __a, __a, __a}; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_u32 (uint32x4_t __a, const int __b) +vdup_n_u32 (uint32_t __a) { - return __aarch64_vdup_laneq_u32 (__a, __b); + return (uint32x2_t) {__a, __a}; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdup_laneq_u64 (uint64x2_t __a, const int __b) +vdup_n_u64 (uint64_t __a) { - return __aarch64_vdup_laneq_u64 (__a, __b); + return (uint64x1_t) {__a}; } -/* vdupq_lane */ +/* vdupq_n */ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_f16 (float16x4_t __a, const int __b) +vdupq_n_f16 (float16_t __a) { - return __aarch64_vdupq_lane_f16 (__a, __b); + return (float16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_f32 (float32x2_t __a, const int __b) +vdupq_n_f32 (float32_t __a) { - return __aarch64_vdupq_lane_f32 (__a, __b); + return (float32x4_t) {__a, __a, __a, __a}; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_f64 (float64x1_t __a, const int __b) +vdupq_n_f64 (float64_t __a) { - return __aarch64_vdupq_lane_f64 (__a, __b); + return (float64x2_t) {__a, __a}; } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_p8 (poly8x8_t __a, const int __b) +vdupq_n_p8 (poly8_t __a) { - return __aarch64_vdupq_lane_p8 (__a, __b); + return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline poly16x8_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_p16 (poly16x4_t __a, const int __b) +vdupq_n_p16 (poly16_t __a) { - return __aarch64_vdupq_lane_p16 (__a, __b); + return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_p64 (poly64x1_t __a, const int __b) +vdupq_n_p64 (poly64_t __a) { - return __aarch64_vdupq_lane_p64 (__a, __b); + return (poly64x2_t) {__a, __a}; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_s8 (int8x8_t __a, const int __b) +vdupq_n_s8 (int8_t __a) { - return __aarch64_vdupq_lane_s8 (__a, __b); + return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_s16 (int16x4_t __a, const int __b) +vdupq_n_s16 (int16_t __a) { - return __aarch64_vdupq_lane_s16 (__a, __b); + return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_s32 (int32x2_t __a, const int __b) +vdupq_n_s32 (int32_t __a) { - return __aarch64_vdupq_lane_s32 (__a, __b); + return (int32x4_t) {__a, __a, __a, __a}; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_s64 (int64x1_t __a, const int __b) +vdupq_n_s64 (int64_t __a) { - return __aarch64_vdupq_lane_s64 (__a, __b); + return (int64x2_t) {__a, __a}; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_u8 (uint8x8_t __a, const int __b) +vdupq_n_u8 (uint8_t __a) { - return __aarch64_vdupq_lane_u8 (__a, __b); + return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_u16 (uint16x4_t __a, const int __b) +vdupq_n_u16 (uint16_t __a) { - return __aarch64_vdupq_lane_u16 (__a, __b); + return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_u32 (uint32x2_t __a, const int __b) +vdupq_n_u32 (uint32_t __a) { - return __aarch64_vdupq_lane_u32 (__a, __b); + return (uint32x4_t) {__a, __a, __a, __a}; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_lane_u64 (uint64x1_t __a, const int __b) +vdupq_n_u64 (uint64_t __a) { - return __aarch64_vdupq_lane_u64 (__a, __b); + return (uint64x2_t) {__a, __a}; } -/* vdupq_laneq */ +/* vdup_lane */ -__extension__ extern __inline float16x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_f16 (float16x8_t __a, const int __b) +vdup_lane_f16 (float16x4_t __a, const int __b) { - return __aarch64_vdupq_laneq_f16 (__a, __b); + return __aarch64_vdup_lane_f16 (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_f32 (float32x4_t __a, const int __b) +vdup_lane_f32 (float32x2_t __a, const int __b) { - return __aarch64_vdupq_laneq_f32 (__a, __b); + 
return __aarch64_vdup_lane_f32 (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_f64 (float64x2_t __a, const int __b) +vdup_lane_f64 (float64x1_t __a, const int __b) { - return __aarch64_vdupq_laneq_f64 (__a, __b); + return __aarch64_vdup_lane_f64 (__a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_p8 (poly8x16_t __a, const int __b) +vdup_lane_p8 (poly8x8_t __a, const int __b) { - return __aarch64_vdupq_laneq_p8 (__a, __b); + return __aarch64_vdup_lane_p8 (__a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_p16 (poly16x8_t __a, const int __b) +vdup_lane_p16 (poly16x4_t __a, const int __b) { - return __aarch64_vdupq_laneq_p16 (__a, __b); + return __aarch64_vdup_lane_p16 (__a, __b); } -__extension__ extern __inline poly64x2_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_p64 (poly64x2_t __a, const int __b) +vdup_lane_p64 (poly64x1_t __a, const int __b) { - return __aarch64_vdupq_laneq_p64 (__a, __b); + return __aarch64_vdup_lane_p64 (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_s8 (int8x16_t __a, const int __b) +vdup_lane_s8 (int8x8_t __a, const int __b) { - return __aarch64_vdupq_laneq_s8 (__a, __b); + return __aarch64_vdup_lane_s8 (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_s16 (int16x8_t __a, const int __b) +vdup_lane_s16 (int16x4_t __a, const int __b) { - return __aarch64_vdupq_laneq_s16 (__a, __b); + return __aarch64_vdup_lane_s16 (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_s32 (int32x4_t __a, const int __b) +vdup_lane_s32 (int32x2_t __a, const int __b) { - return __aarch64_vdupq_laneq_s32 (__a, __b); + return __aarch64_vdup_lane_s32 (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_s64 (int64x2_t __a, const int __b) +vdup_lane_s64 (int64x1_t __a, const int __b) { - return __aarch64_vdupq_laneq_s64 (__a, __b); + return __aarch64_vdup_lane_s64 (__a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_u8 (uint8x16_t __a, const int __b) +vdup_lane_u8 (uint8x8_t __a, const int __b) { - return __aarch64_vdupq_laneq_u8 (__a, __b); + return __aarch64_vdup_lane_u8 (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_u16 (uint16x8_t __a, const int __b) +vdup_lane_u16 (uint16x4_t __a, const int __b) { - return __aarch64_vdupq_laneq_u16 (__a, __b); + return __aarch64_vdup_lane_u16 (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint32x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_u32 (uint32x4_t __a, const int __b) +vdup_lane_u32 (uint32x2_t __a, const int __b) { - return __aarch64_vdupq_laneq_u32 (__a, __b); + return __aarch64_vdup_lane_u32 (__a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_laneq_u64 (uint64x2_t __a, const int __b) +vdup_lane_u64 (uint64x1_t __a, const int __b) { - return __aarch64_vdupq_laneq_u64 (__a, __b); + return __aarch64_vdup_lane_u64 (__a, __b); } -/* vdupb_lane */ -__extension__ extern __inline poly8_t +/* vdup_laneq */ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupb_lane_p8 (poly8x8_t __a, const int __b) +vdup_laneq_f16 (float16x8_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_f16 (__a, __b); } -__extension__ extern __inline int8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupb_lane_s8 (int8x8_t __a, const int __b) +vdup_laneq_f32 (float32x4_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_f32 (__a, __b); } -__extension__ extern __inline uint8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupb_lane_u8 (uint8x8_t __a, const int __b) +vdup_laneq_f64 (float64x2_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_f64 (__a, __b); } -/* vduph_lane */ - -__extension__ extern __inline float16_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_lane_f16 (float16x4_t __a, const int __b) +vdup_laneq_p8 (poly8x16_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_p8 (__a, __b); } -__extension__ extern __inline poly16_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_lane_p16 (poly16x4_t __a, const int __b) +vdup_laneq_p16 (poly16x8_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_p16 (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_lane_s16 (int16x4_t __a, const int __b) +vdup_laneq_p64 (poly64x2_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_p64 (__a, __b); } -__extension__ extern __inline uint16_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_lane_u16 (uint16x4_t __a, const int __b) +vdup_laneq_s8 (int8x16_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_s8 (__a, __b); } -/* vdups_lane */ - -__extension__ extern __inline float32_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdups_lane_f32 (float32x2_t __a, const int __b) +vdup_laneq_s16 (int16x8_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_s16 (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdups_lane_s32 
(int32x2_t __a, const int __b) +vdup_laneq_s32 (int32x4_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_s32 (__a, __b); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdups_lane_u32 (uint32x2_t __a, const int __b) +vdup_laneq_s64 (int64x2_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_s64 (__a, __b); } -/* vdupd_lane */ -__extension__ extern __inline float64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupd_lane_f64 (float64x1_t __a, const int __b) +vdup_laneq_u8 (uint8x16_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __b); - return __a[0]; + return __aarch64_vdup_laneq_u8 (__a, __b); } -__extension__ extern __inline int64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupd_lane_s64 (int64x1_t __a, const int __b) +vdup_laneq_u16 (uint16x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __b); - return __a[0]; + return __aarch64_vdup_laneq_u16 (__a, __b); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupd_lane_u64 (uint64x1_t __a, const int __b) +vdup_laneq_u32 (uint32x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __b); - return __a[0]; + return __aarch64_vdup_laneq_u32 (__a, __b); } -/* vdupb_laneq */ -__extension__ extern __inline poly8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupb_laneq_p8 (poly8x16_t __a, const int __b) +vdup_laneq_u64 (uint64x2_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdup_laneq_u64 (__a, __b); } -__extension__ extern __inline int8_t +/* vdupq_lane */ + +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupb_laneq_s8 (int8x16_t __a, const int __b) +vdupq_lane_f16 (float16x4_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_f16 (__a, __b); } -__extension__ extern __inline uint8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupb_laneq_u8 (uint8x16_t __a, const int __b) +vdupq_lane_f32 (float32x2_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_f32 (__a, __b); } -/* vduph_laneq */ +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdupq_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_f64 (__a, __b); +} -__extension__ extern __inline float16_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_laneq_f16 (float16x8_t __a, const int __b) +vdupq_lane_p8 (poly8x8_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_p8 (__a, __b); } -__extension__ extern __inline poly16_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_laneq_p16 (poly16x8_t __a, const int __b) +vdupq_lane_p16 (poly16x4_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_p16 (__a, __b); } 
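
For reference, a minimal usage sketch of the vdup_n/vdupq_n/vdup_lane intrinsics defined above; it assumes an AArch64 target compiling against <arm_neon.h>, and the variable names and constants are illustrative only, not taken from the patch.

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* vdup_n_f32 broadcasts a scalar into both lanes of a 64-bit vector,
     matching the (float32x2_t) {__a, __a} initialiser above.  */
  float32x2_t v = vdup_n_f32 (1.5f);

  /* vdupq_n_s16 does the same for all eight lanes of a 128-bit vector.  */
  int16x8_t w = vdupq_n_s16 (42);

  /* vdup_lane_f32 broadcasts lane 1 of an existing vector; the lane index
     must be a constant expression, as the 'const int' parameter indicates.  */
  float32x2_t x = vdup_lane_f32 (v, 1);

  printf ("%f %d %f\n", vget_lane_f32 (v, 0), vgetq_lane_s16 (w, 3),
          vget_lane_f32 (x, 1));
  return 0;
}
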
-__extension__ extern __inline int16_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_laneq_s16 (int16x8_t __a, const int __b) +vdupq_lane_p64 (poly64x1_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_p64 (__a, __b); } -__extension__ extern __inline uint16_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vduph_laneq_u16 (uint16x8_t __a, const int __b) +vdupq_lane_s8 (int8x8_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_s8 (__a, __b); } -/* vdups_laneq */ +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s16 (__a, __b); +} -__extension__ extern __inline float32_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdups_laneq_f32 (float32x4_t __a, const int __b) +vdupq_lane_s32 (int32x2_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_s32 (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdups_laneq_s32 (int32x4_t __a, const int __b) +vdupq_lane_s64 (int64x1_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_s64 (__a, __b); } -__extension__ extern __inline uint32_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdups_laneq_u32 (uint32x4_t __a, const int __b) +vdupq_lane_u8 (uint8x8_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_u8 (__a, __b); } -/* vdupd_laneq */ -__extension__ extern __inline float64_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupd_laneq_f64 (float64x2_t __a, const int __b) +vdupq_lane_u16 (uint16x4_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_u16 (__a, __b); } -__extension__ extern __inline int64_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupd_laneq_s64 (int64x2_t __a, const int __b) +vdupq_lane_u32 (uint32x2_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_u32 (__a, __b); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupd_laneq_u64 (uint64x2_t __a, const int __b) +vdupq_lane_u64 (uint64x1_t __a, const int __b) { - return __aarch64_vget_lane_any (__a, __b); + return __aarch64_vdupq_lane_u64 (__a, __b); } -/* vext */ +/* vdupq_laneq */ -__extension__ extern __inline float16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_f16 (float16x4_t __a, float16x4_t __b, __const int __c) +vdupq_laneq_f16 (float16x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint16x4_t) {4 - __c, 5 - __c, 6 - __c, 7 - __c}); -#else - return __builtin_shuffle (__a, __b, - (uint16x4_t) {__c, __c + 1, __c + 2, __c + 3}); -#endif + return __aarch64_vdupq_laneq_f16 
(__a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) +vdupq_laneq_f32 (float32x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -#endif + return __aarch64_vdupq_laneq_f32 (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) +vdupq_laneq_f64 (float64x2_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); - /* The only possible index to the assembler instruction returns element 0. */ - return __a; + return __aarch64_vdupq_laneq_f64 (__a, __b); } -__extension__ extern __inline poly8x8_t + +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) +vdupq_laneq_p8 (poly8x16_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint8x8_t) - {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -#else - return __builtin_shuffle (__a, __b, - (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -#endif + return __aarch64_vdupq_laneq_p8 (__a, __b); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) +vdupq_laneq_p16 (poly16x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -#endif + return __aarch64_vdupq_laneq_p16 (__a, __b); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_p64 (poly64x1_t __a, poly64x1_t __b, __const int __c) +vdupq_laneq_p64 (poly64x2_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); - /* The only possible index to the assembler instruction returns element 0. 
*/ - return __a; + return __aarch64_vdupq_laneq_p64 (__a, __b); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) +vdupq_laneq_s8 (int8x16_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint8x8_t) - {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -#else - return __builtin_shuffle (__a, __b, - (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -#endif + return __aarch64_vdupq_laneq_s8 (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) +vdupq_laneq_s16 (int16x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -#endif + return __aarch64_vdupq_laneq_s16 (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) +vdupq_laneq_s32 (int32x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -#endif + return __aarch64_vdupq_laneq_s32 (__a, __b); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) +vdupq_laneq_s64 (int64x2_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); - /* The only possible index to the assembler instruction returns element 0. 
*/ - return __a; + return __aarch64_vdupq_laneq_s64 (__a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) +vdupq_laneq_u8 (uint8x16_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint8x8_t) - {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -#else - return __builtin_shuffle (__a, __b, - (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -#endif + return __aarch64_vdupq_laneq_u8 (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) +vdupq_laneq_u16 (uint16x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -#endif + return __aarch64_vdupq_laneq_u16 (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) +vdupq_laneq_u32 (uint32x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -#endif + return __aarch64_vdupq_laneq_u32 (__a, __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) +vdupq_laneq_u64 (uint64x2_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); - /* The only possible index to the assembler instruction returns element 0. 
*/ - return __a; + return __aarch64_vdupq_laneq_u64 (__a, __b); } -__extension__ extern __inline float16x8_t +/* vdupb_lane */ +__extension__ extern __inline poly8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_f16 (float16x8_t __a, float16x8_t __b, __const int __c) +vdupb_lane_p8 (poly8x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint16x8_t) {8 - __c, 9 - __c, 10 - __c, 11 - __c, - 12 - __c, 13 - __c, 14 - __c, - 15 - __c}); -#else - return __builtin_shuffle (__a, __b, - (uint16x8_t) {__c, __c + 1, __c + 2, __c + 3, - __c + 4, __c + 5, __c + 6, __c + 7}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) +vdupb_lane_s8 (int8x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) +vdupb_lane_u8 (uint8x8_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline poly8x16_t +/* vduph_lane */ + +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) +vduph_lane_f16 (float16x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint8x16_t) - {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, - 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, - __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline poly16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) +vduph_lane_p16 (poly16x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint16x8_t) - {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -#else - return __builtin_shuffle (__a, __b, - (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline poly64x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_p64 (poly64x2_t __a, poly64x2_t __b, __const int __c) +vduph_lane_s16 (int16x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -#else - return 
__builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) +vduph_lane_u16 (uint16x4_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint8x16_t) - {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, - 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, - __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline int16x8_t +/* vdups_lane */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) +vdups_lane_f32 (float32x2_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint16x8_t) - {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -#else - return __builtin_shuffle (__a, __b, - (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) +vdups_lane_s32 (int32x2_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) +vdups_lane_u32 (uint32x2_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline uint8x16_t +/* vdupd_lane */ +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) +vdupd_lane_f64 (float64x1_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint8x16_t) - {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, - 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, - __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -#endif + __AARCH64_LANE_CHECK (__a, __b); + return __a[0]; } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) +vdupd_lane_s64 (int64x1_t __a, const int __b) { - 
__AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint16x8_t) - {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -#else - return __builtin_shuffle (__a, __b, - (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -#endif + __AARCH64_LANE_CHECK (__a, __b); + return __a[0]; } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) +vdupd_lane_u64 (uint64x1_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, - (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -#endif + __AARCH64_LANE_CHECK (__a, __b); + return __a[0]; } -__extension__ extern __inline uint64x2_t +/* vdupb_laneq */ +__extension__ extern __inline poly8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) +vdupb_laneq_p8 (poly8x16_t __a, const int __b) { - __AARCH64_LANE_CHECK (__a, __c); -#ifdef __AARCH64EB__ - return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -#endif + return __aarch64_vget_lane_any (__a, __b); } -/* vfma */ - -__extension__ extern __inline float64x1_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +vdupb_laneq_s8 (int8x16_t __a, const int __b) { - return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +vdupb_laneq_u8 (uint8x16_t __a, const int __b) { - return __builtin_aarch64_fmav2sf (__b, __c, __a); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float32x4_t +/* vduph_laneq */ + +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +vduph_laneq_f16 (float16x8_t __a, const int __b) { - return __builtin_aarch64_fmav4sf (__b, __c, __a); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline poly16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) +vduph_laneq_p16 (poly16x8_t __a, const int __b) { - return __builtin_aarch64_fmav2df (__b, __c, __a); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +vduph_laneq_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) +vduph_laneq_u16 (uint16x8_t __a, 
const int __b) { - return (float64x1_t) {__b[0] * __c + __a[0]}; + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float32x4_t +/* vdups_laneq */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +vdups_laneq_f32 (float32x4_t __a, const int __b) { - return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) +vdups_laneq_s32 (int32x4_t __a, const int __b) { - return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a); + return __aarch64_vget_lane_any (__a, __b); } -/* vfma_lane */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_lane_f32 (float32x2_t __a, float32x2_t __b, - float32x2_t __c, const int __lane) +vdups_laneq_u32 (uint32x4_t __a, const int __b) { - return __builtin_aarch64_fmav2sf (__b, - __aarch64_vdup_lane_f32 (__c, __lane), - __a); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float64x1_t +/* vdupd_laneq */ +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_lane_f64 (float64x1_t __a, float64x1_t __b, - float64x1_t __c, const int __lane) +vdupd_laneq_f64 (float64x2_t __a, const int __b) { - return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmad_lane_f64 (float64_t __a, float64_t __b, - float64x1_t __c, const int __lane) +vdupd_laneq_s64 (int64x2_t __a, const int __b) { - return __builtin_fma (__b, __c[0], __a); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline float32_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmas_lane_f32 (float32_t __a, float32_t __b, - float32x2_t __c, const int __lane) +vdupd_laneq_u64 (uint64x2_t __a, const int __b) { - return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a); + return __aarch64_vget_lane_any (__a, __b); } -/* vfma_laneq */ +/* vext */ + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vext_f16 (float16x4_t __a, float16x4_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4 - __c, 5 - __c, 6 - __c, 7 - __c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x4_t) {__c, __c + 1, __c + 2, __c + 3}); +#endif +} __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_laneq_f32 (float32x2_t __a, float32x2_t __b, - float32x4_t __c, const int __lane) +vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) { - return __builtin_aarch64_fmav2sf (__b, - __aarch64_vdup_laneq_f32 (__c, __lane), - __a); + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); 
+#endif } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_laneq_f64 (float64x1_t __a, float64x1_t __b, - float64x2_t __c, const int __lane) +vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) { - float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); - return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])}; + __AARCH64_LANE_CHECK (__a, __c); + /* The only possible index to the assembler instruction returns element 0. */ + return __a; } - -__extension__ extern __inline float64_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmad_laneq_f64 (float64_t __a, float64_t __b, - float64x2_t __c, const int __lane) +vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) { - return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a); + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif } -__extension__ extern __inline float32_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmas_laneq_f32 (float32_t __a, float32_t __b, - float32x4_t __c, const int __lane) +vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) { - return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a); + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif } -/* vfmaq_lane */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b, - float32x2_t __c, const int __lane) +vext_p64 (poly64x1_t __a, poly64x1_t __b, __const int __c) { - return __builtin_aarch64_fmav4sf (__b, - __aarch64_vdupq_lane_f32 (__c, __lane), - __a); + __AARCH64_LANE_CHECK (__a, __c); + /* The only possible index to the assembler instruction returns element 0. 
*/ + return __a; } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b, - float64x1_t __c, const int __lane) +vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) { - return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a); + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif } -/* vfmaq_laneq */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b, - float32x4_t __c, const int __lane) +vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) { - return __builtin_aarch64_fmav4sf (__b, - __aarch64_vdupq_laneq_f32 (__c, __lane), - __a); + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b, - float64x2_t __c, const int __lane) +vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) { - return __builtin_aarch64_fmav2df (__b, - __aarch64_vdupq_laneq_f64 (__c, __lane), - __a); + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif } -/* vfms */ - -__extension__ extern __inline float64x1_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) { - return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; + __AARCH64_LANE_CHECK (__a, __c); + /* The only possible index to the assembler instruction returns element 0. 
*/ + return __a; } -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) { - return __builtin_aarch64_fmav2sf (-__b, __c, __a); + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __builtin_aarch64_fmav4sf (-__b, __c, __a); -} +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); + /* The only possible index to the assembler instruction returns element 0. 
*/ + return __a; +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_f16 (float16x8_t __a, float16x8_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x8_t) {8 - __c, 9 - __c, 10 - __c, 11 - __c, + 12 - __c, 13 - __c, 14 - __c, + 15 - __c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c + 1, __c + 2, __c + 3, + __c + 4, __c + 5, __c + 6, __c + 7}); +#endif +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +__extension__ extern __inline poly8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_p64 (poly64x2_t __a, poly64x2_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_s16 
(int16x8_t __a, int16x8_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) +{ + __AARCH64_LANE_CHECK (__a, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +/* vfma */ + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +{ + return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return __builtin_aarch64_fmav2sf (__b, __c, __a); +} + +__extension__ 
extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return __builtin_aarch64_fmav4sf (__b, __c, __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) +{ + return __builtin_aarch64_fmav2df (__b, __c, __a); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) +{ + return (float64x1_t) {__b[0] * __c + __a[0]}; +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) +{ + return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a); +} + +/* vfma_lane */ + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (__b, + __aarch64_vdup_lane_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_lane_f64 (float64x1_t __a, float64x1_t __b, + float64x1_t __c, const int __lane) +{ + return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; +} + +__extension__ extern __inline float64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmad_lane_f64 (float64_t __a, float64_t __b, + float64x1_t __c, const int __lane) +{ + return __builtin_fma (__b, __c[0], __a); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmas_lane_f32 (float32_t __a, float32_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a); +} + +/* vfma_laneq */ + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (__b, + __aarch64_vdup_laneq_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfma_laneq_f64 (float64x1_t __a, float64x1_t __b, + float64x2_t __c, const int __lane) +{ + float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); + return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])}; +} + +__extension__ extern __inline float64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmad_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +vfmas_laneq_f32 (float32_t __a, float32_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a); +} + +/* vfmaq_lane */ + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (__b, + __aarch64_vdupq_lane_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b, + float64x1_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a); +} + +/* vfmaq_laneq */ + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (__b, + __aarch64_vdupq_laneq_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (__b, + __aarch64_vdupq_laneq_f64 (__c, __lane), + __a); +} + +/* vfms */ + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +{ + return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return __builtin_aarch64_fmav2sf (-__b, __c, __a); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return __builtin_aarch64_fmav4sf (-__b, __c, __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) +{ + return __builtin_aarch64_fmav2df (-__b, __c, __a); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return __builtin_aarch64_fmav2sf (-__b, vdup_n_f32 (__c), __a); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) +{ + return (float64x1_t) {-__b[0] * __c + __a[0]}; +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return __builtin_aarch64_fmav4sf (-__b, vdupq_n_f32 (__c), __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) +{ + return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a); +} + +/* vfms_lane */ + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_lane_f32 (float32x2_t __a, float32x2_t __b, + 
float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (-__b, + __aarch64_vdup_lane_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_lane_f64 (float64x1_t __a, float64x1_t __b, + float64x1_t __c, const int __lane) +{ + return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; +} + +__extension__ extern __inline float64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsd_lane_f64 (float64_t __a, float64_t __b, + float64x1_t __c, const int __lane) +{ + return __builtin_fma (-__b, __c[0], __a); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmss_lane_f32 (float32_t __a, float32_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); +} + +/* vfms_laneq */ + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (-__b, + __aarch64_vdup_laneq_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfms_laneq_f64 (float64x1_t __a, float64x1_t __b, + float64x2_t __c, const int __lane) +{ + float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); + return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])}; +} + +__extension__ extern __inline float64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsd_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a); +} + +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmss_laneq_f32 (float32_t __a, float32_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); +} + +/* vfmsq_lane */ + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (-__b, + __aarch64_vdupq_lane_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b, + float64x1_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a); +} + +/* vfmsq_laneq */ + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (-__b, + __aarch64_vdupq_laneq_f32 (__c, __lane), + __a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (-__b, + __aarch64_vdupq_laneq_f64 (__c, __lane), + __a); +} + +/* vld1 */ + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f16 (const float16_t *__a) +{ + return 
__builtin_aarch64_ld1v4hf (__a); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32 (const float32_t *__a) +{ + return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) __a); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f64 (const float64_t *__a) +{ + return (float64x1_t) {*__a}; +} + +__extension__ extern __inline poly8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8 (const poly8_t *__a) +{ + return (poly8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); +} + +__extension__ extern __inline poly16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16 (const poly16_t *__a) +{ + return (poly16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); +} + +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64 (const poly64_t *__a) +{ + return (poly64x1_t) {*__a}; +} + +__extension__ extern __inline int8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s8 (const int8_t *__a) +{ + return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); +} + +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s16 (const int16_t *__a) +{ + return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32 (const int32_t *__a) +{ + return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a); +} + +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64 (const int64_t *__a) +{ + return (int64x1_t) {*__a}; +} + +__extension__ extern __inline uint8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u8 (const uint8_t *__a) +{ + return (uint8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); +} + +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16 (const uint16_t *__a) +{ + return (uint16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32 (const uint32_t *__a) +{ + return (uint32x2_t) + __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64 (const uint64_t *__a) +{ + return (uint64x1_t) {*__a}; +} + +/* vld1x3 */ + +__extension__ extern __inline uint8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u8_x3 (const uint8_t *__a) +{ + uint8x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = (__builtin_aarch64_simd_ci)__builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a); + __i.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + __i.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + __i.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return __i; +} + +__extension__ extern __inline int8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+vld1_s8_x3 (const int8_t *__a) +{ + int8x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a); + __i.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + __i.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + __i.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return __i; +} + +__extension__ extern __inline uint16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16_x3 (const uint16_t *__a) +{ + uint16x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a); + __i.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + __i.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + __i.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return __i; +} + +__extension__ extern __inline int16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s16_x3 (const int16_t *__a) +{ + int16x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a); + __i.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + __i.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + __i.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return __i; +} + +__extension__ extern __inline uint32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32_x3 (const uint32_t *__a) +{ + uint32x2x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a); + __i.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + __i.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + __i.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return __i; +} + +__extension__ extern __inline int32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32_x3 (const int32_t *__a) +{ + int32x2x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a); + __i.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + __i.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + __i.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return __i; +} + +__extension__ extern __inline uint64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64_x3 (const uint64_t *__a) +{ + uint64x1x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a); + __i.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + __i.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + __i.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return __i; +} + +__extension__ extern __inline int64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64_x3 (const int64_t *__a) +{ + int64x1x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a); + __i.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + __i.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + __i.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + + return __i; +} + +__extension__ extern __inline float16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vld1_f16_x3 (const float16_t *__a) +{ + float16x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4hf ((const __builtin_aarch64_simd_hf *) __a); + __i.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0); + __i.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1); + __i.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2); + return __i; +} + +__extension__ extern __inline float32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32_x3 (const float32_t *__a) +{ + float32x2x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v2sf ((const __builtin_aarch64_simd_sf *) __a); + __i.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); + __i.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); + __i.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); + return __i; +} + +__extension__ extern __inline float64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f64_x3 (const float64_t *__a) +{ + float64x1x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3df ((const __builtin_aarch64_simd_df *) __a); + __i.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + __i.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + __i.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return __i; +} + +__extension__ extern __inline poly8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8_x3 (const poly8_t *__a) +{ + poly8x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a); + __i.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + __i.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + __i.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return __i; +} + +__extension__ extern __inline poly16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16_x3 (const poly16_t *__a) +{ + poly16x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a); + __i.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + __i.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + __i.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return __i; +} + +__extension__ extern __inline poly64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64_x3 (const poly64_t *__a) +{ + poly64x1x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a); + __i.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + __i.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + __i.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + +return __i; +} + +__extension__ extern __inline uint8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8_x3 (const uint8_t *__a) +{ + uint8x16x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a); + __i.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + __i.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + __i.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return __i; +} + +__extension__ extern __inline int8x16x3_t 
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8_x3 (const int8_t *__a) +{ + int8x16x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a); + __i.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + __i.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + __i.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return __i; +} + +__extension__ extern __inline uint16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u16_x3 (const uint16_t *__a) +{ + uint16x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a); + __i.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + __i.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + __i.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return __i; +} + +__extension__ extern __inline int16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16_x3 (const int16_t *__a) +{ + int16x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a); + __i.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + __i.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + __i.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return __i; +} + +__extension__ extern __inline uint32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32_x3 (const uint32_t *__a) +{ + uint32x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a); + __i.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + __i.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + __i.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return __i; +} + +__extension__ extern __inline int32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32_x3 (const int32_t *__a) +{ + int32x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a); + __i.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + __i.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + __i.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return __i; +} + +__extension__ extern __inline uint64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64_x3 (const uint64_t *__a) +{ + uint64x2x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a); + __i.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + __i.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + __i.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return __i; +} + +__extension__ extern __inline int64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64_x3 (const int64_t *__a) +{ + int64x2x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a); + __i.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + __i.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + __i.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return __i; +} + 
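As an aside on how the new vld1*_x2/_x3/_x4 intrinsics above are meant to be used (this sketch is illustrative only and not part of the patch; the helper name sum12 is hypothetical): each one loads two, three or four consecutive vectors from a single base pointer and returns them in a structure whose val[] array holds the individual vectors, so summing twelve contiguous int32_t values can be written as:

#include <arm_neon.h>

static inline int32_t
sum12 (const int32_t *p)
{
  /* Loads p[0..11] into three Q registers; on AArch64 this usually
     lowers to a single LD1 {v0.4s - v2.4s}, [x0] rather than three
     separate loads, which is what the __builtin_aarch64_ld1x3*
     builtins used in the definitions above expand to.  */
  int32x4x3_t v = vld1q_s32_x3 (p);
  int32x4_t s = vaddq_s32 (v.val[0], v.val[1]);
  s = vaddq_s32 (s, v.val[2]);
  /* Horizontal add across the four lanes.  */
  return vaddvq_s32 (s);
}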
+__extension__ extern __inline float16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16_x3 (const float16_t *__a) +{ + float16x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v8hf ((const __builtin_aarch64_simd_hf *) __a); + __i.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0); + __i.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1); + __i.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2); + return __i; +} + +__extension__ extern __inline float32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32_x3 (const float32_t *__a) +{ + float32x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4sf ((const __builtin_aarch64_simd_sf *) __a); + __i.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); + __i.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); + __i.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); + return __i; +} + +__extension__ extern __inline float64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f64_x3 (const float64_t *__a) +{ + float64x2x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v2df ((const __builtin_aarch64_simd_df *) __a); + __i.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); + __i.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); + __i.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); + return __i; +} + +__extension__ extern __inline poly8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8_x3 (const poly8_t *__a) +{ + poly8x16x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a); + __i.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + __i.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + __i.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return __i; +} + +__extension__ extern __inline poly16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p16_x3 (const poly16_t *__a) +{ + poly16x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a); + __i.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + __i.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + __i.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return __i; +} + +__extension__ extern __inline poly64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64_x3 (const poly64_t *__a) +{ + poly64x2x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a); + __i.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + __i.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + __i.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return __i; +} + +/* vld1q */ + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16 (const float16_t *__a) +{ + return __builtin_aarch64_ld1v8hf (__a); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32 (const float32_t *__a) +{ + return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) __a); 
+} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f64 (const float64_t *__a) +{ + return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) __a); +} + +__extension__ extern __inline poly8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8 (const poly8_t *__a) +{ + return (poly8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); +} + +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p16 (const poly16_t *__a) +{ + return (poly16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); +} + +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64 (const poly64_t *__a) +{ + return (poly64x2_t) + __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8 (const int8_t *__a) +{ + return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16 (const int16_t *__a) +{ + return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32 (const int32_t *__a) +{ + return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64 (const int64_t *__a) +{ + return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8 (const uint8_t *__a) +{ + return (uint8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); +} + +__extension__ extern __inline uint8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u8_x2 (const uint8_t *__a) +{ + uint8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline int8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s8_x2 (const int8_t *__a) +{ + int8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16_x2 (const uint16_t *__a) +{ + uint16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline int16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+vld1_s16_x2 (const int16_t *__a) +{ + int16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32_x2 (const uint32_t *__a) +{ + uint32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline int32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32_x2 (const int32_t *__a) +{ + int32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64_x2 (const uint64_t *__a) +{ + uint64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline int64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64_x2 (const int64_t *__a) +{ + int64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline float16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f16_x2 (const float16_t *__a) +{ + float16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0); + ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32_x2 (const float32_t *__a) +{ + float32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); + return ret; +} + +__extension__ extern __inline float64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f64_x2 (const float64_t *__a) +{ + float64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; + return ret; +} + +__extension__ extern __inline poly8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8_x2 
(const poly8_t *__a) +{ + poly8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16_x2 (const poly16_t *__a) +{ + poly16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64_x2 (const poly64_t *__a) +{ + poly64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline uint8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8_x2 (const uint8_t *__a) +{ + uint8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline int8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8_x2 (const int8_t *__a) +{ + int8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u16_x2 (const uint16_t *__a) +{ + uint16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline int16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16_x2 (const int16_t *__a) +{ + int16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32_x2 (const uint32_t *__a) +{ + uint32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline int32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32_x2 (const int32_t 
*__a) +{ + int32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64_x2 (const uint64_t *__a) +{ + uint64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline int64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64_x2 (const int64_t *__a) +{ + int64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline float16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16_x2 (const float16_t *__a) +{ + float16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); + ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32_x2 (const float32_t *__a) +{ + float32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); + return ret; +} + +__extension__ extern __inline float64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f64_x2 (const float64_t *__a) +{ + float64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); + return ret; +} + +__extension__ extern __inline poly8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8_x2 (const poly8_t *__a) +{ + poly8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p16_x2 (const poly16_t *__a) +{ + poly16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64_x2 
(const poly64_t *__a) +{ + poly64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u16 (const uint16_t *__a) +{ + return (uint16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32 (const uint32_t *__a) +{ + return (uint32x4_t) + __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64 (const uint64_t *__a) +{ + return (uint64x2_t) + __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); +} + +/* vld1(q)_x4. */ + +__extension__ extern __inline int8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s8_x4 (const int8_t *__a) +{ + union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); + return __au.__i; +} + +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8_x4 (const int8_t *__a) +{ + union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __au.__i; +} + +__extension__ extern __inline int16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s16_x4 (const int16_t *__a) +{ + union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); + return __au.__i; +} + +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16_x4 (const int16_t *__a) +{ + union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __au.__i; +} + +__extension__ extern __inline int32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32_x4 (const int32_t *__a) +{ + union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); + return __au.__i; +} + +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32_x4 (const int32_t *__a) +{ + union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); + return __au.__i; +} + +__extension__ extern __inline uint8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u8_x4 (const uint8_t *__a) +{ + union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); + return __au.__i; +} + +__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8_x4 (const uint8_t *__a) +{ + union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; 
} __au; + __au.__o + = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __au.__i; +} + +__extension__ extern __inline uint16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16_x4 (const uint16_t *__a) +{ + union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); + return __au.__i; +} + +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u16_x4 (const uint16_t *__a) +{ + union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __au.__i; +} + +__extension__ extern __inline uint32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32_x4 (const uint32_t *__a) +{ + union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); + return __au.__i; +} + +__extension__ extern __inline uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32_x4 (const uint32_t *__a) +{ + union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); + return __au.__i; +} + +__extension__ extern __inline float16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f16_x4 (const float16_t *__a) +{ + union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a); + return __au.__i; +} + +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16_x4 (const float16_t *__a) +{ + union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a); + return __au.__i; +} + +__extension__ extern __inline float32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32_x4 (const float32_t *__a) +{ + union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a); + return __au.__i; +} + +__extension__ extern __inline float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32_x4 (const float32_t *__a) +{ + union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a); + return __au.__i; +} + +__extension__ extern __inline poly8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8_x4 (const poly8_t *__a) +{ + union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); + return __au.__i; +} + +__extension__ extern __inline poly8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8_x4 (const poly8_t *__a) +{ + union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __au.__i; +} + +__extension__ extern __inline poly16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16_x4 (const poly16_t 
*__a) +{ + union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); + return __au.__i; +} + +__extension__ extern __inline poly16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p16_x4 (const poly16_t *__a) +{ + union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __au.__i; +} + +__extension__ extern __inline int64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64_x4 (const int64_t *__a) +{ + union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); + return __au.__i; +} + +__extension__ extern __inline uint64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64_x4 (const uint64_t *__a) +{ + union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); + return __au.__i; +} + +__extension__ extern __inline poly64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64_x4 (const poly64_t *__a) +{ + union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); + return __au.__i; +} + +__extension__ extern __inline int64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64_x4 (const int64_t *__a) +{ + union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); + return __au.__i; +} + +__extension__ extern __inline uint64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64_x4 (const uint64_t *__a) +{ + union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); + return __au.__i; +} + +__extension__ extern __inline poly64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64_x4 (const poly64_t *__a) +{ + union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); + return __au.__i; +} + +__extension__ extern __inline float64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f64_x4 (const float64_t *__a) +{ + union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a); + return __au.__i; +} + +__extension__ extern __inline float64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f64_x4 (const float64_t *__a) +{ + union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a); + return __au.__i; +} + +/* vld1_dup */ + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_f16 (const float16_t* __a) +{ + return vdup_n_f16 (*__a); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_f32 (const float32_t* __a) +{ + return vdup_n_f32 (*__a); +} + +__extension__ extern __inline 
float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_f64 (const float64_t* __a) +{ + return vdup_n_f64 (*__a); +} + +__extension__ extern __inline poly8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_p8 (const poly8_t* __a) +{ + return vdup_n_p8 (*__a); +} + +__extension__ extern __inline poly16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_p16 (const poly16_t* __a) +{ + return vdup_n_p16 (*__a); +} + +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_p64 (const poly64_t* __a) +{ + return vdup_n_p64 (*__a); +} + +__extension__ extern __inline int8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_s8 (const int8_t* __a) +{ + return vdup_n_s8 (*__a); +} + +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_s16 (const int16_t* __a) +{ + return vdup_n_s16 (*__a); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_s32 (const int32_t* __a) +{ + return vdup_n_s32 (*__a); +} + +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_s64 (const int64_t* __a) +{ + return vdup_n_s64 (*__a); +} + +__extension__ extern __inline uint8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_u8 (const uint8_t* __a) +{ + return vdup_n_u8 (*__a); +} + +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_u16 (const uint16_t* __a) +{ + return vdup_n_u16 (*__a); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_u32 (const uint32_t* __a) +{ + return vdup_n_u32 (*__a); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_dup_u64 (const uint64_t* __a) +{ + return vdup_n_u64 (*__a); +} + +/* vld1q_dup */ + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_f16 (const float16_t* __a) +{ + return vdupq_n_f16 (*__a); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_f32 (const float32_t* __a) +{ + return vdupq_n_f32 (*__a); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_f64 (const float64_t* __a) +{ + return vdupq_n_f64 (*__a); +} + +__extension__ extern __inline poly8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_p8 (const poly8_t* __a) +{ + return vdupq_n_p8 (*__a); +} + +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_p16 (const poly16_t* __a) +{ + return vdupq_n_p16 (*__a); +} + +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_p64 (const poly64_t* __a) +{ + return vdupq_n_p64 (*__a); +} + + __extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_s8 (const int8_t* __a) +{ + return vdupq_n_s8 (*__a); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vld1q_dup_s16 (const int16_t* __a) +{ + return vdupq_n_s16 (*__a); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_s32 (const int32_t* __a) +{ + return vdupq_n_s32 (*__a); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_s64 (const int64_t* __a) +{ + return vdupq_n_s64 (*__a); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_u8 (const uint8_t* __a) +{ + return vdupq_n_u8 (*__a); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_u16 (const uint16_t* __a) +{ + return vdupq_n_u16 (*__a); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_u32 (const uint32_t* __a) +{ + return vdupq_n_u32 (*__a); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_dup_u64 (const uint64_t* __a) +{ + return vdupq_n_u64 (*__a); +} + +/* vld1_lane */ + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline poly8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline poly16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +/* vld1q_lane */ + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline poly8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) +{ + return __aarch64_vset_lane_any (*__src, __vec, __lane); +} + +/* vldn */ + +__extension__ extern __inline int64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_s64 (const int64_t * __a) +{ + int64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_u64 (const uint64_t * __a) +{ + uint64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline float64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_f64 (const float64_t * __a) +{ + float64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; + return ret; +} + +__extension__ extern __inline int8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_s8 (const int8_t * __a) +{ + int8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_p8 (const poly8_t * __a) +{ + poly8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_p64 (const poly64_t * __a) +{ + poly64x1x2_t ret; + 
__builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); + return ret; +} + +__extension__ extern __inline int16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_s16 (const int16_t * __a) +{ + int16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_p16 (const poly16_t * __a) +{ + poly16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline int32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_s32 (const int32_t * __a) +{ + int32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline uint8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_u8 (const uint8_t * __a) +{ + uint8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_u16 (const uint16_t * __a) +{ + uint16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_u32 (const uint32_t * __a) +{ + uint32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline float16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_f16 (const float16_t * __a) +{ + float16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hf (__a); + ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); + ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_f32 (const float32_t * __a) +{ + float32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) 
__a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); + return ret; +} + +__extension__ extern __inline int8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_s8 (const int8_t * __a) +{ + int8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_p8 (const poly8_t * __a) +{ + poly8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline int16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_s16 (const int16_t * __a) +{ + int16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_p16 (const poly16_t * __a) +{ + poly16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_p64 (const poly64_t * __a) +{ + poly64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); + return ret; +} + +__extension__ extern __inline int32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_s32 (const int32_t * __a) +{ + int32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline int64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_s64 (const int64_t * __a) +{ + int64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline uint8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_u8 (const uint8_t * __a) +{ + uint8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) 
__builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_u16 (const uint16_t * __a) +{ + uint16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_u32 (const uint32_t * __a) +{ + uint32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_u64 (const uint64_t * __a) +{ + uint64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline float16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_f16 (const float16_t * __a) +{ + float16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hf (__a); + ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0); + ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_f32 (const float32_t * __a) +{ + float32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); + return ret; +} + +__extension__ extern __inline float64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_f64 (const float64_t * __a) +{ + float64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); + return ret; +} + +__extension__ extern __inline int64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_s64 (const int64_t * __a) +{ + int64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ extern __inline uint64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_u64 (const uint64_t * __a) +{ + uint64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) 
__builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ extern __inline float64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_f64 (const float64_t * __a) +{ + float64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; + ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; + return ret; +} + +__extension__ extern __inline int8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_s8 (const int8_t * __a) +{ + int8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ extern __inline poly8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_p8 (const poly8_t * __a) +{ + poly8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ extern __inline int16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_s16 (const int16_t * __a) +{ + int16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ extern __inline poly16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_p16 (const poly16_t * __a) +{ + poly16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ extern __inline int32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_s32 (const int32_t * __a) +{ + int32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return ret; +} + +__extension__ extern __inline uint8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_u8 (const uint8_t * __a) +{ + uint8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) 
__builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ extern __inline uint16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_u16 (const uint16_t * __a) +{ + uint16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ extern __inline uint32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_u32 (const uint32_t * __a) +{ + uint32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return ret; +} + +__extension__ extern __inline float16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_f16 (const float16_t * __a) +{ + float16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hf (__a); + ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0); + ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1); + ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2); + return ret; +} + +__extension__ extern __inline float32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_f32 (const float32_t * __a) +{ + float32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); + return ret; +} + +__extension__ extern __inline poly64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_p64 (const poly64_t * __a) +{ + poly64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); + ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); + return ret; +} + +__extension__ extern __inline int8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_s8 (const int8_t * __a) +{ + int8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ extern __inline poly8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_p8 (const poly8_t * __a) +{ + poly8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); 
+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ extern __inline int16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_s16 (const int16_t * __a) +{ + int16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ extern __inline poly16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_p16 (const poly16_t * __a) +{ + poly16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ extern __inline int32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_s32 (const int32_t * __a) +{ + int32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ extern __inline int64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_s64 (const int64_t * __a) +{ + int64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ extern __inline uint8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_u8 (const uint8_t * __a) +{ + uint8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ extern __inline uint16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_u16 (const uint16_t * __a) +{ + uint16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ extern __inline uint32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_u32 (const uint32_t * __a) +{ + uint32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) 
__builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ extern __inline uint64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_u64 (const uint64_t * __a) +{ + uint64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ extern __inline float16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_f16 (const float16_t * __a) +{ + float16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hf (__a); + ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0); + ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1); + ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2); + return ret; +} + +__extension__ extern __inline float32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_f32 (const float32_t * __a) +{ + float32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); + return ret; +} + +__extension__ extern __inline float64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_f64 (const float64_t * __a) +{ + float64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); + return ret; +} + +__extension__ extern __inline poly64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_p64 (const poly64_t * __a) +{ + poly64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); + ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); + return ret; +} + +__extension__ extern __inline int64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_s64 (const int64_t * __a) +{ + int64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ extern __inline uint64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_u64 (const uint64_t * __a) +{ + uint64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) 
__a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ extern __inline float64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_f64 (const float64_t * __a) +{ + float64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; + ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; + ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; + return ret; +} + +__extension__ extern __inline int8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_s8 (const int8_t * __a) +{ + int8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ extern __inline poly8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_p8 (const poly8_t * __a) +{ + poly8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ extern __inline int16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_s16 (const int16_t * __a) +{ + int16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ extern __inline poly16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_p16 (const poly16_t * __a) +{ + poly16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ extern __inline int32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_s32 (const int32_t * __a) +{ + int32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = 
(int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); + return ret; +} + +__extension__ extern __inline uint8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_u8 (const uint8_t * __a) +{ + uint8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ extern __inline uint16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_u16 (const uint16_t * __a) +{ + uint16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ extern __inline uint32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_u32 (const uint32_t * __a) +{ + uint32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); + return ret; +} + +__extension__ extern __inline float16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_f16 (const float16_t * __a) +{ + float16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hf (__a); + ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0); + ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1); + ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2); + ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3); + return ret; +} + +__extension__ extern __inline float32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_f32 (const float32_t * __a) +{ + float32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); + ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); + return ret; +} + +__extension__ extern __inline poly64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_p64 (const poly64_t * __a) +{ + poly64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); + ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); + ret.val[3] = 
(poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); + return ret; +} + +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_s8 (const int8_t * __a) +{ + int8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ extern __inline poly8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_p8 (const poly8_t * __a) +{ + poly8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_s16 (const int16_t * __a) +{ + int16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ extern __inline poly16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_p16 (const poly16_t * __a) +{ + poly16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_s32 (const int32_t * __a) +{ + int32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_s64 (const int64_t * __a) +{ + int64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + 
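(Aside, not part of the patch: the vldN intrinsics added above and below wrap the AArch64 LD2/LD3/LD4 structure loads, which de-interleave memory into 2, 3 or 4 vectors, while the later _dup variants wrap LD2R/LD3R/LD4R and broadcast one structure to every lane. A minimal usage sketch follows, assuming an AArch64 target with this arm_neon.h; the test program, buffer name and printed values are illustrative only.)

/* Sketch: vld2q_u8 loads 32 bytes and de-interleaves them, so val[0]
   receives the even-indexed bytes and val[1] the odd-indexed ones.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8_t buf[32];
  for (int i = 0; i < 32; i++)
    buf[i] = (uint8_t) i;

  uint8x16x2_t pair = vld2q_u8 (buf);

  uint8_t even[16], odd[16];
  vst1q_u8 (even, pair.val[0]);   /* 0, 2, 4, ..., 30 */
  vst1q_u8 (odd, pair.val[1]);    /* 1, 3, 5, ..., 31 */

  printf ("%u %u %u %u\n", even[0], even[1], odd[0], odd[1]);  /* prints: 0 2 1 3 */
  return 0;
}

(By contrast, a _dup load such as vld2_dup_s32 reads two consecutive int32 values and replicates the first across all lanes of val[0] and the second across all lanes of val[1].)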
+__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_u8 (const uint8_t * __a) +{ + uint8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_u16 (const uint16_t * __a) +{ + uint16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ extern __inline uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_u32 (const uint32_t * __a) +{ + uint32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_u64 (const uint64_t * __a) +{ + uint64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_f16 (const float16_t * __a) +{ + float16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hf (__a); + ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0); + ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1); + ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2); + ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3); + return ret; +} + +__extension__ extern __inline float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_f32 (const float32_t * __a) +{ + float32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); + ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); + return ret; +} + +__extension__ extern __inline float64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_f64 (const 
float64_t * __a) +{ + float64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); + ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); + return ret; +} + +__extension__ extern __inline poly64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_p64 (const poly64_t * __a) +{ + poly64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); + ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); + ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); + return ret; +} + +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldrq_p128 (const poly128_t * __ptr) +{ + return *__ptr; +} + +/* vldn_dup */ + +__extension__ extern __inline int8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_s8 (const int8_t * __a) +{ + int8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline int16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_s16 (const int16_t * __a) +{ + int16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline int32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_s32 (const int32_t * __a) +{ + int32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline float16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_f16 (const float16_t * __a) +{ + float16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); + ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_f32 (const float32_t * __a) +{ + float32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); + return ret; +} + +__extension__ extern __inline float64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vld2_dup_f64 (const float64_t * __a) +{ + float64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; + return ret; +} + +__extension__ extern __inline uint8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_u8 (const uint8_t * __a) +{ + uint8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_u16 (const uint16_t * __a) +{ + uint16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_u32 (const uint32_t * __a) +{ + uint32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ extern __inline poly8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_p8 (const poly8_t * __a) +{ + poly8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_p16 (const poly16_t * __a) +{ + poly16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_p64 (const poly64_t * __a) +{ + poly64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); + return ret; +} + + +__extension__ extern __inline int64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2_dup_s64 (const int64_t * __a) +{ + int64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vld2_dup_u64 (const uint64_t * __a) +{ + uint64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ extern __inline int8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_s8 (const int8_t * __a) +{ + int8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline poly8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_p8 (const poly8_t * __a) +{ + poly8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline int16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_s16 (const int16_t * __a) +{ + int16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline poly16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_p16 (const poly16_t * __a) +{ + poly16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline int32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_s32 (const int32_t * __a) +{ + int32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline int64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_s64 (const int64_t * __a) +{ + int64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline uint8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_u8 (const uint8_t * __a) +{ + uint8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ extern __inline uint16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vld2q_dup_u16 (const uint16_t * __a) +{ + uint16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ extern __inline uint32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_u32 (const uint32_t * __a) +{ + uint32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ extern __inline uint64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_u64 (const uint64_t * __a) +{ + uint64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ extern __inline float16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_f16 (const float16_t * __a) +{ + float16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); + ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); + return ret; +} + +__extension__ extern __inline float32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_f32 (const float32_t * __a) +{ + float32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); + return ret; +} + +__extension__ extern __inline float64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_f64 (const float64_t * __a) +{ + float64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); + return ret; +} + +__extension__ extern __inline poly64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld2q_dup_p64 (const poly64_t * __a) +{ + poly64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); + return ret; +} + +__extension__ extern __inline int64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_s64 (const int64_t * __a) +{ + int64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ 
extern __inline uint64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_u64 (const uint64_t * __a) +{ + uint64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ extern __inline float64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_f64 (const float64_t * __a) +{ + float64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; + ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; + return ret; +} + +__extension__ extern __inline int8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_s8 (const int8_t * __a) +{ + int8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ extern __inline poly8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_p8 (const poly8_t * __a) +{ + poly8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ extern __inline int16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_s16 (const int16_t * __a) +{ + int16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ extern __inline poly16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_p16 (const poly16_t * __a) +{ + poly16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ extern __inline int32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_s32 (const int32_t * __a) +{ + int32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return 
ret; +} + +__extension__ extern __inline uint8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_u8 (const uint8_t * __a) +{ + uint8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ extern __inline uint16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_u16 (const uint16_t * __a) +{ + uint16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ extern __inline uint32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_u32 (const uint32_t * __a) +{ + uint32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return ret; +} + +__extension__ extern __inline float16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_f16 (const float16_t * __a) +{ + float16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0); + ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1); + ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2); + return ret; +} + +__extension__ extern __inline float32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_f32 (const float32_t * __a) +{ + float32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); + return ret; +} + +__extension__ extern __inline poly64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3_dup_p64 (const poly64_t * __a) +{ + poly64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); + ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); + return ret; +} + +__extension__ extern __inline int8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_s8 (const int8_t * __a) +{ + int8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = 
(int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ extern __inline poly8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_p8 (const poly8_t * __a) +{ + poly8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ extern __inline int16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_s16 (const int16_t * __a) +{ + int16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ extern __inline poly16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_p16 (const poly16_t * __a) +{ + poly16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ extern __inline int32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_s32 (const int32_t * __a) +{ + int32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ extern __inline int64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_s64 (const int64_t * __a) +{ + int64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ extern __inline uint8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_u8 (const uint8_t * __a) +{ + uint8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ extern __inline uint16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_u16 (const uint16_t * __a) +{ + uint16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (uint16x8_t) 
__builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ extern __inline uint32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_u32 (const uint32_t * __a) +{ + uint32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ extern __inline uint64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_u64 (const uint64_t * __a) +{ + uint64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ extern __inline float16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_f16 (const float16_t * __a) +{ + float16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0); + ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1); + ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2); + return ret; +} + +__extension__ extern __inline float32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_f32 (const float32_t * __a) +{ + float32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); + return ret; +} + +__extension__ extern __inline float64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_f64 (const float64_t * __a) +{ + float64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); + return ret; +} + +__extension__ extern __inline poly64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld3q_dup_p64 (const poly64_t * __a) +{ + poly64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); + ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); + return ret; +} + +__extension__ extern __inline int64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_s64 (const int64_t * __a) +{ + int64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); + 
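/* Note on the pattern used throughout these vld4_dup_* intrinsics: the
   __builtin_aarch64_ld4r* call loads a single 4-element structure and
   replicates each element across all lanes of its destination register
   (LD4R), and the __builtin_aarch64_get_dregxi* calls that follow extract
   the four D registers from the aggregate __o one at a time.  */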
ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ extern __inline uint64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_u64 (const uint64_t * __a) +{ + uint64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ extern __inline float64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_f64 (const float64_t * __a) +{ + float64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; + ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; + ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; + ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; + return ret; +} + +__extension__ extern __inline int8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_s8 (const int8_t * __a) +{ + int8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ extern __inline poly8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_p8 (const poly8_t * __a) +{ + poly8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ extern __inline int16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_s16 (const int16_t * __a) +{ + int16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ extern __inline poly16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_p16 (const poly16_t * __a) +{ + poly16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + 
ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ extern __inline int32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_s32 (const int32_t * __a) +{ + int32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); + return ret; +} + +__extension__ extern __inline uint8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_u8 (const uint8_t * __a) +{ + uint8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ extern __inline uint16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_u16 (const uint16_t * __a) +{ + uint16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ extern __inline uint32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_u32 (const uint32_t * __a) +{ + uint32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); + return ret; +} + +__extension__ extern __inline float16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_f16 (const float16_t * __a) +{ + float16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0); + ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1); + ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2); + ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3); + return ret; +} + +__extension__ extern __inline float32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_f32 (const float32_t * __a) +{ + float32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); + ret.val[1] = (float32x2_t) 
__builtin_aarch64_get_dregxiv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); + ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); + return ret; +} + +__extension__ extern __inline poly64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4_dup_p64 (const poly64_t * __a) +{ + poly64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); + ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); + ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); + ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); + return ret; +} + +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_s8 (const int8_t * __a) +{ + int8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ extern __inline poly8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_p8 (const poly8_t * __a) +{ + poly8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_s16 (const int16_t * __a) +{ + int16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ extern __inline poly16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_p16 (const poly16_t * __a) +{ + poly16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_s32 (const int32_t * __a) +{ + int32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int32x4_t) 
__builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_s64 (const int64_t * __a) +{ + int64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + +__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_u8 (const uint8_t * __a) +{ + uint8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_u16 (const uint16_t * __a) +{ + uint16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ extern __inline uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_u32 (const uint32_t * __a) +{ + uint32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_u64 (const uint64_t * __a) +{ + uint64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_f16 (const float16_t * __a) +{ + float16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a); + ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0); + ret.val[1] = (float16x8_t) 
__builtin_aarch64_get_qregxiv8hf (__o, 1); + ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2); + ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3); + return ret; +} + +__extension__ extern __inline float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_f32 (const float32_t * __a) +{ + float32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); + ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); + return ret; +} + +__extension__ extern __inline float64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_f64 (const float64_t * __a) +{ + float64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); + ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); + return ret; +} + +__extension__ extern __inline poly64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld4q_dup_p64 (const poly64_t * __a) +{ + poly64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); + ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); + ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); + return ret; +} + +/* vld2_lane */ + +#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ + qmode, ptrmode, funcsuffix, signedtype) \ +__extension__ extern __inline intype \ +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ +vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_oi __o; \ + largetype __temp; \ + __temp.val[0] = \ + vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ + __temp.val[1] = \ + vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ + __o = __builtin_aarch64_set_qregoi##qmode (__o, \ + (signedtype) __temp.val[0], \ + 0); \ + __o = __builtin_aarch64_set_qregoi##qmode (__o, \ + (signedtype) __temp.val[1], \ + 1); \ + __o = __builtin_aarch64_ld2_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \ + __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); \ + return __b; \ +} + +__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf, + v8hf, hf, f16, float16x8_t) +__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df, + df, f64, float64x2_t) +__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +__LD2_LANE_FUNC (poly64x1x2_t, 
poly64x1_t, poly64x2x2_t, poly64_t, di, + v2di_ssps, di, p64, poly64x2_t) +__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64, + int64x2_t) +__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, + u64, int64x2_t) + +/* vld2q_lane */ + +#define __LD2Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ +__extension__ extern __inline intype \ +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ +vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_oi __o; \ + intype ret; \ + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \ + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \ + __o = __builtin_aarch64_ld2_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); \ + ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); \ + return ret; \ +} + +__LD2Q_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16) +__LD2Q_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32) +__LD2Q_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64) +__LD2Q_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8) +__LD2Q_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16) +__LD2Q_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64) +__LD2Q_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8) +__LD2Q_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16) +__LD2Q_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32) +__LD2Q_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64) +__LD2Q_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8) +__LD2Q_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16) +__LD2Q_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32) +__LD2Q_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) + +/* vld3_lane */ + +#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ + qmode, ptrmode, funcsuffix, signedtype) \ +__extension__ extern __inline intype \ +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ +vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_ci __o; \ + largetype __temp; \ + __temp.val[0] = \ + vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ + __temp.val[1] = \ + vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ + __temp.val[2] = \ + vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ + __o = __builtin_aarch64_set_qregci##qmode (__o, \ + (signedtype) __temp.val[0], \ + 0); \ + __o = __builtin_aarch64_set_qregci##qmode (__o, \ + (signedtype) __temp.val[1], \ + 1); \ + __o = __builtin_aarch64_set_qregci##qmode (__o, \ + (signedtype) __temp.val[2], \ + 2); \ + __o = 
__builtin_aarch64_ld3_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \ + __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); \ + __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); \ + return __b; \ +} + +__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf, + v8hf, hf, f16, float16x8_t) +__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df, + df, f64, float64x2_t) +__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di, + v2di_ssps, di, p64, poly64x2_t) +__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64, + int64x2_t) +__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, + u64, int64x2_t) + +/* vld3q_lane */ + +#define __LD3Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ +__extension__ extern __inline intype \ +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ +vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_ci __o; \ + intype ret; \ + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \ + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \ + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \ + __o = __builtin_aarch64_ld3_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \ + ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \ + ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \ + return ret; \ +} + +__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16) +__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32) +__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64) +__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8) +__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16) +__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64) +__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8) +__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16) +__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32) +__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64) +__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8) +__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16) +__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32) 
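/* Illustrative usage sketch (annotation, not part of the patch): the
   vld3q_lane_* intrinsics instantiated here load one 3-element structure
   from memory into a single lane of an existing register triple, leaving
   the other lanes untouched.  The helper name below is invented for the
   example; the lane index must be a constant in range.  */
uint32x4x3_t
reload_lane1_u32 (uint32x4x3_t __acc, const uint32_t *__triple)
{
  /* Overwrite lane 1 of each of the three vectors with __triple[0..2].  */
  return vld3q_lane_u32 (__triple, __acc, 1);
}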
+__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) + +/* vld4_lane */ + +#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ + qmode, ptrmode, funcsuffix, signedtype) \ +__extension__ extern __inline intype \ +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ +vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_xi __o; \ + largetype __temp; \ + __temp.val[0] = \ + vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ + __temp.val[1] = \ + vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ + __temp.val[2] = \ + vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ + __temp.val[3] = \ + vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[0], \ + 0); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[1], \ + 1); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[2], \ + 2); \ + __o = __builtin_aarch64_set_qregxi##qmode (__o, \ + (signedtype) __temp.val[3], \ + 3); \ + __o = __builtin_aarch64_ld4_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ + __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \ + __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \ + __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \ + return __b; \ +} + +/* vld4q_lane */ + +__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf, + v8hf, hf, f16, float16x8_t) +__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, + sf, f32, float32x4_t) +__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, + df, f64, float64x2_t) +__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, + int8x16_t) +__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, + p16, int16x8_t) +__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di, + v2di_ssps, di, p64, poly64x2_t) +__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, + int8x16_t) +__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, + int16x8_t) +__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, + int32x4_t) +__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, + int64x2_t) +__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, + int8x16_t) +__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, + u16, int16x8_t) +__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, + u32, int32x4_t) +__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, + u64, int64x2_t) + +/* vld4q_lane */ -__extension__ extern __inline float64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) -{ - return __builtin_aarch64_fmav2df (-__b, __c, __a); +#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ +__extension__ extern __inline intype \ +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ +vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_xi 
__o; \ + intype ret; \ + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \ + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \ + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \ + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \ + __o = __builtin_aarch64_ld4_lane##mode ( \ + (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ + ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \ + ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \ + ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \ + ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \ + return ret; \ } +__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) +__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) +__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) +__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) +__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) +__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) +__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) +__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16) +__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) +__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) +__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) +__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) +__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) +__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) + +/* vmax */ + __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +vmax_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_fmav2sf (-__b, vdup_n_f32 (__c), __a); + return __builtin_aarch64_smax_nanv2sf (__a, __b); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) +vmax_f64 (float64x1_t __a, float64x1_t __b) { - return (float64x1_t) {-__b[0] * __c + __a[0]}; + return (float64x1_t) + { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +vmax_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_fmav4sf (-__b, vdupq_n_f32 (__c), __a); + return __builtin_aarch64_smaxv8qi (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) +vmax_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a); + return __builtin_aarch64_smaxv4hi (__a, __b); } -/* vfms_lane */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_lane_f32 (float32x2_t __a, float32x2_t __b, - float32x2_t __c, const int __lane) +vmax_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_fmav2sf (-__b, - __aarch64_vdup_lane_f32 (__c, 
__lane), - __a); + return __builtin_aarch64_smaxv2si (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_lane_f64 (float64x1_t __a, float64x1_t __b, - float64x1_t __c, const int __lane) +vmax_u8 (uint8x8_t __a, uint8x8_t __b) { - return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; + return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsd_lane_f64 (float64_t __a, float64_t __b, - float64x1_t __c, const int __lane) +vmax_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_fma (-__b, __c[0], __a); + return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a, + (int16x4_t) __b); } -__extension__ extern __inline float32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmss_lane_f32 (float32_t __a, float32_t __b, - float32x2_t __c, const int __lane) +vmax_u32 (uint32x2_t __a, uint32x2_t __b) { - return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); + return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a, + (int32x2_t) __b); } -/* vfms_laneq */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_laneq_f32 (float32x2_t __a, float32x2_t __b, - float32x4_t __c, const int __lane) +vmaxq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_fmav2sf (-__b, - __aarch64_vdup_laneq_f32 (__c, __lane), - __a); + return __builtin_aarch64_smax_nanv4sf (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_laneq_f64 (float64x1_t __a, float64x1_t __b, - float64x2_t __c, const int __lane) +vmaxq_f64 (float64x2_t __a, float64x2_t __b) { - float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); - return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])}; + return __builtin_aarch64_smax_nanv2df (__a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsd_laneq_f64 (float64_t __a, float64_t __b, - float64x2_t __c, const int __lane) +vmaxq_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a); + return __builtin_aarch64_smaxv16qi (__a, __b); } -__extension__ extern __inline float32_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmss_laneq_f32 (float32_t __a, float32_t __b, - float32x4_t __c, const int __lane) +vmaxq_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); + return __builtin_aarch64_smaxv8hi (__a, __b); } -/* vfmsq_lane */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b, - float32x2_t __c, const int __lane) +vmaxq_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_aarch64_fmav4sf (-__b, - __aarch64_vdupq_lane_f32 (__c, __lane), - __a); + return __builtin_aarch64_smaxv4si (__a, __b); } -__extension__ 
extern __inline float64x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b, - float64x1_t __c, const int __lane) +vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a); + return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -/* vfmsq_laneq */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b, - float32x4_t __c, const int __lane) +vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_aarch64_fmav4sf (-__b, - __aarch64_vdupq_laneq_f32 (__c, __lane), - __a); + return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a, + (int16x8_t) __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b, - float64x2_t __c, const int __lane) +vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_aarch64_fmav2df (-__b, - __aarch64_vdupq_laneq_f64 (__c, __lane), - __a); + return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, + (int32x4_t) __b); } +/* vmulx */ -/* vld1 */ - -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f16 (const float16_t *__a) +vmulx_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_ld1v4hf (__a); + return __builtin_aarch64_fmulxv2sf (__a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f32 (const float32_t *a) +vmulxq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); + return __builtin_aarch64_fmulxv4sf (__a, __b); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f64 (const float64_t *a) +vmulx_f64 (float64x1_t __a, float64x1_t __b) { - return (float64x1_t) {*a}; + return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])}; } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p8 (const poly8_t *a) +vmulxq_f64 (float64x2_t __a, float64x2_t __b) { - return (poly8x8_t) - __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); + return __builtin_aarch64_fmulxv2df (__a, __b); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p16 (const poly16_t *a) +vmulxs_f32 (float32_t __a, float32_t __b) { - return (poly16x4_t) - __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); + return __builtin_aarch64_fmulxsf (__a, __b); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p64 (const poly64_t *a) +vmulxd_f64 (float64_t __a, float64_t __b) { - return (poly64x1_t) {*a}; + return __builtin_aarch64_fmulxdf (__a, __b); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vld1_s8 (const int8_t *a) +vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane) { - return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); + return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane)); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s16 (const int16_t *a) +vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane) { - return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); + return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane)); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s32 (const int32_t *a) +vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane) { - return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); + return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane)); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s64 (const int64_t *a) +vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane) { - return (int64x1_t) {*a}; + return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane)); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u8 (const uint8_t *a) +vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane) { - return (uint8x8_t) - __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); + return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane)); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u16 (const uint16_t *a) +vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane) { - return (uint16x4_t) - __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); + return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane)); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u32 (const uint32_t *a) +vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane) { - return (uint32x2_t) - __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); + return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane)); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u64 (const uint64_t *a) +vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane) { - return (uint64x1_t) {*a}; + return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane)); } -/* vld1q */ - -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f16 (const float16_t *__a) +vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane) { - return __builtin_aarch64_ld1v8hf (__a); + return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane)); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f32 
(const float32_t *a) +vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane) { - return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); + return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane)); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f64 (const float64_t *a) +vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane) { - return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); + return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane)); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p8 (const poly8_t *a) +vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane) { - return (poly8x16_t) - __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); + return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane)); } -__extension__ extern __inline poly16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p16 (const poly16_t *a) -{ - return (poly16x8_t) - __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); -} +/* vpmax */ -__extension__ extern __inline poly64x2_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p64 (const poly64_t *a) +vpmax_s8 (int8x8_t __a, int8x8_t __b) { - return (poly64x2_t) - __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); + return __builtin_aarch64_smaxpv8qi (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s8 (const int8_t *a) +vpmax_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); + return __builtin_aarch64_smaxpv4hi (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s16 (const int16_t *a) +vpmax_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); + return __builtin_aarch64_smaxpv2si (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s32 (const int32_t *a) +vpmax_u8 (uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); + return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s64 (const int64_t *a) +vpmax_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); + return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) __a, + (int16x4_t) __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u8 (const uint8_t *a) +vpmax_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint8x16_t) - __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); + return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) __a, + (int32x2_t) __b); } 
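/* Illustrative usage sketch (annotation, not part of the patch): the
   pairwise-maximum intrinsics defined here are commonly chained to reduce
   a whole vector to its maximum element.  vget_lane_u8 is the standard
   ACLE lane-extract intrinsic; the helper name is invented.  */
static inline uint8_t
max_across_u8x8 (uint8x8_t __v)
{
  __v = vpmax_u8 (__v, __v);   /* 8 lanes -> 4 pairwise maxima */
  __v = vpmax_u8 (__v, __v);   /* 4 -> 2 */
  __v = vpmax_u8 (__v, __v);   /* 2 -> 1; lane 0 now holds the overall max */
  return vget_lane_u8 (__v, 0);
}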
-__extension__ extern __inline uint8x8x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u8_x2 (const uint8_t *__a) +vpmaxq_s8 (int8x16_t __a, int8x16_t __b) { - uint8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_smaxpv16qi (__a, __b); } -__extension__ extern __inline int8x8x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s8_x2 (const int8_t *__a) +vpmaxq_s16 (int16x8_t __a, int16x8_t __b) { - int8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_smaxpv8hi (__a, __b); } -__extension__ extern __inline uint16x4x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u16_x2 (const uint16_t *__a) +vpmaxq_s32 (int32x4_t __a, int32x4_t __b) { - uint16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_smaxpv4si (__a, __b); } -__extension__ extern __inline int16x4x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s16_x2 (const int16_t *__a) +vpmaxq_u8 (uint8x16_t __a, uint8x16_t __b) { - int16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ extern __inline uint32x2x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u32_x2 (const uint32_t *__a) +vpmaxq_u16 (uint16x8_t __a, uint16x8_t __b) { - uint32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) __a, + (int16x8_t) __b); } -__extension__ extern __inline int32x2x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s32_x2 (const int32_t *__a) +vpmaxq_u32 (uint32x4_t __a, uint32x4_t __b) { - int32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) __a, + (int32x4_t) __b); } -__extension__ extern 
__inline uint64x1x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u64_x2 (const uint64_t *__a) +vpmax_f32 (float32x2_t __a, float32x2_t __b) { - uint64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_smax_nanpv2sf (__a, __b); } -__extension__ extern __inline int64x1x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s64_x2 (const int64_t *__a) +vpmaxq_f32 (float32x4_t __a, float32x4_t __b) { - int64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_smax_nanpv4sf (__a, __b); } -__extension__ extern __inline float16x4x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f16_x2 (const float16_t *__a) +vpmaxq_f64 (float64x2_t __a, float64x2_t __b) { - float16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); - return ret; + return __builtin_aarch64_smax_nanpv2df (__a, __b); } -__extension__ extern __inline float32x2x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f32_x2 (const float32_t *__a) +vpmaxqd_f64 (float64x2_t __a) { - float32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); - return ret; + return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a); } -__extension__ extern __inline float64x1x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f64_x2 (const float64_t *__a) +vpmaxs_f32 (float32x2_t __a) { - float64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; - return ret; + return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a); } -__extension__ extern __inline poly8x8x2_t +/* vpmaxnm */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p8_x2 (const poly8_t *__a) +vpmaxnm_f32 (float32x2_t __a, float32x2_t __b) { - poly8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_smaxpv2sf (__a, __b); } -__extension__ extern __inline poly16x4x2_t +__extension__ extern __inline float32x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p16_x2 (const poly16_t *__a) +vpmaxnmq_f32 (float32x4_t __a, float32x4_t __b) { - poly16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_smaxpv4sf (__a, __b); } -__extension__ extern __inline poly64x1x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p64_x2 (const poly64_t *__a) +vpmaxnmq_f64 (float64x2_t __a, float64x2_t __b) { - poly64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_smaxpv2df (__a, __b); } -__extension__ extern __inline uint8x16x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u8_x2 (const uint8_t *__a) +vpmaxnmqd_f64 (float64x2_t __a) { - uint8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_reduc_smax_scal_v2df (__a); } -__extension__ extern __inline int8x16x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s8_x2 (const int8_t *__a) +vpmaxnms_f32 (float32x2_t __a) { - int8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_reduc_smax_scal_v2sf (__a); } -__extension__ extern __inline uint16x8x2_t +/* vpmin */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u16_x2 (const uint16_t *__a) +vpmin_s8 (int8x8_t __a, int8x8_t __b) { - uint16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_sminpv8qi (__a, __b); } -__extension__ extern __inline int16x8x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s16_x2 (const int16_t *__a) +vpmin_s16 (int16x4_t __a, int16x4_t __b) { - int16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_sminpv4hi (__a, __b); } -__extension__ extern __inline uint32x4x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u32_x2 (const uint32_t *__a) +vpmin_s32 (int32x2_t __a, 
int32x2_t __b) { - uint32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return __builtin_aarch64_sminpv2si (__a, __b); } -__extension__ extern __inline int32x4x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s32_x2 (const int32_t *__a) +vpmin_u8 (uint8x8_t __a, uint8x8_t __b) { - int32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ extern __inline uint64x2x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u64_x2 (const uint64_t *__a) +vpmin_u16 (uint16x4_t __a, uint16x4_t __b) { - uint64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) __a, + (int16x4_t) __b); } -__extension__ extern __inline int64x2x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s64_x2 (const int64_t *__a) +vpmin_u32 (uint32x2_t __a, uint32x2_t __b) { - int64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) __a, + (int32x2_t) __b); } -__extension__ extern __inline float16x8x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f16_x2 (const float16_t *__a) +vpminq_s8 (int8x16_t __a, int8x16_t __b) { - float16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); - ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 1); - return ret; + return __builtin_aarch64_sminpv16qi (__a, __b); } -__extension__ extern __inline float32x4x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f32_x2 (const float32_t *__a) +vpminq_s16 (int16x8_t __a, int16x8_t __b) { - float32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); - return ret; + return __builtin_aarch64_sminpv8hi (__a, __b); } -__extension__ extern __inline float64x2x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f64_x2 (const float64_t *__a) -{ - float64x2x2_t ret; - 
__builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); - return ret; +vpminq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sminpv4si (__a, __b); } -__extension__ extern __inline poly8x16x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p8_x2 (const poly8_t *__a) +vpminq_u8 (uint8x16_t __a, uint8x16_t __b) { - poly8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ extern __inline poly16x8x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p16_x2 (const poly16_t *__a) +vpminq_u16 (uint16x8_t __a, uint16x8_t __b) { - poly16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) __a, + (int16x8_t) __b); } -__extension__ extern __inline poly64x2x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p64_x2 (const poly64_t *__a) +vpminq_u32 (uint32x4_t __a, uint32x4_t __b) { - poly64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) __a, + (int32x4_t) __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u16 (const uint16_t *a) +vpmin_f32 (float32x2_t __a, float32x2_t __b) { - return (uint16x8_t) - __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); + return __builtin_aarch64_smin_nanpv2sf (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u32 (const uint32_t *a) +vpminq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint32x4_t) - __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); + return __builtin_aarch64_smin_nanpv4sf (__a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u64 (const uint64_t *a) +vpminq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint64x2_t) - __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); + return __builtin_aarch64_smin_nanpv2df (__a, __b); } -/* vld1_dup */ +__extension__ extern __inline float64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpminqd_f64 (float64x2_t __a) +{ + return 
__builtin_aarch64_reduc_smin_nan_scal_v2df (__a); +} -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_f16 (const float16_t* __a) +vpmins_f32 (float32x2_t __a) { - return vdup_n_f16 (*__a); + return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a); } +/* vpminnm */ + __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_f32 (const float32_t* __a) +vpminnm_f32 (float32x2_t __a, float32x2_t __b) { - return vdup_n_f32 (*__a); + return __builtin_aarch64_sminpv2sf (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_f64 (const float64_t* __a) +vpminnmq_f32 (float32x4_t __a, float32x4_t __b) { - return vdup_n_f64 (*__a); + return __builtin_aarch64_sminpv4sf (__a, __b); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_p8 (const poly8_t* __a) +vpminnmq_f64 (float64x2_t __a, float64x2_t __b) { - return vdup_n_p8 (*__a); + return __builtin_aarch64_sminpv2df (__a, __b); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_p16 (const poly16_t* __a) +vpminnmqd_f64 (float64x2_t __a) { - return vdup_n_p16 (*__a); + return __builtin_aarch64_reduc_smin_scal_v2df (__a); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_p64 (const poly64_t* __a) +vpminnms_f32 (float32x2_t __a) { - return vdup_n_p64 (*__a); + return __builtin_aarch64_reduc_smin_scal_v2sf (__a); } -__extension__ extern __inline int8x8_t +/* vmaxnm */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_s8 (const int8_t* __a) +vmaxnm_f32 (float32x2_t __a, float32x2_t __b) { - return vdup_n_s8 (*__a); + return __builtin_aarch64_fmaxv2sf (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_s16 (const int16_t* __a) +vmaxnm_f64 (float64x1_t __a, float64x1_t __b) { - return vdup_n_s16 (*__a); + return (float64x1_t) + { __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_s32 (const int32_t* __a) +vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) { - return vdup_n_s32 (*__a); + return __builtin_aarch64_fmaxv4sf (__a, __b); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_s64 (const int64_t* __a) +vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) { - return vdup_n_s64 (*__a); + return __builtin_aarch64_fmaxv2df (__a, __b); } -__extension__ extern __inline uint8x8_t +/* vmaxv */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_u8 (const uint8_t* __a) +vmaxv_f32 (float32x2_t __a) { - return vdup_n_u8 (*__a); + return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a); } 
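The vmaxnm_* forms above map to the fmax* builtins (FMAXNM), while vmax*/vmaxv* keep the NaN-propagating reduc_smax_nan_* builtins. A hedged sketch of that difference, illustrative only and not part of the patch, assuming an AArch64 target:

/* FMAXNM follows IEEE maxNum: a quiet NaN loses to a number, whereas the
   plain maximum used by vmaxv_f32 propagates the NaN.  */
#include <arm_neon.h>
#include <math.h>
#include <stdio.h>

int
main (void)
{
  float32x2_t v = { NAN, 2.0f };	/* GCC vector initializer syntax.  */
  float32x2_t w = { 1.0f, 2.0f };
  float nm = vget_lane_f32 (vmaxnm_f32 (v, w), 0);	/* 1.0  */
  float mv = vmaxv_f32 (v);				/* NaN  */
  printf ("vmaxnm lane0 = %f, vmaxv = %f\n", (double) nm, (double) mv);
  return 0;
}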
-__extension__ extern __inline uint16x4_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_u16 (const uint16_t* __a) +vmaxv_s8 (int8x8_t __a) { - return vdup_n_u16 (*__a); + return __builtin_aarch64_reduc_smax_scal_v8qi (__a); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_u32 (const uint32_t* __a) +vmaxv_s16 (int16x4_t __a) { - return vdup_n_u32 (*__a); + return __builtin_aarch64_reduc_smax_scal_v4hi (__a); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_dup_u64 (const uint64_t* __a) +vmaxv_s32 (int32x2_t __a) { - return vdup_n_u64 (*__a); + return __builtin_aarch64_reduc_smax_scal_v2si (__a); } -/* vld1q_dup */ +__extension__ extern __inline uint8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmaxv_u8 (uint8x8_t __a) +{ + return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a); +} -__extension__ extern __inline float16x8_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_f16 (const float16_t* __a) +vmaxv_u16 (uint16x4_t __a) { - return vdupq_n_f16 (*__a); + return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_f32 (const float32_t* __a) +vmaxv_u32 (uint32x2_t __a) { - return vdupq_n_f32 (*__a); + return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_f64 (const float64_t* __a) +vmaxvq_f32 (float32x4_t __a) { - return vdupq_n_f64 (*__a); + return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_p8 (const poly8_t* __a) +vmaxvq_f64 (float64x2_t __a) { - return vdupq_n_p8 (*__a); + return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_p16 (const poly16_t* __a) +vmaxvq_s8 (int8x16_t __a) { - return vdupq_n_p16 (*__a); + return __builtin_aarch64_reduc_smax_scal_v16qi (__a); } -__extension__ extern __inline poly64x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_p64 (const poly64_t* __a) +vmaxvq_s16 (int16x8_t __a) { - return vdupq_n_p64 (*__a); + return __builtin_aarch64_reduc_smax_scal_v8hi (__a); } - __extension__ extern __inline int8x16_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_s8 (const int8_t* __a) +vmaxvq_s32 (int32x4_t __a) { - return vdupq_n_s8 (*__a); + return __builtin_aarch64_reduc_smax_scal_v4si (__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_s16 (const int16_t* __a) +vmaxvq_u8 (uint8x16_t __a) { - return vdupq_n_s16 (*__a); + return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a); } 
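The across-lanes reductions (vmaxv_*/vmaxvq_*) added in this hunk return a scalar holding the maximum over all lanes, via the reduc_*max*_scal_* builtins. A small sketch, illustrative only and not part of the patch, assuming an AArch64 target:

/* Reduce a 128-bit vector of unsigned bytes to its largest element.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8_t buf[16] = { 3, 9, 1, 200, 7, 7, 0, 42,
		      5, 6, 8, 9, 10, 11, 12, 13 };
  uint8x16_t v = vld1q_u8 (buf);
  printf ("max = %u\n", (unsigned) vmaxvq_u8 (v));	/* prints: max = 200 */
  return 0;
}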
-__extension__ extern __inline int32x4_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_s32 (const int32_t* __a) +vmaxvq_u16 (uint16x8_t __a) { - return vdupq_n_s32 (*__a); + return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_s64 (const int64_t* __a) +vmaxvq_u32 (uint32x4_t __a) { - return vdupq_n_s64 (*__a); + return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a); } -__extension__ extern __inline uint8x16_t +/* vmaxnmv */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_u8 (const uint8_t* __a) +vmaxnmv_f32 (float32x2_t __a) { - return vdupq_n_u8 (*__a); + return __builtin_aarch64_reduc_smax_scal_v2sf (__a); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_u16 (const uint16_t* __a) +vmaxnmvq_f32 (float32x4_t __a) { - return vdupq_n_u16 (*__a); + return __builtin_aarch64_reduc_smax_scal_v4sf (__a); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_u32 (const uint32_t* __a) +vmaxnmvq_f64 (float64x2_t __a) { - return vdupq_n_u32 (*__a); + return __builtin_aarch64_reduc_smax_scal_v2df (__a); } -__extension__ extern __inline uint64x2_t +/* vmin */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_dup_u64 (const uint64_t* __a) +vmin_f32 (float32x2_t __a, float32x2_t __b) { - return vdupq_n_u64 (*__a); + return __builtin_aarch64_smin_nanv2sf (__a, __b); } -/* vld1_lane */ +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmin_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x1_t) + { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; +} -__extension__ extern __inline float16x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane) +vmin_s8 (int8x8_t __a, int8x8_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_sminv8qi (__a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane) +vmin_s16 (int16x4_t __a, int16x4_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_sminv4hi (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) +vmin_s32 (int32x2_t __a, int32x2_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_sminv2si (__a, __b); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane) +vmin_u8 (uint8x8_t __a, 
uint8x8_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane) +vmin_u16 (uint16x4_t __a, uint16x4_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a, + (int16x4_t) __b); } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane) +vmin_u32 (uint32x2_t __a, uint32x2_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a, + (int32x2_t) __b); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane) +vminq_f32 (float32x4_t __a, float32x4_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_smin_nanv4sf (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane) +vminq_f64 (float64x2_t __a, float64x2_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_smin_nanv2df (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane) +vminq_s8 (int8x16_t __a, int8x16_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_sminv16qi (__a, __b); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) +vminq_s16 (int16x8_t __a, int16x8_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_sminv8hi (__a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane) +vminq_s32 (int32x4_t __a, int32x4_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_sminv4si (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane) +vminq_u8 (uint8x16_t __a, uint8x16_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a, + (int8x16_t) __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane) +vminq_u16 
(uint16x8_t __a, uint16x8_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a, + (int16x8_t) __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) +vminq_u32 (uint32x4_t __a, uint32x4_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a, + (int32x4_t) __b); } -/* vld1q_lane */ +/* vminnm */ -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane) +vminnm_f32 (float32x2_t __a, float32x2_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_fminv2sf (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane) +vminnm_f64 (float64x1_t __a, float64x1_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return (float64x1_t) + { __builtin_aarch64_fmindf (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane) +vminnmq_f32 (float32x4_t __a, float32x4_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_fminv4sf (__a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane) +vminnmq_f64 (float64x2_t __a, float64x2_t __b) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_fminv2df (__a, __b); } -__extension__ extern __inline poly16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane) -{ - return __aarch64_vset_lane_any (*__src, __vec, __lane); -} +/* vminv */ -__extension__ extern __inline poly64x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane) +vminv_f32 (float32x2_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane) +vminv_s8 (int8x8_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_smin_scal_v8qi (__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane) +vminv_s16 (int16x4_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, 
__lane); + return __builtin_aarch64_reduc_smin_scal_v4hi (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane) +vminv_s32 (int32x2_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_smin_scal_v2si (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) +vminv_u8 (uint8x8_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane) +vminv_u16 (uint16x4_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane) +vminv_u32 (uint32x2_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane) +vminvq_f32 (float32x4_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) +vminvq_f64 (float64x2_t __a) { - return __aarch64_vset_lane_any (*__src, __vec, __lane); + return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a); } -/* vldn */ - -__extension__ extern __inline int64x1x2_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_s64 (const int64_t * __a) +vminvq_s8 (int8x16_t __a) { - int64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_reduc_smin_scal_v16qi (__a); } -__extension__ extern __inline uint64x1x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_u64 (const uint64_t * __a) +vminvq_s16 (int16x8_t __a) { - uint64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_reduc_smin_scal_v8hi (__a); } -__extension__ extern __inline float64x1x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vld2_f64 (const float64_t * __a) +vminvq_s32 (int32x4_t __a) { - float64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; - return ret; + return __builtin_aarch64_reduc_smin_scal_v4si (__a); } -__extension__ extern __inline int8x8x2_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_s8 (const int8_t * __a) +vminvq_u8 (uint8x16_t __a) { - int8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a); } -__extension__ extern __inline poly8x8x2_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_p8 (const poly8_t * __a) +vminvq_u16 (uint16x8_t __a) { - poly8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a); } -__extension__ extern __inline poly64x1x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_p64 (const poly64_t * __a) +vminvq_u32 (uint32x4_t __a) { - poly64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); - return ret; + return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a); } -__extension__ extern __inline int16x4x2_t +/* vminnmv */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_s16 (const int16_t * __a) +vminnmv_f32 (float32x2_t __a) { - int16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_reduc_smin_scal_v2sf (__a); } -__extension__ extern __inline poly16x4x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_p16 (const poly16_t * __a) +vminnmvq_f32 (float32x4_t __a) { - poly16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_reduc_smin_scal_v4sf (__a); } -__extension__ extern __inline int32x2x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_s32 (const int32_t * __a) +vminnmvq_f64 (float64x2_t __a) { - int32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2si ((const 
__builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __builtin_aarch64_reduc_smin_scal_v2df (__a); } -__extension__ extern __inline uint8x8x2_t +/* vmla */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_u8 (const uint8_t * __a) +vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) { - uint8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __a + __b * __c; } -__extension__ extern __inline uint16x4x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_u16 (const uint16_t * __a) +vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) { - uint16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __a + __b * __c; } -__extension__ extern __inline uint32x2x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_u32 (const uint32_t * __a) +vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - uint32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __a + __b * __c; } -__extension__ extern __inline float16x4x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_f16 (const float16_t * __a) +vmlaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) { - float16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hf (__a); - ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1); - return ret; + return __a + __b * __c; } -__extension__ extern __inline float32x2x2_t +/* vmla_lane */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_f32 (const float32_t * __a) +vmla_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) { - float32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int8x16x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_s8 (const int8_t * __a) +vmla_lane_s16 (int16x4_t __a, int16x4_t __b, + int16x4_t __c, const int __lane) { - int8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) 
__builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly8x16x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_p8 (const poly8_t * __a) +vmla_lane_s32 (int32x2_t __a, int32x2_t __b, + int32x2_t __c, const int __lane) { - poly8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int16x8x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_s16 (const int16_t * __a) +vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x4_t __c, const int __lane) { - int16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly16x8x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_p16 (const poly16_t * __a) +vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x2_t __c, const int __lane) { - poly16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly64x2x2_t +/* vmla_laneq */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_p64 (const poly64_t * __a) +vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) { - poly64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int32x4x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_s32 (const int32_t * __a) +vmla_laneq_s16 (int16x4_t __a, int16x4_t __b, + int16x8_t __c, const int __lane) { - int32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int64x2x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_s64 (const int64_t * __a) +vmla_laneq_s32 (int32x2_t 
__a, int32x2_t __b, + int32x4_t __c, const int __lane) { - int64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint8x16x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_u8 (const uint8_t * __a) +vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x8_t __c, const int __lane) { - uint8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint16x8x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_u16 (const uint16_t * __a) +vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x4_t __c, const int __lane) { - uint16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint32x4x2_t +/* vmlaq_lane */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_u32 (const uint32_t * __a) +vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) { - uint32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint64x2x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_u64 (const uint64_t * __a) +vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, + int16x4_t __c, const int __lane) { - uint64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float16x8x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_f16 (const float16_t * __a) +vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, + int32x2_t __c, const int __lane) { - float16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hf (__a); - ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float32x4x2_t 
+__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_f32 (const float32_t * __a) +vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x4_t __c, const int __lane) { - float32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float64x2x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_f64 (const float64_t * __a) +vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x2_t __c, const int __lane) { - float64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int64x1x3_t + /* vmlaq_laneq */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_s64 (const int64_t * __a) +vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) { - int64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint64x1x3_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_u64 (const uint64_t * __a) -{ - uint64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; +vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b, + int16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float64x1x3_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_f64 (const float64_t * __a) +vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b, + int32x4_t __c, const int __lane) { - float64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int8x8x3_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_s8 (const int8_t * __a) +vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, + 
uint16x8_t __c, const int __lane) { - int8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly8x8x3_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_p8 (const poly8_t * __a) +vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x4_t __c, const int __lane) { - poly8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int16x4x3_t +/* vmls */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_s16 (const int16_t * __a) +vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) { - int16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __a - __b * __c; } -__extension__ extern __inline poly16x4x3_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_p16 (const poly16_t * __a) +vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) { - poly16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __a - __b * __c; } -__extension__ extern __inline int32x2x3_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_s32 (const int32_t * __a) +vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - int32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return __a - __b * __c; } -__extension__ extern __inline uint8x8x3_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_u8 (const uint8_t * __a) +vmlsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) { - uint8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (uint8x8_t) 
__builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __a - __b * __c; } -__extension__ extern __inline uint16x4x3_t +/* vmls_lane */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_u16 (const uint16_t * __a) +vmls_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) { - uint16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint32x2x3_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_u32 (const uint32_t * __a) +vmls_lane_s16 (int16x4_t __a, int16x4_t __b, + int16x4_t __c, const int __lane) { - uint32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float16x4x3_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_f16 (const float16_t * __a) +vmls_lane_s32 (int32x2_t __a, int32x2_t __b, + int32x2_t __c, const int __lane) { - float16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hf (__a); - ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float32x2x3_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_f32 (const float32_t * __a) +vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x4_t __c, const int __lane) { - float32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly64x1x3_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_p64 (const poly64_t * __a) +vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x2_t __c, const int __lane) { - poly64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); - return ret; + return (__a - (__b * 
__aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int8x16x3_t +/* vmls_laneq */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_s8 (const int8_t * __a) +vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) { - int8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly8x16x3_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_p8 (const poly8_t * __a) +vmls_laneq_s16 (int16x4_t __a, int16x4_t __b, + int16x8_t __c, const int __lane) { - poly8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int16x8x3_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_s16 (const int16_t * __a) +vmls_laneq_s32 (int32x2_t __a, int32x2_t __b, + int32x4_t __c, const int __lane) { - int16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly16x8x3_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_p16 (const poly16_t * __a) +vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x8_t __c, const int __lane) { - poly16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int32x4x3_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_s32 (const int32_t * __a) +vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x4_t __c, const int __lane) { - int32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ 
extern __inline int64x2x3_t +/* vmlsq_lane */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_s64 (const int64_t * __a) +vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) { - int64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint8x16x3_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_u8 (const uint8_t * __a) +vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, + int16x4_t __c, const int __lane) { - uint8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, + int32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint16x8x3_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_u16 (const uint16_t * __a) +vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x4_t __c, const int __lane) { - uint16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint32x4x3_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_u32 (const uint32_t * __a) +vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x2_t __c, const int __lane) { - uint32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline uint64x2x3_t + /* vmlsq_laneq */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_u64 (const uint64_t * __a) +vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) { - uint64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di 
(__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float16x8x3_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_f16 (const float16_t * __a) +vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b, + int16x8_t __c, const int __lane) { - float16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hf (__a); - ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline float32x4x3_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_f32 (const float32_t * __a) +vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b, + int32x4_t __c, const int __lane) { - float32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } - -__extension__ extern __inline float64x2x3_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_f64 (const float64_t * __a) +vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x8_t __c, const int __lane) { - float64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline poly64x2x3_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_p64 (const poly64_t * __a) +vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x4_t __c, const int __lane) { - poly64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); - return ret; + return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); } -__extension__ extern __inline int64x1x4_t +/* vmov_n_ */ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_s64 (const int64_t * __a) +vmov_n_f16 (float16_t __a) { - int64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = 
(int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return vdup_n_f16 (__a); } -__extension__ extern __inline uint64x1x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_u64 (const uint64_t * __a) +vmov_n_f32 (float32_t __a) { - uint64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return vdup_n_f32 (__a); } -__extension__ extern __inline float64x1x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_f64 (const float64_t * __a) +vmov_n_f64 (float64_t __a) { - float64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; - ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; - return ret; + return (float64x1_t) {__a}; } -__extension__ extern __inline int8x8x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_s8 (const int8_t * __a) +vmov_n_p8 (poly8_t __a) { - int8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return vdup_n_p8 (__a); } -__extension__ extern __inline poly8x8x4_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_p8 (const poly8_t * __a) +vmov_n_p16 (poly16_t __a) { - poly8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return vdup_n_p16 (__a); } -__extension__ extern __inline int16x4x4_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_s16 (const int16_t * __a) +vmov_n_p64 (poly64_t __a) { - int16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return vdup_n_p64 (__a); } -__extension__ extern __inline poly16x4x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vld4_p16 (const poly16_t * __a) +vmov_n_s8 (int8_t __a) { - poly16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return vdup_n_s8 (__a); } -__extension__ extern __inline int32x2x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_s32 (const int32_t * __a) +vmov_n_s16 (int16_t __a) { - int32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return vdup_n_s16 (__a); } -__extension__ extern __inline uint8x8x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_u8 (const uint8_t * __a) +vmov_n_s32 (int32_t __a) { - uint8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return vdup_n_s32 (__a); } -__extension__ extern __inline uint16x4x4_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_u16 (const uint16_t * __a) +vmov_n_s64 (int64_t __a) { - uint16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return (int64x1_t) {__a}; } -__extension__ extern __inline uint32x2x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_u32 (const uint32_t * __a) +vmov_n_u8 (uint8_t __a) { - uint32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return vdup_n_u8 (__a); } -__extension__ extern __inline float16x4x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_f16 (const float16_t * __a) -{ - float16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hf (__a); - ret.val[0] = __builtin_aarch64_get_dregxiv4hf 
(__o, 0); - ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2); - ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3); - return ret; +vmov_n_u16 (uint16_t __a) +{ + return vdup_n_u16 (__a); } -__extension__ extern __inline float32x2x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_f32 (const float32_t * __a) +vmov_n_u32 (uint32_t __a) { - float32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); - ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); - return ret; + return vdup_n_u32 (__a); } -__extension__ extern __inline poly64x1x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_p64 (const poly64_t * __a) +vmov_n_u64 (uint64_t __a) { - poly64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); - ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); - return ret; + return (uint64x1_t) {__a}; } -__extension__ extern __inline int8x16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_s8 (const int8_t * __a) +vmovq_n_f16 (float16_t __a) { - int8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return vdupq_n_f16 (__a); } -__extension__ extern __inline poly8x16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_p8 (const poly8_t * __a) +vmovq_n_f32 (float32_t __a) { - poly8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return vdupq_n_f32 (__a); } -__extension__ extern __inline int16x8x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_s16 (const int16_t * __a) +vmovq_n_f64 (float64_t __a) { - int16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = 
(int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return vdupq_n_f64 (__a); } -__extension__ extern __inline poly16x8x4_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_p16 (const poly16_t * __a) +vmovq_n_p8 (poly8_t __a) { - poly16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return vdupq_n_p8 (__a); } -__extension__ extern __inline int32x4x4_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_s32 (const int32_t * __a) +vmovq_n_p16 (poly16_t __a) { - int32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return vdupq_n_p16 (__a); } -__extension__ extern __inline int64x2x4_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_s64 (const int64_t * __a) +vmovq_n_p64 (poly64_t __a) { - int64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return vdupq_n_p64 (__a); } -__extension__ extern __inline uint8x16x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_u8 (const uint8_t * __a) +vmovq_n_s8 (int8_t __a) { - uint8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return vdupq_n_s8 (__a); } -__extension__ extern __inline uint16x8x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_u16 (const uint16_t * __a) +vmovq_n_s16 (int16_t __a) { - uint16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return vdupq_n_s16 (__a); } -__extension__ extern __inline uint32x4x4_t +__extension__ extern __inline int32x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_u32 (const uint32_t * __a) +vmovq_n_s32 (int32_t __a) { - uint32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return vdupq_n_s32 (__a); } -__extension__ extern __inline uint64x2x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_u64 (const uint64_t * __a) +vmovq_n_s64 (int64_t __a) { - uint64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return vdupq_n_s64 (__a); } -__extension__ extern __inline float16x8x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_f16 (const float16_t * __a) +vmovq_n_u8 (uint8_t __a) { - float16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hf (__a); - ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2); - ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3); - return ret; + return vdupq_n_u8 (__a); } -__extension__ extern __inline float32x4x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_f32 (const float32_t * __a) +vmovq_n_u16 (uint16_t __a) { - float32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); - ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); - return ret; + return vdupq_n_u16 (__a); } -__extension__ extern __inline float64x2x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_f64 (const float64_t * __a) +vmovq_n_u32 (uint32_t __a) { - float64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); - ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); - return ret; + return vdupq_n_u32 (__a); } -__extension__ extern __inline poly64x2x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_p64 (const poly64_t * __a) +vmovq_n_u64 (uint64_t __a) { - poly64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const 
__builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); - ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); - return ret; + return vdupq_n_u64 (__a); } -/* vldn_dup */ +/* vmul_lane */ -__extension__ extern __inline int8x8x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_s8 (const int8_t * __a) +vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane) { - int8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int16x4x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_s16 (const int16_t * __a) +vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane) { - int16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __a * __b; } -__extension__ extern __inline int32x2x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_s32 (const int32_t * __a) +vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane) { - int32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float16x4x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_f16 (const float16_t * __a) +vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane) { - float16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float32x2x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_f32 (const float32_t * __a) +vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane) { - float32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64x1x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_f64 (const float64_t * 
__a) +vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane) { - float64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint8x8x2_t +/* vmuld_lane */ + +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_u8 (const uint8_t * __a) +vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane) { - uint8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint16x4x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_u16 (const uint16_t * __a) +vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane) { - uint16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint32x2x2_t +/* vmuls_lane */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_u32 (const uint32_t * __a) +vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane) { - uint32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly8x8x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_p8 (const poly8_t * __a) +vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane) { - poly8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly16x4x2_t +/* vmul_laneq */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_p16 (const poly16_t * __a) +vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane) { - poly16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly64x1x2_t +__extension__ extern 
__inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_p64 (const poly64_t * __a) +vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane) { - poly64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } - -__extension__ extern __inline int64x1x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_s64 (const int64_t * __a) +vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane) { - int64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint64x1x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2_dup_u64 (const uint64_t * __a) +vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane) { - uint64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int8x16x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_s8 (const int8_t * __a) +vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane) { - int8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly8x16x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_p8 (const poly8_t * __a) +vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane) { - poly8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int16x8x2_t +/* vmul_n */ + +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_s16 (const int16_t * __a) +vmul_n_f64 (float64x1_t __a, float64_t __b) { - int16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return (float64x1_t) { vget_lane_f64 (__a, 0) * __b }; } -__extension__ extern 
__inline poly16x8x2_t +/* vmulq_lane */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_p16 (const poly16_t * __a) +vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane) { - poly16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int32x4x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_s32 (const int32_t * __a) +vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane) { - int32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + __AARCH64_LANE_CHECK (__a, __lane); + return __a * __b[0]; } -__extension__ extern __inline int64x2x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_s64 (const int64_t * __a) +vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane) { - int64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint8x16x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_u8 (const uint8_t * __a) +vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane) { - uint8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint16x8x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_u16 (const uint16_t * __a) +vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane) { - uint16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint32x4x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_u32 (const uint32_t * __a) +vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane) { - uint32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - 
return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint64x2x2_t +/* vmulq_laneq */ + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_u64 (const uint64_t * __a) +vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane) { - uint64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float16x8x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_f16 (const float16_t * __a) +vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane) { - float16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float32x4x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_f32 (const float32_t * __a) +vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane) { - float32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64x2x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_f64 (const float64_t * __a) +vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane) { - float64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly64x2x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld2q_dup_p64 (const poly64_t * __a) +vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane) { - poly64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int64x1x3_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_s64 (const int64_t * __a) +vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane) { - int64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) 
__builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint64x1x3_t +/* vmul_n. */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_u64 (const uint64_t * __a) +vmul_n_f32 (float32x2_t __a, float32_t __b) { - uint64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline float64x1x3_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_f64 (const float64_t * __a) +vmulq_n_f32 (float32x4_t __a, float32_t __b) { - float64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; - return ret; + return __a * __b; } -__extension__ extern __inline int8x8x3_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_s8 (const int8_t * __a) +vmulq_n_f64 (float64x2_t __a, float64_t __b) { - int8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline poly8x8x3_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_p8 (const poly8_t * __a) +vmul_n_s16 (int16x4_t __a, int16_t __b) { - poly8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline int16x4x3_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_s16 (const int16_t * __a) +vmulq_n_s16 (int16x8_t __a, int16_t __b) { - int16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline poly16x4x3_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_p16 (const poly16_t * __a) +vmul_n_s32 (int32x2_t __a, int32_t __b) { - 
poly16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline int32x2x3_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_s32 (const int32_t * __a) +vmulq_n_s32 (int32x4_t __a, int32_t __b) { - int32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline uint8x8x3_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_u8 (const uint8_t * __a) +vmul_n_u16 (uint16x4_t __a, uint16_t __b) { - uint8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline uint16x4x3_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_u16 (const uint16_t * __a) +vmulq_n_u16 (uint16x8_t __a, uint16_t __b) { - uint16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline uint32x2x3_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_u32 (const uint32_t * __a) +vmul_n_u32 (uint32x2_t __a, uint32_t __b) { - uint32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline float16x4x3_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_f16 (const float16_t * __a) +vmulq_n_u32 (uint32x4_t __a, uint32_t __b) { - float16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1); - ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2); - return ret; + return __a * __b; } -__extension__ extern __inline float32x2x3_t +/* vmvn */ + +__extension__ extern __inline poly8x8_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_f32 (const float32_t * __a) +vmvn_p8 (poly8x8_t __a) { - float32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); - return ret; + return (poly8x8_t) ~((int8x8_t) __a); } -__extension__ extern __inline poly64x1x3_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3_dup_p64 (const poly64_t * __a) +vmvn_s8 (int8x8_t __a) { - poly64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline int8x16x3_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_s8 (const int8_t * __a) +vmvn_s16 (int16x4_t __a) { - int8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline poly8x16x3_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_p8 (const poly8_t * __a) +vmvn_s32 (int32x2_t __a) { - poly8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline int16x8x3_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_s16 (const int16_t * __a) +vmvn_u8 (uint8x8_t __a) { - int16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline poly16x8x3_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_p16 (const poly16_t * __a) +vmvn_u16 (uint16x4_t __a) { - poly16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline int32x4x3_t 
+__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_s32 (const int32_t * __a) +vmvn_u32 (uint32x2_t __a) { - int32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline int64x2x3_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_s64 (const int64_t * __a) +vmvnq_p8 (poly8x16_t __a) { - int64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return (poly8x16_t) ~((int8x16_t) __a); } -__extension__ extern __inline uint8x16x3_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_u8 (const uint8_t * __a) +vmvnq_s8 (int8x16_t __a) { - uint8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline uint16x8x3_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_u16 (const uint16_t * __a) +vmvnq_s16 (int16x8_t __a) { - uint16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline uint32x4x3_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_u32 (const uint32_t * __a) +vmvnq_s32 (int32x4_t __a) { - uint32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline uint64x2x3_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_u64 (const uint64_t * __a) +vmvnq_u8 (uint8x16_t __a) { - uint64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return ~__a; } 
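Note on the hunks above: at this point in the file the patch removes the old __builtin_aarch64_ld2/ld3/ld4-based vldN/vldN_dup bodies and adds vmla/vmls lane variants, vmov_n_*, vmul*_lane*/vmul*_n and vmvn*, all written directly as GNU C vector arithmetic (e.g. __a - (__b * __aarch64_vget_lane_any (__c, __lane)), vdup_n_* and ~__a). A minimal usage sketch follows, assuming an AArch64 target and this arm_neon.h; the test values are illustrative only and are not taken from the patch:

#include <arm_neon.h>
#include <assert.h>

int
main (void)
{
  int16x4_t a = vdup_n_s16 (10);
  int16x4_t b = vdup_n_s16 (3);
  int16x4_t c = {1, 2, 3, 4};

  /* vmls_lane_s16 is a - b * c[lane]; 10 - 3 * 3 == 1 in every lane.  */
  int16x4_t d = vmls_lane_s16 (a, b, c, 2);
  assert (vget_lane_s16 (d, 0) == 1);

  /* vmvn_u8 expands to ~__a, so ~0x0f == 0xf0 in every byte.  */
  uint8x8_t e = vmvn_u8 (vdup_n_u8 (0x0f));
  assert (vget_lane_u8 (e, 0) == 0xf0);

  return 0;
}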
-__extension__ extern __inline float16x8x3_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_f16 (const float16_t * __a) +vmvnq_u16 (uint16x8_t __a) { - float16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0); - ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1); - ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline float32x4x3_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_f32 (const float32_t * __a) +vmvnq_u32 (uint32x4_t __a) { - float32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); - return ret; + return ~__a; } -__extension__ extern __inline float64x2x3_t +/* vneg */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_f64 (const float64_t * __a) +vneg_f32 (float32x2_t __a) { - float64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); - return ret; + return -__a; } -__extension__ extern __inline poly64x2x3_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld3q_dup_p64 (const poly64_t * __a) +vneg_f64 (float64x1_t __a) { - poly64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); - return ret; + return -__a; } -__extension__ extern __inline int64x1x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_s64 (const int64_t * __a) +vneg_s8 (int8x8_t __a) { - int64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline uint64x1x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_u64 (const uint64_t * __a) +vneg_s16 (int16x4_t __a) { - uint64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi 
(__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline float64x1x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_f64 (const float64_t * __a) +vneg_s32 (int32x2_t __a) { - float64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; - ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; - return ret; + return -__a; } -__extension__ extern __inline int8x8x4_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_s8 (const int8_t * __a) +vneg_s64 (int64x1_t __a) { - int8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline poly8x8x4_t +/* According to the ACLE, the negative of the minimum (signed) + value is itself. This leads to a semantics mismatch, as this is + undefined behaviour in C. The value range predictor is not + aware that the negation of a negative number can still be negative + and it may try to fold the expression. See the test in + gcc.target/aarch64/vnegd_s64.c for an example. + + The cast below tricks the value range predictor to include + INT64_MIN in the range it computes. So for x in the range + [INT64_MIN, y] the range prediction after vnegd_s64 (x) will + be ~[INT64_MIN + 1, y]. 
*/ + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_p8 (const poly8_t * __a) +vnegd_s64 (int64_t __a) { - poly8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return - (uint64_t) __a; } -__extension__ extern __inline int16x4x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_s16 (const int16_t * __a) +vnegq_f32 (float32x4_t __a) { - int16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline poly16x4x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_p16 (const poly16_t * __a) +vnegq_f64 (float64x2_t __a) { - poly16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline int32x2x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_s32 (const int32_t * __a) +vnegq_s8 (int8x16_t __a) { - int32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline uint8x8x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_u8 (const uint8_t * __a) +vnegq_s16 (int16x8_t __a) { - uint8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline uint16x4x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_u16 (const uint16_t * __a) +vnegq_s32 (int32x4_t __a) { - uint16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = 
__builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return -__a; } -__extension__ extern __inline uint32x2x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_u32 (const uint32_t * __a) +vnegq_s64 (int64x2_t __a) { - uint32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return -__a; +} + +/* vpadd */ + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vpadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_faddpv2sf (__a, __b); } -__extension__ extern __inline float16x4x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_f16 (const float16_t * __a) +vpaddq_f32 (float32x4_t __a, float32x4_t __b) { - float16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1); - ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2); - ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3); - return ret; + return __builtin_aarch64_faddpv4sf (__a, __b); } -__extension__ extern __inline float32x2x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_f32 (const float32_t * __a) +vpaddq_f64 (float64x2_t __a, float64x2_t __b) { - float32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); - ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); - return ret; + return __builtin_aarch64_faddpv2df (__a, __b); } -__extension__ extern __inline poly64x1x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4_dup_p64 (const poly64_t * __a) +vpadd_s8 (int8x8_t __a, int8x8_t __b) { - poly64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); - ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); - return ret; + return __builtin_aarch64_addpv8qi (__a, __b); } -__extension__ extern __inline int8x16x4_t +__extension__ extern __inline int16x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_s8 (const int8_t * __a) +vpadd_s16 (int16x4_t __a, int16x4_t __b) { - int8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_addpv4hi (__a, __b); } -__extension__ extern __inline poly8x16x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_p8 (const poly8_t * __a) +vpadd_s32 (int32x2_t __a, int32x2_t __b) { - poly8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_addpv2si (__a, __b); } -__extension__ extern __inline int16x8x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_s16 (const int16_t * __a) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) { - int16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, + (int8x8_t) __b); } -__extension__ extern __inline poly16x8x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_p16 (const poly16_t * __a) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) { - poly16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, + (int16x4_t) __b); } -__extension__ extern __inline int32x4x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_s32 (const int32_t * __a) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) { - int32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, 
+ (int32x2_t) __b); } -__extension__ extern __inline int64x2x4_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_s64 (const int64_t * __a) +vpadds_f32 (float32x2_t __a) { - int64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return __builtin_aarch64_reduc_plus_scal_v2sf (__a); } -__extension__ extern __inline uint8x16x4_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_u8 (const uint8_t * __a) +vpaddd_f64 (float64x2_t __a) { - uint8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_reduc_plus_scal_v2df (__a); } -__extension__ extern __inline uint16x8x4_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_u16 (const uint16_t * __a) +vpaddd_s64 (int64x2_t __a) { - uint16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return __builtin_aarch64_addpdi (__a); } -__extension__ extern __inline uint32x4x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_u32 (const uint32_t * __a) +vpaddd_u64 (uint64x2_t __a) { - uint32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return __builtin_aarch64_addpdi ((int64x2_t) __a); } -__extension__ extern __inline uint64x2x4_t +/* vqabs */ + +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_u64 (const uint64_t * __a) +vqabsq_s64 (int64x2_t __a) { - uint64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); } 
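(The cast trick described in the vnegd_s64 comment above can be checked in isolation. A minimal sketch, not part of the patch: it assumes GCC's documented modulo-2^64 behaviour when converting an out-of-range unsigned value back to a signed type, which is implementation-defined in C.)

#include <stdint.h>
#include <assert.h>

/* Same idiom as vnegd_s64 above: negate through uint64_t so that
   negating INT64_MIN is not signed overflow (undefined behaviour).
   The unsigned negation wraps modulo 2^64, and converting the result
   back to int64_t yields INT64_MIN again, which is the result the
   ACLE specifies for vnegd_s64.  */
static int64_t
neg_like_vnegd (int64_t x)
{
  return - (uint64_t) x;
}

int
main (void)
{
  assert (neg_like_vnegd (INT64_MIN) == INT64_MIN);
  assert (neg_like_vnegd (42) == -42);
  return 0;
}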
-__extension__ extern __inline float16x8x4_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_f16 (const float16_t * __a) +vqabsb_s8 (int8_t __a) { - float16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0); - ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1); - ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2); - ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3); - return ret; + return (int8_t) __builtin_aarch64_sqabsqi (__a); } -__extension__ extern __inline float32x4x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_f32 (const float32_t * __a) +vqabsh_s16 (int16_t __a) { - float32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); - ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); - return ret; + return (int16_t) __builtin_aarch64_sqabshi (__a); } -__extension__ extern __inline float64x2x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_f64 (const float64_t * __a) +vqabss_s32 (int32_t __a) { - float64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); - ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); - return ret; + return (int32_t) __builtin_aarch64_sqabssi (__a); } -__extension__ extern __inline poly64x2x4_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld4q_dup_p64 (const poly64_t * __a) +vqabsd_s64 (int64_t __a) { - poly64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); - ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); - return ret; + return __builtin_aarch64_sqabsdi (__a); } -/* vld2_lane */ +/* vqadd */ -#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ - qmode, ptrmode, funcsuffix, signedtype) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_oi __o; \ - largetype __temp; \ - __temp.val[0] = \ - vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ - __temp.val[1] = \ - vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ - __o = __builtin_aarch64_set_qregoi##qmode (__o, \ - (signedtype) __temp.val[0], \ - 0); \ - __o = __builtin_aarch64_set_qregoi##qmode (__o, \ - (signedtype) 
__temp.val[1], \ - 1); \ - __o = __builtin_aarch64_ld2_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \ - __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); \ - return __b; \ +__extension__ extern __inline int8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqaddb_s8 (int8_t __a, int8_t __b) +{ + return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); } -__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, - p16, int16x8_t) -__LD2_LANE_FUNC (poly64x1x2_t, poly64x1_t, poly64x2x2_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, - u64, int64x2_t) +__extension__ extern __inline int16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqaddh_s16 (int16_t __a, int16_t __b) +{ + return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); +} -#undef __LD2_LANE_FUNC +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqadds_s32 (int32_t __a, int32_t __b) +{ + return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); +} -/* vld2q_lane */ +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqaddd_s64 (int64_t __a, int64_t __b) +{ + return __builtin_aarch64_sqadddi (__a, __b); +} -#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_oi __o; \ - intype ret; \ - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \ - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \ - __o = __builtin_aarch64_ld2_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); \ - ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); \ - return ret; \ +__extension__ extern __inline uint8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqaddb_u8 (uint8_t __a, uint8_t __b) +{ + return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); } -__LD2_LANE_FUNC (float16x8x2_t, 
float16x8_t, float16_t, v8hf, hf, f16) -__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64) -__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32) -__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64) -__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqaddh_u16 (uint16_t __a, uint16_t __b) +{ + return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); +} -#undef __LD2_LANE_FUNC +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqadds_u32 (uint32_t __a, uint32_t __b) +{ + return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); +} -/* vld3_lane */ +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqaddd_u64 (uint64_t __a, uint64_t __b) +{ + return __builtin_aarch64_uqadddi_uuu (__a, __b); +} -#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ - qmode, ptrmode, funcsuffix, signedtype) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_ci __o; \ - largetype __temp; \ - __temp.val[0] = \ - vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ - __temp.val[1] = \ - vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ - __temp.val[2] = \ - vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[0], \ - 0); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[1], \ - 1); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[2], \ - 2); \ - __o = __builtin_aarch64_ld3_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \ - __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); \ - __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); \ - return __b; \ +/* vqdmlal */ + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c); } -__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, 
poly16x8x3_t, poly16_t, v4hi, v8hi, hi, - p16, int16x8_t) -__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, - u64, int64x2_t) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c); +} -#undef __LD3_LANE_FUNC +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); +} -/* vld3q_lane */ +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d); +} -#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_ci __o; \ - intype ret; \ - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \ - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \ - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \ - __o = __builtin_aarch64_ld3_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \ - ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \ - ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \ - return ret; \ +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c); } -__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64) -__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32) -__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, 
int64_t, v2di, di, s64) -__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d); +} -#undef __LD3_LANE_FUNC +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d); +} -/* vld4_lane */ +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c); +} -#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ - qmode, ptrmode, funcsuffix, signedtype) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - largetype __temp; \ - __temp.val[0] = \ - vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ - __temp.val[1] = \ - vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ - __temp.val[2] = \ - vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ - __temp.val[3] = \ - vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[0], \ - 0); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[1], \ - 1); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[2], \ - 2); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[3], \ - 3); \ - __o = __builtin_aarch64_ld4_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ - __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \ - __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \ - __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \ - return __b; \ +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqdmlalv2si (__a, __b, __c); } -/* vld4q_lane */ +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c); +} -__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, 
- p16, int16x8_t) -__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, - u64, int64x2_t) +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); +} -#undef __LD4_LANE_FUNC +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d); +} -/* vld4q_lane */ +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c); +} -#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - intype ret; \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \ - __o = __builtin_aarch64_ld4_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \ - ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \ - ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \ - ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \ - return ret; \ +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); } -__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) -__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, 
v8hi, hi, s16) -__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) -__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) -__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d); +} -#undef __LD4_LANE_FUNC +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); +} -/* vmax */ +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlalhi (__a, __b, __c); +} -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_f32 (float32x2_t __a, float32x2_t __b) +vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) { - return __builtin_aarch64_smax_nanv2sf (__a, __b); + return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_f64 (float64x1_t __a, float64x1_t __b) +vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) { - return (float64x1_t) - { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0), - vget_lane_f64 (__b, 0)) }; + return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_s8 (int8x8_t __a, int8x8_t __b) +vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c) { - return __builtin_aarch64_smaxv8qi (__a, __b); + return __builtin_aarch64_sqdmlalsi (__a, __b, __c); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_s16 (int16x4_t __a, int16x4_t __b) +vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) { - return __builtin_aarch64_smaxv4hi (__a, __b); + return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_s32 (int32x2_t __a, int32x2_t __b) +vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) { - return __builtin_aarch64_smaxv2si (__a, __b); + return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d); } -__extension__ extern __inline uint8x8_t +/* vqdmlsl */ + +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_u8 (uint8x8_t __a, uint8x8_t __b) +vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) { - return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c); } -__extension__ 
extern __inline uint16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_u16 (uint16x4_t __a, uint16x4_t __b) +vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) { - return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_u32 (uint32x2_t __a, uint32x2_t __b) +vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, + int const __d) { - return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_f32 (float32x4_t __a, float32x4_t __b) +vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) { - return __builtin_aarch64_smax_nanv4sf (__a, __b); + return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_f64 (float64x2_t __a, float64x2_t __b) +vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) { - return __builtin_aarch64_smax_nanv2df (__a, __b); + return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_s8 (int8x16_t __a, int8x16_t __b) +vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - return __builtin_aarch64_smaxv16qi (__a, __b); + return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_s16 (int16x8_t __a, int16x8_t __b) +vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) { - return __builtin_aarch64_smaxv8hi (__a, __b); + return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_s32 (int32x4_t __a, int32x4_t __b) +vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) { - return __builtin_aarch64_smaxv4si (__a, __b); + return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) +vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) { - return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_sqdmlslv2si (__a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) +vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) { - return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c); } -__extension__ extern __inline 
uint32x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) +vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, + int const __d) { - return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); } -/* vmulx */ -__extension__ extern __inline float32x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_f32 (float32x2_t __a, float32x2_t __b) +vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) { - return __builtin_aarch64_fmulxv2sf (__a, __b); + return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_f32 (float32x4_t __a, float32x4_t __b) +vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) { - return __builtin_aarch64_fmulxv4sf (__a, __b); + return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_f64 (float64x1_t __a, float64x1_t __b) +vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])}; + return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_f64 (float64x2_t __a, float64x2_t __b) +vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) { - return __builtin_aarch64_fmulxv2df (__a, __b); + return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d); } -__extension__ extern __inline float32_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxs_f32 (float32_t __a, float32_t __b) +vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); +} + +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlslhi (__a, __b, __c); +} + +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); +} + +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) { - return __builtin_aarch64_fmulxsf (__a, __b); + return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d); } -__extension__ extern __inline float64_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxd_f64 (float64_t __a, float64_t __b) +vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c) { - return __builtin_aarch64_fmulxdf (__a, __b); + return __builtin_aarch64_sqdmlslsi (__a, __b, __c); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int64_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane) +vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) { - return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane)); + return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane) +vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) { - return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane)); + return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d); } -__extension__ extern __inline float32x4_t +/* vqdmulh */ + +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane) +vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) { - return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane)); + return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane) +vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) { - return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane)); + return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane) +vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) { - return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane)); + return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane) +vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) { - return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane)); + return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane) +vqdmulhh_s16 (int16_t __a, int16_t __b) { - return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane)); + return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane) +vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { - return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane)); + return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } -__extension__ extern __inline float32_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane) +vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) { 
- return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane)); + return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c); } -__extension__ extern __inline float32_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane) +vqdmulhs_s32 (int32_t __a, int32_t __b) { - return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane)); + return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane) +vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { - return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane)); + return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } -__extension__ extern __inline float64_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane) +vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) { - return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane)); + return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c); } -/* vpmax */ +/* vqdmull */ -__extension__ extern __inline int8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_s8 (int8x8_t a, int8x8_t b) +vqdmull_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_smaxpv8qi (a, b); + return __builtin_aarch64_sqdmullv4hi (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_s16 (int16x4_t a, int16x4_t b) +vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_aarch64_smaxpv4hi (a, b); + return __builtin_aarch64_sqdmull2v8hi (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_s32 (int32x2_t a, int32x2_t b) +vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) { - return __builtin_aarch64_smaxpv2si (a, b); + return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_u8 (uint8x8_t a, uint8x8_t b) +vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c) { - return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a, - (int8x8_t) b); + return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_u16 (uint16x4_t a, uint16x4_t b) +vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) { - return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a, - (int16x4_t) b); + return __builtin_aarch64_sqdmull2_nv8hi (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_u32 (uint32x2_t a, uint32x2_t b) +vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a, - (int32x2_t) b); + return 
__builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_s8 (int8x16_t a, int8x16_t b) +vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c) { - return __builtin_aarch64_smaxpv16qi (a, b); + return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_s16 (int16x8_t a, int16x8_t b) +vqdmull_n_s16 (int16x4_t __a, int16_t __b) { - return __builtin_aarch64_smaxpv8hi (a, b); + return __builtin_aarch64_sqdmull_nv4hi (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_s32 (int32x4_t a, int32x4_t b) +vqdmull_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_smaxpv4si (a, b); + return __builtin_aarch64_sqdmullv2si (__a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_u8 (uint8x16_t a, uint8x16_t b) +vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) { - return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a, - (int8x16_t) b); + return __builtin_aarch64_sqdmull2v4si (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_u16 (uint16x8_t a, uint16x8_t b) +vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) { - return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a, - (int16x8_t) b); + return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_u32 (uint32x4_t a, uint32x4_t b) +vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c) { - return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a, - (int32x4_t) b); + return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_f32 (float32x2_t a, float32x2_t b) +vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) { - return __builtin_aarch64_smax_nanpv2sf (a, b); + return __builtin_aarch64_sqdmull2_nv4si (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_f32 (float32x4_t a, float32x4_t b) +vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - return __builtin_aarch64_smax_nanpv4sf (a, b); + return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_f64 (float64x2_t a, float64x2_t b) +vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c) { - return __builtin_aarch64_smax_nanpv2df (a, b); + return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c); } -__extension__ extern __inline float64_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxqd_f64 
(float64x2_t a) +vqdmull_n_s32 (int32x2_t __a, int32_t __b) { - return __builtin_aarch64_reduc_smax_nan_scal_v2df (a); + return __builtin_aarch64_sqdmull_nv2si (__a, __b); } -__extension__ extern __inline float32_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxs_f32 (float32x2_t a) +vqdmullh_s16 (int16_t __a, int16_t __b) { - return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a); + return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b); } -/* vpmaxnm */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxnm_f32 (float32x2_t a, float32x2_t b) +vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { - return __builtin_aarch64_smaxpv2sf (a, b); + return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxnmq_f32 (float32x4_t a, float32x4_t b) +vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) { - return __builtin_aarch64_smaxpv4sf (a, b); + return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxnmq_f64 (float64x2_t a, float64x2_t b) +vqdmulls_s32 (int32_t __a, int32_t __b) { - return __builtin_aarch64_smaxpv2df (a, b); + return __builtin_aarch64_sqdmullsi (__a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxnmqd_f64 (float64x2_t a) +vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_scal_v2df (a); + return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); } -__extension__ extern __inline float32_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxnms_f32 (float32x2_t a) +vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_scal_v2sf (a); + return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c); } -/* vpmin */ +/* vqmovn */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_s8 (int8x8_t a, int8x8_t b) +vqmovn_s16 (int16x8_t __a) { - return __builtin_aarch64_sminpv8qi (a, b); + return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_s16 (int16x4_t a, int16x4_t b) +vqmovn_s32 (int32x4_t __a) { - return __builtin_aarch64_sminpv4hi (a, b); + return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_s32 (int32x2_t a, int32x2_t b) +vqmovn_s64 (int64x2_t __a) { - return __builtin_aarch64_sminpv2si (a, b); + return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_u8 (uint8x8_t a, uint8x8_t b) +vqmovn_u16 (uint16x8_t __a) { - return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a, - (int8x8_t) b); + return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a); } __extension__ extern __inline uint16x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_u16 (uint16x4_t a, uint16x4_t b) +vqmovn_u32 (uint32x4_t __a) { - return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a, - (int16x4_t) b); + return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_u32 (uint32x2_t a, uint32x2_t b) -{ - return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a, - (int32x2_t) b); -} - -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_s8 (int8x16_t a, int8x16_t b) +vqmovn_u64 (uint64x2_t __a) { - return __builtin_aarch64_sminpv16qi (a, b); + return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_s16 (int16x8_t a, int16x8_t b) +vqmovnh_s16 (int16_t __a) { - return __builtin_aarch64_sminpv8hi (a, b); + return (int8_t) __builtin_aarch64_sqmovnhi (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_s32 (int32x4_t a, int32x4_t b) +vqmovns_s32 (int32_t __a) { - return __builtin_aarch64_sminpv4si (a, b); + return (int16_t) __builtin_aarch64_sqmovnsi (__a); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_u8 (uint8x16_t a, uint8x16_t b) +vqmovnd_s64 (int64_t __a) { - return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a, - (int8x16_t) b); + return (int32_t) __builtin_aarch64_sqmovndi (__a); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_u16 (uint16x8_t a, uint16x8_t b) +vqmovnh_u16 (uint16_t __a) { - return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a, - (int16x8_t) b); + return (uint8_t) __builtin_aarch64_uqmovnhi (__a); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_u32 (uint32x4_t a, uint32x4_t b) +vqmovns_u32 (uint32_t __a) { - return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a, - (int32x4_t) b); + return (uint16_t) __builtin_aarch64_uqmovnsi (__a); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_f32 (float32x2_t a, float32x2_t b) +vqmovnd_u64 (uint64_t __a) { - return __builtin_aarch64_smin_nanpv2sf (a, b); + return (uint32_t) __builtin_aarch64_uqmovndi (__a); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_f32 (float32x4_t a, float32x4_t b) -{ - return __builtin_aarch64_smin_nanpv4sf (a, b); -} +/* vqmovun */ -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_f64 (float64x2_t a, float64x2_t b) +vqmovun_s16 (int16x8_t __a) { - return __builtin_aarch64_smin_nanpv2df (a, b); + return __builtin_aarch64_sqmovunv8hi_us (__a); } -__extension__ extern __inline float64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vpminqd_f64 (float64x2_t a) +vqmovun_s32 (int32x4_t __a) { - return __builtin_aarch64_reduc_smin_nan_scal_v2df (a); + return __builtin_aarch64_sqmovunv4si_us (__a); } -__extension__ extern __inline float32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmins_f32 (float32x2_t a) +vqmovun_s64 (int64x2_t __a) { - return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a); + return __builtin_aarch64_sqmovunv2di_us (__a); } -/* vpminnm */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminnm_f32 (float32x2_t a, float32x2_t b) +vqmovunh_s16 (int16_t __a) { - return __builtin_aarch64_sminpv2sf (a, b); + return __builtin_aarch64_sqmovunhi_us (__a); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminnmq_f32 (float32x4_t a, float32x4_t b) +vqmovuns_s32 (int32_t __a) { - return __builtin_aarch64_sminpv4sf (a, b); + return __builtin_aarch64_sqmovunsi_us (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminnmq_f64 (float64x2_t a, float64x2_t b) +vqmovund_s64 (int64_t __a) { - return __builtin_aarch64_sminpv2df (a, b); + return __builtin_aarch64_sqmovundi_us (__a); } -__extension__ extern __inline float64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminnmqd_f64 (float64x2_t a) -{ - return __builtin_aarch64_reduc_smin_scal_v2df (a); -} +/* vqneg */ -__extension__ extern __inline float32_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminnms_f32 (float32x2_t a) +vqnegq_s64 (int64x2_t __a) { - return __builtin_aarch64_reduc_smin_scal_v2sf (a); + return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); } -/* vmaxnm */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnm_f32 (float32x2_t __a, float32x2_t __b) +vqnegb_s8 (int8_t __a) { - return __builtin_aarch64_fmaxv2sf (__a, __b); + return (int8_t) __builtin_aarch64_sqnegqi (__a); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnm_f64 (float64x1_t __a, float64x1_t __b) +vqnegh_s16 (int16_t __a) { - return (float64x1_t) - { __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0), - vget_lane_f64 (__b, 0)) }; + return (int16_t) __builtin_aarch64_sqneghi (__a); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) +vqnegs_s32 (int32_t __a) { - return __builtin_aarch64_fmaxv4sf (__a, __b); + return (int32_t) __builtin_aarch64_sqnegsi (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) +vqnegd_s64 (int64_t __a) { - return __builtin_aarch64_fmaxv2df (__a, __b); + return __builtin_aarch64_sqnegdi (__a); } -/* vmaxv */ +/* vqrdmulh */ -__extension__ extern __inline float32_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vmaxv_f32 (float32x2_t __a) +vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a); + return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c); } -__extension__ extern __inline int8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxv_s8 (int8x8_t __a) +vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_scal_v8qi (__a); + return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxv_s16 (int16x4_t __a) +vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_scal_v4hi (__a); + return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxv_s32 (int32x2_t __a) +vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_scal_v2si (__a); + return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); } -__extension__ extern __inline uint8_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxv_u8 (uint8x8_t __a) +vqrdmulhh_s16 (int16_t __a, int16_t __b) { - return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a); + return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b); } -__extension__ extern __inline uint16_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxv_u16 (uint16x4_t __a) +vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { - return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a); + return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxv_u32 (uint32x2_t __a) +vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) { - return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a); + return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c); } -__extension__ extern __inline float32_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_f32 (float32x4_t __a) +vqrdmulhs_s32 (int32_t __a, int32_t __b) { - return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a); + return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_f64 (float64x2_t __a) +vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a); + return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } -__extension__ extern __inline int8_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_s8 (int8x16_t __a) +vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_reduc_smax_scal_v16qi (__a); + return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c); } -__extension__ extern __inline int16_t +/* vqrshl */ + +__extension__ extern __inline int8x8_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_s16 (int16x8_t __a) +vqrshl_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_reduc_smax_scal_v8hi (__a); + return __builtin_aarch64_sqrshlv8qi (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_s32 (int32x4_t __a) +vqrshl_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_reduc_smax_scal_v4si (__a); + return __builtin_aarch64_sqrshlv4hi (__a, __b); } -__extension__ extern __inline uint8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_u8 (uint8x16_t __a) +vqrshl_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a); + return __builtin_aarch64_sqrshlv2si (__a, __b); } -__extension__ extern __inline uint16_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_u16 (uint16x8_t __a) +vqrshl_s64 (int64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a); + return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])}; } -__extension__ extern __inline uint32_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_u32 (uint32x4_t __a) +vqrshl_u8 (uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a); + return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b); } -/* vmaxnmv */ - -__extension__ extern __inline float32_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmv_f32 (float32x2_t __a) +vqrshl_u16 (uint16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_reduc_smax_scal_v2sf (__a); + return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); } -__extension__ extern __inline float32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmvq_f32 (float32x4_t __a) +vqrshl_u32 (uint32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_reduc_smax_scal_v4sf (__a); + return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmvq_f64 (float64x2_t __a) +vqrshl_u64 (uint64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_reduc_smax_scal_v2df (__a); + return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])}; } -/* vmin */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_f32 (float32x2_t __a, float32x2_t __b) +vqrshlq_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_smin_nanv2sf (__a, __b); + return __builtin_aarch64_sqrshlv16qi (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_f64 (float64x1_t __a, float64x1_t __b) +vqrshlq_s16 (int16x8_t __a, int16x8_t __b) { - return (float64x1_t) - { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0), - vget_lane_f64 (__b, 0)) }; + return __builtin_aarch64_sqrshlv8hi (__a, __b); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-vmin_s8 (int8x8_t __a, int8x8_t __b) +vqrshlq_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_aarch64_sminv8qi (__a, __b); + return __builtin_aarch64_sqrshlv4si (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_s16 (int16x4_t __a, int16x4_t __b) +vqrshlq_s64 (int64x2_t __a, int64x2_t __b) { - return __builtin_aarch64_sminv4hi (__a, __b); + return __builtin_aarch64_sqrshlv2di (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_s32 (int32x2_t __a, int32x2_t __b) +vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_sminv2si (__a, __b); + return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_u8 (uint8x8_t __a, uint8x8_t __b) +vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_u16 (uint16x4_t __a, uint16x4_t __b) +vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_u32 (uint32x2_t __a, uint32x2_t __b) +vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_f32 (float32x4_t __a, float32x4_t __b) +vqrshlb_s8 (int8_t __a, int8_t __b) { - return __builtin_aarch64_smin_nanv4sf (__a, __b); + return __builtin_aarch64_sqrshlqi (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_f64 (float64x2_t __a, float64x2_t __b) +vqrshlh_s16 (int16_t __a, int16_t __b) { - return __builtin_aarch64_smin_nanv2df (__a, __b); + return __builtin_aarch64_sqrshlhi (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_s8 (int8x16_t __a, int8x16_t __b) +vqrshls_s32 (int32_t __a, int32_t __b) { - return __builtin_aarch64_sminv16qi (__a, __b); + return __builtin_aarch64_sqrshlsi (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_s16 (int16x8_t __a, int16x8_t __b) +vqrshld_s64 (int64_t __a, int64_t __b) { - return __builtin_aarch64_sminv8hi (__a, __b); + return __builtin_aarch64_sqrshldi (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_s32 (int32x4_t __a, int32x4_t __b) +vqrshlb_u8 
(uint8_t __a, int8_t __b) { - return __builtin_aarch64_sminv4si (__a, __b); + return __builtin_aarch64_uqrshlqi_uus (__a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_u8 (uint8x16_t __a, uint8x16_t __b) +vqrshlh_u16 (uint16_t __a, int16_t __b) { - return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_uqrshlhi_uus (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_u16 (uint16x8_t __a, uint16x8_t __b) +vqrshls_u32 (uint32_t __a, int32_t __b) { - return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_uqrshlsi_uus (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_u32 (uint32x4_t __a, uint32x4_t __b) +vqrshld_u64 (uint64_t __a, int64_t __b) { - return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_uqrshldi_uus (__a, __b); } -/* vminnm */ +/* vqrshrn */ -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnm_f32 (float32x2_t __a, float32x2_t __b) +vqrshrn_n_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_fminv2sf (__a, __b); + return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnm_f64 (float64x1_t __a, float64x1_t __b) +vqrshrn_n_s32 (int32x4_t __a, const int __b) { - return (float64x1_t) - { __builtin_aarch64_fmindf (vget_lane_f64 (__a, 0), - vget_lane_f64 (__b, 0)) }; + return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmq_f32 (float32x4_t __a, float32x4_t __b) +vqrshrn_n_s64 (int64x2_t __a, const int __b) { - return __builtin_aarch64_fminv4sf (__a, __b); + return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmq_f64 (float64x2_t __a, float64x2_t __b) +vqrshrn_n_u16 (uint16x8_t __a, const int __b) { - return __builtin_aarch64_fminv2df (__a, __b); + return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); } -/* vminv */ +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); +} -__extension__ extern __inline float32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_f32 (float32x2_t __a) +vqrshrn_n_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a); + return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); } __extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_s8 (int8x8_t __a) +vqrshrnh_n_s16 (int16_t __a, const int __b) { - 
return __builtin_aarch64_reduc_smin_scal_v8qi (__a); + return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_s16 (int16x4_t __a) +vqrshrns_n_s32 (int32_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_scal_v4hi (__a); + return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_s32 (int32x2_t __a) +vqrshrnd_n_s64 (int64_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_scal_v2si (__a); + return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); } __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_u8 (uint8x8_t __a) +vqrshrnh_n_u16 (uint16_t __a, const int __b) { - return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a); + return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_u16 (uint16x4_t __a) +vqrshrns_n_u32 (uint32_t __a, const int __b) { - return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a); + return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_u32 (uint32x2_t __a) +vqrshrnd_n_u64 (uint64_t __a, const int __b) { - return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a); + return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); } -__extension__ extern __inline float32_t +/* vqrshrun */ + +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_f32 (float32x4_t __a) +vqrshrun_n_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a); + return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_f64 (float64x2_t __a) +vqrshrun_n_s32 (int32x4_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a); + return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); } __extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_s8 (int8x16_t __a) +vqrshrunh_n_s16 (int16_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_scal_v16qi (__a); + return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_s16 (int16x8_t __a) +vqrshruns_n_s32 (int32_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_scal_v8hi (__a); + return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_s32 (int32x4_t __a) +vqrshrund_n_s64 (int64_t __a, const int __b) { - return __builtin_aarch64_reduc_smin_scal_v4si (__a); + return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); } -__extension__ extern __inline uint8_t +/* vqshl */ + +__extension__ extern __inline 
int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_u8 (uint8x16_t __a) +vqshl_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a); + return __builtin_aarch64_sqshlv8qi (__a, __b); } -__extension__ extern __inline uint16_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_u16 (uint16x8_t __a) +vqshl_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a); + return __builtin_aarch64_sqshlv4hi (__a, __b); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_u32 (uint32x4_t __a) +vqshl_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a); + return __builtin_aarch64_sqshlv2si (__a, __b); } -/* vminnmv */ - -__extension__ extern __inline float32_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmv_f32 (float32x2_t __a) +vqshl_s64 (int64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_reduc_smin_scal_v2sf (__a); + return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])}; } -__extension__ extern __inline float32_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmvq_f32 (float32x4_t __a) +vqshl_u8 (uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_reduc_smin_scal_v4sf (__a); + return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); } -__extension__ extern __inline float64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmvq_f64 (float64x2_t __a) +vqshl_u16 (uint16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_reduc_smin_scal_v2df (__a); + return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); } -/* vmla */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +vqshl_u32 (uint32x2_t __a, int32x2_t __b) { - return a + b * c; + return __builtin_aarch64_uqshlv2si_uus ( __a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +vqshl_u64 (uint64x1_t __a, int64x1_t __b) { - return __a + __b * __c; + return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])}; } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +vqshlq_s8 (int8x16_t __a, int8x16_t __b) { - return a + b * c; + return __builtin_aarch64_sqshlv16qi (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +vqshlq_s16 (int16x8_t __a, int16x8_t __b) { - return a + b * c; + return __builtin_aarch64_sqshlv8hi (__a, __b); } -/* vmla_lane */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_lane_f32 (float32x2_t __a, float32x2_t __b, - float32x2_t __c, 
const int __lane) +vqshlq_s32 (int32x4_t __a, int32x4_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlv4si (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_lane_s16 (int16x4_t __a, int16x4_t __b, - int16x4_t __c, const int __lane) +vqshlq_s64 (int64x2_t __a, int64x2_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlv2di (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_lane_s32 (int32x2_t __a, int32x2_t __b, - int32x2_t __c, const int __lane) +vqshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, - uint16x4_t __c, const int __lane) +vqshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, - uint32x2_t __c, const int __lane) +vqshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshlv4si_uus ( __a, __b); } -/* vmla_laneq */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, - float32x4_t __c, const int __lane) +vqshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshlv2di_uus ( __a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_laneq_s16 (int16x4_t __a, int16x4_t __b, - int16x8_t __c, const int __lane) +vqshlb_s8 (int8_t __a, int8_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlqi (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_laneq_s32 (int32x2_t __a, int32x2_t __b, - int32x4_t __c, const int __lane) +vqshlh_s16 (int16_t __a, int16_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlhi (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b, - uint16x8_t __c, const int __lane) +vqshls_s32 (int32_t __a, int32_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlsi (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b, - 
uint32x4_t __c, const int __lane) +vqshld_s64 (int64_t __a, int64_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshldi (__a, __b); } -/* vmlaq_lane */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, - float32x2_t __c, const int __lane) +vqshlb_u8 (uint8_t __a, int8_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshlqi_uus (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, - int16x4_t __c, const int __lane) +vqshlh_u16 (uint16_t __a, int16_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshlhi_uus (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, - int32x2_t __c, const int __lane) +vqshls_u32 (uint32_t __a, int32_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshlsi_uus (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, - uint16x4_t __c, const int __lane) +vqshld_u64 (uint64_t __a, int64_t __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshldi_uus (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, - uint32x2_t __c, const int __lane) +vqshl_n_s8 (int8x8_t __a, const int __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b); } - /* vmlaq_laneq */ +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b); +} -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, - float32x4_t __c, const int __lane) +vqshl_n_s32 (int32x2_t __a, const int __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b, - int16x8_t __c, const int __lane) +vqshl_n_s64 (int64x1_t __a, const int __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)}; } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b, - int32x4_t __c, const int __lane) +vqshl_n_u8 (uint8x8_t __a, const int __b) { - return (__a + (__b * 
__aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, - uint16x8_t __c, const int __lane) +vqshl_n_u16 (uint16x4_t __a, const int __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, - uint32x4_t __c, const int __lane) +vqshl_n_u32 (uint32x2_t __a, const int __b) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); } -/* vmls */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +vqshl_n_u64 (uint64x1_t __a, const int __b) { - return a - b * c; + return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)}; } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +vqshlq_n_s8 (int8x16_t __a, const int __b) { - return __a - __b * __c; + return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +vqshlq_n_s16 (int16x8_t __a, const int __b) { - return a - b * c; + return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +vqshlq_n_s32 (int32x4_t __a, const int __b) { - return a - b * c; + return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b); } -/* vmls_lane */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_lane_f32 (float32x2_t __a, float32x2_t __b, - float32x2_t __c, const int __lane) +vqshlq_n_s64 (int64x2_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_lane_s16 (int16x4_t __a, int16x4_t __b, - int16x4_t __c, const int __lane) +vqshlq_n_u8 (uint8x16_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_lane_s32 (int32x2_t __a, int32x2_t __b, - int32x2_t __c, const int __lane) +vqshlq_n_u16 (uint16x8_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b); } 
-__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, - uint16x4_t __c, const int __lane) +vqshlq_n_u32 (uint32x4_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, - uint32x2_t __c, const int __lane) +vqshlq_n_u64 (uint64x2_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); } -/* vmls_laneq */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, - float32x4_t __c, const int __lane) +vqshlb_n_s8 (int8_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_laneq_s16 (int16x4_t __a, int16x4_t __b, - int16x8_t __c, const int __lane) +vqshlh_n_s16 (int16_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_laneq_s32 (int32x2_t __a, int32x2_t __b, - int32x4_t __c, const int __lane) +vqshls_n_s32 (int32_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b, - uint16x8_t __c, const int __lane) +vqshld_n_s64 (int64_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshl_ndi (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b, - uint32x4_t __c, const int __lane) +vqshlb_n_u8 (uint8_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nqi_uus (__a, __b); } -/* vmlsq_lane */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, - float32x2_t __c, const int __lane) +vqshlh_n_u16 (uint16_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nhi_uus (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, - int16x4_t __c, const int __lane) +vqshls_n_u32 (uint32_t __a, const int __b) { - return (__a - (__b * 
__aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_nsi_uus (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, - int32x2_t __c, const int __lane) +vqshld_n_u64 (uint64_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_uqshl_ndi_uus (__a, __b); } -__extension__ extern __inline uint16x8_t +/* vqshlu */ + +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, - uint16x4_t __c, const int __lane) +vqshlu_n_s8 (int8x8_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, - uint32x2_t __c, const int __lane) +vqshlu_n_s16 (int16x4_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); } - /* vmlsq_laneq */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, - float32x4_t __c, const int __lane) +vqshlu_n_s32 (int32x2_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b, - int16x8_t __c, const int __lane) +vqshlu_n_s64 (int64x1_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)}; } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b, - int32x4_t __c, const int __lane) +vqshluq_n_s8 (int8x16_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); } + __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, - uint16x8_t __c, const int __lane) +vqshluq_n_s16 (int16x8_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, - uint32x4_t __c, const int __lane) +vqshluq_n_s32 (int32x4_t __a, const int __b) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); } -/* vmov_n_ */ - -__extension__ extern __inline float16x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_f16 (float16_t __a) +vqshluq_n_s64 (int64x2_t __a, const int __b) { - 
return vdup_n_f16 (__a); + return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_f32 (float32_t __a) +vqshlub_n_s8 (int8_t __a, const int __b) { - return vdup_n_f32 (__a); + return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_f64 (float64_t __a) +vqshluh_n_s16 (int16_t __a, const int __b) { - return (float64x1_t) {__a}; + return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_p8 (poly8_t __a) +vqshlus_n_s32 (int32_t __a, const int __b) { - return vdup_n_p8 (__a); + return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_p16 (poly16_t __a) +vqshlud_n_s64 (int64_t __a, const int __b) { - return vdup_n_p16 (__a); + return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); } -__extension__ extern __inline poly64x1_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_p64 (poly64_t __a) -{ - return vdup_n_p64 (__a); -} +/* vqshrn */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_s8 (int8_t __a) +vqshrn_n_s16 (int16x8_t __a, const int __b) { - return vdup_n_s8 (__a); + return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_s16 (int16_t __a) +vqshrn_n_s32 (int32x4_t __a, const int __b) { - return vdup_n_s16 (__a); + return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_s32 (int32_t __a) -{ - return vdup_n_s32 (__a); -} - -__extension__ extern __inline int64x1_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_s64 (int64_t __a) +vqshrn_n_s64 (int64x2_t __a, const int __b) { - return (int64x1_t) {__a}; + return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_u8 (uint8_t __a) +vqshrn_n_u16 (uint16x8_t __a, const int __b) { - return vdup_n_u8 (__a); + return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_u16 (uint16_t __a) +vqshrn_n_u32 (uint32x4_t __a, const int __b) { - return vdup_n_u16 (__a); + return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_u32 (uint32_t __a) +vqshrn_n_u64 (uint64x2_t __a, const int __b) { - return vdup_n_u32 (__a); + return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmov_n_u64 (uint64_t __a) +vqshrnh_n_s16 (int16_t __a, const int __b) { - return 
(uint64x1_t) {__a}; + return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_f16 (float16_t __a) +vqshrns_n_s32 (int32_t __a, const int __b) { - return vdupq_n_f16 (__a); + return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_f32 (float32_t __a) +vqshrnd_n_s64 (int64_t __a, const int __b) { - return vdupq_n_f32 (__a); + return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_f64 (float64_t __a) +vqshrnh_n_u16 (uint16_t __a, const int __b) { - return vdupq_n_f64 (__a); + return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_p8 (poly8_t __a) +vqshrns_n_u32 (uint32_t __a, const int __b) { - return vdupq_n_p8 (__a); + return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_p16 (poly16_t __a) +vqshrnd_n_u64 (uint64_t __a, const int __b) { - return vdupq_n_p16 (__a); + return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); } -__extension__ extern __inline poly64x2_t +/* vqshrun */ + +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_p64 (poly64_t __a) +vqshrun_n_s16 (int16x8_t __a, const int __b) { - return vdupq_n_p64 (__a); + return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_s8 (int8_t __a) +vqshrun_n_s32 (int32x4_t __a, const int __b) { - return vdupq_n_s8 (__a); + return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_s16 (int16_t __a) +vqshrun_n_s64 (int64x2_t __a, const int __b) { - return vdupq_n_s16 (__a); + return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_s32 (int32_t __a) +vqshrunh_n_s16 (int16_t __a, const int __b) { - return vdupq_n_s32 (__a); + return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_s64 (int64_t __a) +vqshruns_n_s32 (int32_t __a, const int __b) { - return vdupq_n_s64 (__a); + return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_u8 (uint8_t __a) +vqshrund_n_s64 (int64_t __a, const int __b) { - return vdupq_n_u8 (__a); + return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b); } 
-__extension__ extern __inline uint16x8_t +/* vqsub */ + +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_u16 (uint16_t __a) +vqsubb_s8 (int8_t __a, int8_t __b) { - return vdupq_n_u16 (__a); + return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_u32 (uint32_t __a) +vqsubh_s16 (int16_t __a, int16_t __b) { - return vdupq_n_u32 (__a); + return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmovq_n_u64 (uint64_t __a) +vqsubs_s32 (int32_t __a, int32_t __b) { - return vdupq_n_u64 (__a); + return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); } -/* vmul_lane */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane) +vqsubd_s64 (int64_t __a, int64_t __b) { - return __a * __aarch64_vget_lane_any (__b, __lane); + return __builtin_aarch64_sqsubdi (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane) +vqsubb_u8 (uint8_t __a, uint8_t __b) { - return __a * __b; + return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane) +vqsubh_u16 (uint16_t __a, uint16_t __b) { - return __a * __aarch64_vget_lane_any (__b, __lane); + return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane) +vqsubs_u32 (uint32_t __a, uint32_t __b) { - return __a * __aarch64_vget_lane_any (__b, __lane); + return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane) +vqsubd_u64 (uint64_t __a, uint64_t __b) { - return __a * __aarch64_vget_lane_any (__b, __lane); + return __builtin_aarch64_uqsubdi_uuu (__a, __b); } -__extension__ extern __inline uint32x2_t +/* vqtbl2 */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane) +vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); + return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); } -/* vmuld_lane */ - -__extension__ extern __inline float64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int 
__lane) +vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); } -__extension__ extern __inline float64_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane) +vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); } -/* vmuls_lane */ +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); +} -__extension__ extern __inline float32_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane) +vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); } -__extension__ extern __inline float32_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane) +vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); } -/* vmul_laneq */ +/* vqtbl3 */ -__extension__ extern __inline float32x2_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane) +vqtbl3_s8 (int8x16x3_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane) +vqtbl3_u8 (uint8x16x3_t __tab, uint8x8_t __idx) { - return __a * 
__aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane) +vqtbl3_p8 (poly8x16x3_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane) +vqtbl3q_s8 (int8x16x3_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane) +vqtbl3q_u8 (uint8x16x3_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane) +vqtbl3q_p8 (poly8x16x3_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); } -/* vmul_n */ +/* vqtbl4 */ -__extension__ extern __inline float64x1_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_n_f64 (float64x1_t __a, float64_t __b) +vqtbl4_s8 (int8x16x4_t __tab, uint8x8_t __idx) { - return (float64x1_t) { vget_lane_f64 (__a, 0) * __b }; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); } -/* vmulq_lane */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane) +vqtbl4_u8 (uint8x16x4_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane) +vqtbl4_p8 (poly8x16x4_t __tab, uint8x8_t __idx) { - __AARCH64_LANE_CHECK (__a, __lane); - return __a * __b[0]; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane) +vqtbl4q_s8 (int8x16x4_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane) +vqtbl4q_u8 (uint8x16x4_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane) +vqtbl4q_p8 (poly8x16x4_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_xi __o; + __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); } -__extension__ extern __inline uint32x4_t + +/* vqtbx2 */ +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane) +vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); + return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx); } -/* vmulq_laneq */ - -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane) +vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane) +vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane) +vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); + return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane) +vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, + (int8x16_t)__idx); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane) +vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, + (int8x16_t)__idx); } -__extension__ extern __inline uint32x4_t +/* vqtbx3 */ +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane) +vqtbx3_s8 (int8x8_t __r, int8x16x3_t __tab, uint8x8_t __idx) { - return __a * __aarch64_vget_lane_any (__b, __lane); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); + return __builtin_aarch64_qtbx3v8qi (__r, __o, (int8x8_t)__idx); } -/* vmul_n. */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_n_f32 (float32x2_t __a, float32_t __b) +vqtbx3_u8 (uint8x8_t __r, uint8x16x3_t __tab, uint8x8_t __idx) { - return __a * __b; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_n_f32 (float32x4_t __a, float32_t __b) +vqtbx3_p8 (poly8x8_t __r, poly8x16x3_t __tab, uint8x8_t __idx) { - return __a * __b; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_n_f64 (float64x2_t __a, float64_t __b) +vqtbx3q_s8 (int8x16_t __r, int8x16x3_t __tab, uint8x16_t __idx) { - return __a * __b; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); + return __builtin_aarch64_qtbx3v16qi (__r, __o, (int8x16_t)__idx); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_n_s16 (int16x4_t __a, int16_t __b) +vqtbx3q_u8 (uint8x16_t __r, uint8x16x3_t __tab, uint8x16_t __idx) { - return __a * __b; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t)__tab.val[2], 2); + return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, + (int8x16_t)__idx); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_n_s16 (int16x8_t __a, int16_t __b) +vqtbx3q_p8 (poly8x16_t __r, poly8x16x3_t __tab, uint8x16_t __idx) { - return __a * __b; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, + (int8x16_t)__idx); } -__extension__ extern __inline int32x2_t +/* vqtbx4 */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_n_s32 (int32x2_t __a, int32_t __b) +vqtbx4_s8 (int8x8_t __r, int8x16x4_t __tab, uint8x8_t __idx) { - return __a * __b; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); + return __builtin_aarch64_qtbx4v8qi (__r, __o, (int8x8_t)__idx); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_n_s32 (int32x4_t __a, int32_t __b) +vqtbx4_u8 (uint8x8_t __r, uint8x16x4_t __tab, uint8x8_t __idx) { - return __a * __b; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_n_u16 (uint16x4_t __a, uint16_t __b) +vqtbx4_p8 (poly8x8_t __r, poly8x16x4_t __tab, uint8x8_t __idx) { - return __a * __b; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_n_u16 (uint16x8_t __a, uint16_t __b) +vqtbx4q_s8 (int8x16_t __r, int8x16x4_t __tab, uint8x16_t __idx) { - return __a * __b; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); + return __builtin_aarch64_qtbx4v16qi (__r, __o, (int8x16_t)__idx); } -__extension__ extern __inline uint32x2_t +__extension__ extern 
__inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_n_u32 (uint32x2_t __a, uint32_t __b) +vqtbx4q_u8 (uint8x16_t __r, uint8x16x4_t __tab, uint8x16_t __idx) { - return __a * __b; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, + (int8x16_t)__idx); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_n_u32 (uint32x4_t __a, uint32_t __b) +vqtbx4q_p8 (poly8x16_t __r, poly8x16x4_t __tab, uint8x16_t __idx) { - return __a * __b; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, + (int8x16_t)__idx); } -/* vmvn */ +/* vrbit */ __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvn_p8 (poly8x8_t __a) +vrbit_p8 (poly8x8_t __a) { - return (poly8x8_t) ~((int8x8_t) __a); + return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvn_s8 (int8x8_t __a) +vrbit_s8 (int8x8_t __a) { - return ~__a; + return __builtin_aarch64_rbitv8qi (__a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvn_s16 (int16x4_t __a) +vrbit_u8 (uint8x8_t __a) { - return ~__a; + return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvn_s32 (int32x2_t __a) +vrbitq_p8 (poly8x16_t __a) { - return ~__a; + return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvn_u8 (uint8x8_t __a) +vrbitq_s8 (int8x16_t __a) { - return ~__a; + return __builtin_aarch64_rbitv16qi (__a); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvn_u16 (uint16x4_t __a) +vrbitq_u8 (uint8x16_t __a) { - return ~__a; + return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); } +/* vrecpe */ + __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvn_u32 (uint32x2_t __a) +vrecpe_u32 (uint32x2_t __a) { - return ~__a; + return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvnq_p8 (poly8x16_t __a) +vrecpeq_u32 (uint32x4_t __a) { - return (poly8x16_t) ~((int8x16_t) __a); + 
return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvnq_s8 (int8x16_t __a) +vrecpes_f32 (float32_t __a) { - return ~__a; + return __builtin_aarch64_frecpesf (__a); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvnq_s16 (int16x8_t __a) +vrecped_f64 (float64_t __a) { - return ~__a; + return __builtin_aarch64_frecpedf (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvnq_s32 (int32x4_t __a) +vrecpe_f32 (float32x2_t __a) { - return ~__a; + return __builtin_aarch64_frecpev2sf (__a); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvnq_u8 (uint8x16_t __a) +vrecpe_f64 (float64x1_t __a) { - return ~__a; + return (float64x1_t) { vrecped_f64 (vget_lane_f64 (__a, 0)) }; } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvnq_u16 (uint16x8_t __a) +vrecpeq_f32 (float32x4_t __a) { - return ~__a; + return __builtin_aarch64_frecpev4sf (__a); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmvnq_u32 (uint32x4_t __a) +vrecpeq_f64 (float64x2_t __a) { - return ~__a; + return __builtin_aarch64_frecpev2df (__a); } -/* vneg */ +/* vrecps */ -__extension__ extern __inline float32x2_t +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vneg_f32 (float32x2_t __a) +vrecpss_f32 (float32_t __a, float32_t __b) { - return -__a; + return __builtin_aarch64_frecpssf (__a, __b); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vneg_f64 (float64x1_t __a) +vrecpsd_f64 (float64_t __a, float64_t __b) { - return -__a; + return __builtin_aarch64_frecpsdf (__a, __b); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vneg_s8 (int8x8_t __a) +vrecps_f32 (float32x2_t __a, float32x2_t __b) { - return -__a; + return __builtin_aarch64_frecpsv2sf (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vneg_s16 (int16x4_t __a) +vrecps_f64 (float64x1_t __a, float64x1_t __b) { - return -__a; + return (float64x1_t) { vrecpsd_f64 (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vneg_s32 (int32x2_t __a) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) { - return -__a; + return __builtin_aarch64_frecpsv4sf (__a, __b); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vneg_s64 (int64x1_t __a) +vrecpsq_f64 (float64x2_t __a, float64x2_t __b) { - return -__a; + return __builtin_aarch64_frecpsv2df (__a, __b); } 
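(Illustrative aside, not part of the patch: the vqtbl2/vqtbx2 and wider table-lookup intrinsics added in the preceding hunks perform a byte lookup across a 32/48/64-byte table held in multiple q registers; with TBL an out-of-range index yields 0, while TBX leaves the destination byte unchanged. A minimal sketch, with lookup_32_byte_table being a hypothetical helper name:

#include <arm_neon.h>

/* Look up eight bytes from a 32-byte table; vqtbl2_u8 expands to
   __builtin_aarch64_tbl3v8qi via the oi-mode register pair set up above.  */
uint8x8_t
lookup_32_byte_table (uint8x16x2_t table, uint8x8_t indices)
{
  return vqtbl2_u8 (table, indices);
}

End of aside.)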
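(Illustrative aside, not part of the patch: vrecpe_f32 and vrecps_f32, defined above via __builtin_aarch64_frecpev2sf and __builtin_aarch64_frecpsv2sf, are intended to be combined in a Newton-Raphson iteration; vrecps computes (2 - a*x), so x * vrecps(a, x) refines an estimate of 1/a. A sketch with the hypothetical helper name approx_reciprocal:

#include <arm_neon.h>

/* Refine the coarse hardware reciprocal estimate twice.  */
float32x2_t
approx_reciprocal (float32x2_t a)
{
  float32x2_t x = vrecpe_f32 (a);        /* initial estimate of 1/a */
  x = vmul_f32 (x, vrecps_f32 (a, x));   /* first Newton-Raphson step */
  x = vmul_f32 (x, vrecps_f32 (a, x));   /* second Newton-Raphson step */
  return x;
}

End of aside.)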
-__extension__ extern __inline float32x4_t +/* vrecpx */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vnegq_f32 (float32x4_t __a) +vrecpxs_f32 (float32_t __a) { - return -__a; + return __builtin_aarch64_frecpxsf (__a); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vnegq_f64 (float64x2_t __a) +vrecpxd_f64 (float64_t __a) { - return -__a; + return __builtin_aarch64_frecpxdf (__a); } -__extension__ extern __inline int8x16_t + +/* vrev */ + +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vnegq_s8 (int8x16_t __a) +vrev16_p8 (poly8x8_t __a) { - return -__a; + return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vnegq_s16 (int16x8_t __a) +vrev16_s8 (int8x8_t __a) { - return -__a; + return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vnegq_s32 (int32x4_t __a) +vrev16_u8 (uint8x8_t __a) { - return -__a; + return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vnegq_s64 (int64x2_t __a) +vrev16q_p8 (poly8x16_t __a) { - return -__a; + return __builtin_shuffle (__a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); } -/* vpadd */ +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrev16q_s8 (int8x16_t __a) +{ + return __builtin_shuffle (__a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadd_f32 (float32x2_t __a, float32x2_t __b) +vrev16q_u8 (uint8x16_t __a) { - return __builtin_aarch64_faddpv2sf (__a, __b); + return __builtin_shuffle (__a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_f32 (float32x4_t __a, float32x4_t __b) +vrev32_p8 (poly8x8_t __a) { - return __builtin_aarch64_faddpv4sf (__a, __b); + return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_f64 (float64x2_t __a, float64x2_t __b) +vrev32_p16 (poly16x4_t __a) { - return __builtin_aarch64_faddpv2df (__a, __b); + return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadd_s8 (int8x8_t __a, int8x8_t __b) +vrev32_s8 (int8x8_t __a) { - return __builtin_aarch64_addpv8qi (__a, __b); + return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-vpadd_s16 (int16x4_t __a, int16x4_t __b) +vrev32_s16 (int16x4_t __a) { - return __builtin_aarch64_addpv4hi (__a, __b); + return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadd_s32 (int32x2_t __a, int32x2_t __b) +vrev32_u8 (uint8x8_t __a) { - return __builtin_aarch64_addpv2si (__a, __b); + return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +vrev32_u16 (uint16x4_t __a) { - return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +vrev32q_p8 (poly8x16_t __a) { - return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_shuffle (__a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +vrev32q_p16 (poly16x8_t __a) { - return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); } -__extension__ extern __inline float32_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadds_f32 (float32x2_t __a) +vrev32q_s8 (int8x16_t __a) { - return __builtin_aarch64_reduc_plus_scal_v2sf (__a); + return __builtin_shuffle (__a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); } -__extension__ extern __inline float64_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddd_f64 (float64x2_t __a) +vrev32q_s16 (int16x8_t __a) { - return __builtin_aarch64_reduc_plus_scal_v2df (__a); + return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); } -__extension__ extern __inline int64_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddd_s64 (int64x2_t __a) +vrev32q_u8 (uint8x16_t __a) { - return __builtin_aarch64_addpdi (__a); + return __builtin_shuffle (__a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddd_u64 (uint64x2_t __a) +vrev32q_u16 (uint16x8_t __a) { - return __builtin_aarch64_addpdi ((int64x2_t) __a); + return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); } -/* vqabs */ +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrev64_f16 (float16x4_t __a) +{ + return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); +} -__extension__ extern __inline int64x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqabsq_s64 (int64x2_t __a) +vrev64_f32 
(float32x2_t __a) { - return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); + return __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); } -__extension__ extern __inline int8_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqabsb_s8 (int8_t __a) +vrev64_p8 (poly8x8_t __a) { - return (int8_t) __builtin_aarch64_sqabsqi (__a); + return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); } -__extension__ extern __inline int16_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqabsh_s16 (int16_t __a) +vrev64_p16 (poly16x4_t __a) { - return (int16_t) __builtin_aarch64_sqabshi (__a); + return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); } -__extension__ extern __inline int32_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqabss_s32 (int32_t __a) +vrev64_s8 (int8x8_t __a) { - return (int32_t) __builtin_aarch64_sqabssi (__a); + return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); } -__extension__ extern __inline int64_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqabsd_s64 (int64_t __a) +vrev64_s16 (int16x4_t __a) { - return __builtin_aarch64_sqabsdi (__a); + return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); } -/* vqadd */ +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrev64_s32 (int32x2_t __a) +{ + return __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); +} -__extension__ extern __inline int8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqaddb_s8 (int8_t __a, int8_t __b) +vrev64_u8 (uint8x8_t __a) { - return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); + return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqaddh_s16 (int16_t __a, int16_t __b) +vrev64_u16 (uint16x4_t __a) { - return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); + return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqadds_s32 (int32_t __a, int32_t __b) +vrev64_u32 (uint32x2_t __a) { - return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); + return __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); } -__extension__ extern __inline int64_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqaddd_s64 (int64_t __a, int64_t __b) +vrev64q_f16 (float16x8_t __a) { - return __builtin_aarch64_sqadddi (__a, __b); + return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); } -__extension__ extern __inline uint8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqaddb_u8 (uint8_t __a, uint8_t __b) +vrev64q_f32 (float32x4_t __a) { - return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); + return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); } -__extension__ extern __inline uint16_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqaddh_u16 (uint16_t __a, uint16_t __b) +vrev64q_p8 
(poly8x16_t __a) { - return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); + return __builtin_shuffle (__a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); } -__extension__ extern __inline uint32_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqadds_u32 (uint32_t __a, uint32_t __b) +vrev64q_p16 (poly16x8_t __a) { - return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); + return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); } -__extension__ extern __inline uint64_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqaddd_u64 (uint64_t __a, uint64_t __b) +vrev64q_s8 (int8x16_t __a) { - return __builtin_aarch64_uqadddi_uuu (__a, __b); + return __builtin_shuffle (__a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); } -/* vqdmlal */ +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrev64q_s16 (int16x8_t __a) +{ + return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +vrev64q_s32 (int32x4_t __a) { - return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c); + return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) +vrev64q_u8 (uint8x16_t __a) { - return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c); + return __builtin_shuffle (__a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, - int const __d) +vrev64q_u16 (uint16x8_t __a) { - return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); + return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, - int const __d) +vrev64q_u32 (uint32x4_t __a) { - return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d); + return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); } -__extension__ extern __inline int32x4_t +/* vrnd */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) +vrnd_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c); + return __builtin_aarch64_btruncv2sf (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) +vrnd_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d); + return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); } -__extension__ extern __inline int32x4_t +__extension__ extern 
__inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) +vrndq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d); + return __builtin_aarch64_btruncv4sf (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +vrndq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c); + return __builtin_aarch64_btruncv2df (__a); } -__extension__ extern __inline int64x2_t +/* vrnda */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +vrnda_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmlalv2si (__a, __b, __c); + return __builtin_aarch64_roundv2sf (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) +vrnda_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c); + return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, - int const __d) +vrndaq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); + return __builtin_aarch64_roundv4sf (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, - int const __d) +vrndaq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d); + return __builtin_aarch64_roundv2df (__a); } -__extension__ extern __inline int64x2_t +/* vrndi */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) +vrndi_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c); + return __builtin_aarch64_nearbyintv2sf (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) +vrndi_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); + return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) +vrndiq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d); + return __builtin_aarch64_nearbyintv4sf (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +vrndiq_f64 
(float64x2_t __a) { - return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); + return __builtin_aarch64_nearbyintv2df (__a); } -__extension__ extern __inline int32_t +/* vrndm */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c) +vrndm_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmlalhi (__a, __b, __c); + return __builtin_aarch64_floorv2sf (__a); } -__extension__ extern __inline int32_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) +vrndm_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); + return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); } -__extension__ extern __inline int32_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) +vrndmq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d); + return __builtin_aarch64_floorv4sf (__a); } -__extension__ extern __inline int64_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c) +vrndmq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqdmlalsi (__a, __b, __c); + return __builtin_aarch64_floorv2df (__a); } -__extension__ extern __inline int64_t +/* vrndn */ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) +vrndns_f32 (float32_t __a) { - return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); + return __builtin_aarch64_frintnsf (__a); } -__extension__ extern __inline int64_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) +vrndn_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d); + return __builtin_aarch64_frintnv2sf (__a); } -/* vqdmlsl */ - -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +vrndn_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c); + return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])}; } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) +vrndnq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c); + return __builtin_aarch64_frintnv4sf (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, - int const __d) +vrndnq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); + return __builtin_aarch64_frintnv2df (__a); } -__extension__ extern __inline int32x4_t +/* vrndp */ + +__extension__ extern __inline float32x2_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, - int const __d) +vrndp_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d); + return __builtin_aarch64_ceilv2sf (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) +vrndp_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c); + return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) +vrndpq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); + return __builtin_aarch64_ceilv4sf (__a); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) +vrndpq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d); + return __builtin_aarch64_ceilv2df (__a); } -__extension__ extern __inline int32x4_t +/* vrndx */ + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +vrndx_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c); + return __builtin_aarch64_rintv2sf (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +vrndx_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmlslv2si (__a, __b, __c); + return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) +vrndxq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c); + return __builtin_aarch64_rintv4sf (__a); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, - int const __d) +vrndxq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); + return __builtin_aarch64_rintv2df (__a); } -__extension__ extern __inline int64x2_t +/* vrshl */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, - int const __d) +vrshl_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d); + return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) +vrshl_s16 (int16x4_t 
__a, int16x4_t __b) { - return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c); + return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) +vrshl_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); + return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) +vrshl_s64 (int64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d); + return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])}; } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +vrshl_u8 (uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); + return __builtin_aarch64_urshlv8qi_uus (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c) +vrshl_u16 (uint16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_sqdmlslhi (__a, __b, __c); + return __builtin_aarch64_urshlv4hi_uus (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) +vrshl_u32 (uint32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); + return __builtin_aarch64_urshlv2si_uus (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) +vrshl_u64 (uint64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d); + return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])}; } -__extension__ extern __inline int64_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c) +vrshlq_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_sqdmlslsi (__a, __b, __c); + return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b); } -__extension__ extern __inline int64_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) +vrshlq_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); + return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b); } -__extension__ extern __inline int64_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) +vrshlq_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_aarch64_sqdmlsl_laneqsi (__a, 
__b, __c, __d); + return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b); } -/* vqdmulh */ - -__extension__ extern __inline int16x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +vrshlq_s64 (int64x2_t __a, int64x2_t __b) { - return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c); + return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +vrshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c); + return __builtin_aarch64_urshlv16qi_uus (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +vrshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c); + return __builtin_aarch64_urshlv8hi_uus (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +vrshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); + return __builtin_aarch64_urshlv4si_uus (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhh_s16 (int16_t __a, int16_t __b) +vrshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b); + return __builtin_aarch64_urshlv2di_uus (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) +vrshld_s64 (int64_t __a, int64_t __b) { - return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); + return __builtin_aarch64_srshldi (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) +vrshld_u64 (uint64_t __a, int64_t __b) { - return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c); + return __builtin_aarch64_urshldi_uus (__a, __b); } -__extension__ extern __inline int32_t +/* vrshr */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhs_s32 (int32_t __a, int32_t __b) +vrshr_n_s8 (int8x8_t __a, const int __b) { - return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b); + return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) +vrshr_n_s16 (int16x4_t __a, const int __b) { - return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); + return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) +vrshr_n_s32 (int32x2_t __a, const int __b) { - return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c); + return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b); } -/* vqdmull */ - -__extension__ extern __inline int32x4_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_s16 (int16x4_t __a, int16x4_t __b) +vrshr_n_s64 (int64x1_t __a, const int __b) { - return __builtin_aarch64_sqdmullv4hi (__a, __b); + return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)}; } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) +vrshr_n_u8 (uint8x8_t __a, const int __b) { - return __builtin_aarch64_sqdmull2v8hi (__a, __b); + return __builtin_aarch64_urshr_nv8qi_uus (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) +vrshr_n_u16 (uint16x4_t __a, const int __b) { - return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); + return __builtin_aarch64_urshr_nv4hi_uus (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c) +vrshr_n_u32 (uint32x2_t __a, const int __b) { - return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c); + return __builtin_aarch64_urshr_nv2si_uus (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) +vrshr_n_u64 (uint64x1_t __a, const int __b) { - return __builtin_aarch64_sqdmull2_nv8hi (__a, __b); + return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)}; } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) +vrshrq_n_s8 (int8x16_t __a, const int __b) { - return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); + return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c) +vrshrq_n_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c); + return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_n_s16 (int16x4_t __a, int16_t __b) +vrshrq_n_s32 (int32x4_t __a, const int __b) { - return __builtin_aarch64_sqdmull_nv4hi (__a, __b); + return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_s32 (int32x2_t __a, int32x2_t __b) +vrshrq_n_s64 (int64x2_t __a, const int __b) { - return __builtin_aarch64_sqdmullv2si (__a, __b); + return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b); } 
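(Usage note, not part of the patch.)  The hunks above define the rounding shifts for this section of arm_neon.h: vrshl_* shifts each lane by a signed, per-lane count taken from a second vector, and vrshr_n_* shifts right by an immediate, both expanding directly to the __builtin_aarch64_srshl*/urshl* and srshr_n*/urshr_n* builtins.  A minimal caller sketch, assuming a standalone aarch64 test file (the file layout and test values below are invented for illustration):

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int32x2_t v = {5, -5};

  /* Rounding shift right by an immediate: the rounding increment is
     added before the shift, so lane 0 becomes 3 rather than 2.  */
  int32x2_t r = vrshr_n_s32 (v, 1);

  /* Register-count form: positive counts shift left, negative counts
     shift right with rounding, per lane.  */
  int32x2_t s = vrshl_s32 (v, (int32x2_t) {1, -1});

  printf ("%d %d %d %d\n",
          vget_lane_s32 (r, 0), vget_lane_s32 (r, 1),
          vget_lane_s32 (s, 0), vget_lane_s32 (s, 1));
  return 0;
}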
-__extension__ extern __inline int64x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) +vrshrq_n_u8 (uint8x16_t __a, const int __b) { - return __builtin_aarch64_sqdmull2v4si (__a, __b); + return __builtin_aarch64_urshr_nv16qi_uus (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) +vrshrq_n_u16 (uint16x8_t __a, const int __b) { - return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); + return __builtin_aarch64_urshr_nv8hi_uus (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c) +vrshrq_n_u32 (uint32x4_t __a, const int __b) { - return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c); + return __builtin_aarch64_urshr_nv4si_uus (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) +vrshrq_n_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_sqdmull2_nv4si (__a, __b); + return __builtin_aarch64_urshr_nv2di_uus (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) +vrshrd_n_s64 (int64_t __a, const int __b) { - return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); + return __builtin_aarch64_srshr_ndi (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c) +vrshrd_n_u64 (uint64_t __a, const int __b) { - return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c); + return __builtin_aarch64_urshr_ndi_uus (__a, __b); } -__extension__ extern __inline int64x2_t +/* vrsqrte. 
*/ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmull_n_s32 (int32x2_t __a, int32_t __b) +vrsqrtes_f32 (float32_t __a) { - return __builtin_aarch64_sqdmull_nv2si (__a, __b); + return __builtin_aarch64_rsqrtesf (__a); } -__extension__ extern __inline int32_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmullh_s16 (int16_t __a, int16_t __b) +vrsqrted_f64 (float64_t __a) { - return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b); + return __builtin_aarch64_rsqrtedf (__a); } -__extension__ extern __inline int32_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) +vrsqrte_f32 (float32x2_t __a) { - return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); + return __builtin_aarch64_rsqrtev2sf (__a); } -__extension__ extern __inline int32_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) +vrsqrte_f64 (float64x1_t __a) { - return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c); + return (float64x1_t) {vrsqrted_f64 (vget_lane_f64 (__a, 0))}; } -__extension__ extern __inline int64_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulls_s32 (int32_t __a, int32_t __b) +vrsqrteq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqdmullsi (__a, __b); + return __builtin_aarch64_rsqrtev4sf (__a); } -__extension__ extern __inline int64_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c) +vrsqrteq_f64 (float64x2_t __a) { - return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); + return __builtin_aarch64_rsqrtev2df (__a); } -__extension__ extern __inline int64_t +/* vrsqrts. 
*/ + +__extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) +vrsqrtss_f32 (float32_t __a, float32_t __b) { - return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c); + return __builtin_aarch64_rsqrtssf (__a, __b); } -/* vqmovn */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_s16 (int16x8_t __a) +vrsqrtsd_f64 (float64_t __a, float64_t __b) { - return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a); + return __builtin_aarch64_rsqrtsdf (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_s32 (int32x4_t __a) +vrsqrts_f32 (float32x2_t __a, float32x2_t __b) { - return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a); + return __builtin_aarch64_rsqrtsv2sf (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_s64 (int64x2_t __a) +vrsqrts_f64 (float64x1_t __a, float64x1_t __b) { - return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a); + return (float64x1_t) {vrsqrtsd_f64 (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0))}; } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_u16 (uint16x8_t __a) +vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b) { - return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a); + return __builtin_aarch64_rsqrtsv4sf (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_u32 (uint32x4_t __a) +vrsqrtsq_f64 (float64x2_t __a, float64x2_t __b) { - return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a); + return __builtin_aarch64_rsqrtsv2df (__a, __b); } -__extension__ extern __inline uint32x2_t +/* vrsra */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovn_u64 (uint64x2_t __a) +vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { - return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); + return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c); } -__extension__ extern __inline int8_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovnh_s16 (int16_t __a) +vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) { - return (int8_t) __builtin_aarch64_sqmovnhi (__a); + return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovns_s32 (int32_t __a) +vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) { - return (int16_t) __builtin_aarch64_sqmovnsi (__a); + return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovnd_s64 (int64_t __a) +vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) { - return (int32_t) __builtin_aarch64_sqmovndi (__a); + return (int64x1_t) {__builtin_aarch64_srsra_ndi 
(__a[0], __b[0], __c)}; } -__extension__ extern __inline uint8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovnh_u16 (uint16_t __a) +vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return (uint8_t) __builtin_aarch64_uqmovnhi (__a); + return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c); } -__extension__ extern __inline uint16_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovns_u32 (uint32_t __a) +vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return (uint16_t) __builtin_aarch64_uqmovnsi (__a); + return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c); } -__extension__ extern __inline uint32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovnd_u64 (uint64_t __a) +vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (uint32_t) __builtin_aarch64_uqmovndi (__a); + return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c); } -/* vqmovun */ - -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovun_s16 (int16x8_t __a) +vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a); + return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)}; } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovun_s32 (int32x4_t __a) +vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) { - return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a); + return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovun_s64 (int64x2_t __a) +vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) { - return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); + return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c); } -__extension__ extern __inline int8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovunh_s16 (int16_t __a) +vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) { - return (int8_t) __builtin_aarch64_sqmovunhi (__a); + return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovuns_s32 (int32_t __a) +vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) { - return (int16_t) __builtin_aarch64_sqmovunsi (__a); + return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqmovund_s64 (int64_t __a) +vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return (int32_t) __builtin_aarch64_sqmovundi (__a); + return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c); } -/* vqneg */ +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return __builtin_aarch64_ursra_nv8hi_uuus (__a, 
__b, __c); +} -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqnegq_s64 (int64x2_t __a) +vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); + return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c); } -__extension__ extern __inline int8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqnegb_s8 (int8_t __a) +vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return (int8_t) __builtin_aarch64_sqnegqi (__a); + return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqnegh_s16 (int16_t __a) +vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c) { - return (int16_t) __builtin_aarch64_sqneghi (__a); + return __builtin_aarch64_srsra_ndi (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqnegs_s32 (int32_t __a) +vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) { - return (int32_t) __builtin_aarch64_sqnegsi (__a); + return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); } -__extension__ extern __inline int64_t +#pragma GCC push_options +#pragma GCC target ("+nothing+crypto") + +/* vsha1 */ + +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqnegd_s64 (int64_t __a) +vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) { - return __builtin_aarch64_sqnegdi (__a); + return __builtin_aarch64_crypto_sha1cv4si_uuuu (__hash_abcd, __hash_e, __wk); } -/* vqrdmulh */ +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + return __builtin_aarch64_crypto_sha1mv4si_uuuu (__hash_abcd, __hash_e, __wk); +} -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) { - return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c); + return __builtin_aarch64_crypto_sha1pv4si_uuuu (__hash_abcd, __hash_e, __wk); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +vsha1h_u32 (uint32_t __hash_e) { - return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c); + return __builtin_aarch64_crypto_sha1hsi_uu (__hash_e); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) { - return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c); + return __builtin_aarch64_crypto_sha1su0v4si_uuuu (__w0_3, __w4_7, __w8_11); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) { - return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); + return __builtin_aarch64_crypto_sha1su1v4si_uuu (__tw0_3, __w12_15); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhh_s16 (int16_t __a, int16_t __b) +vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) { - return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b); + return __builtin_aarch64_crypto_sha256hv4si_uuuu (__hash_abcd, __hash_efgh, + __wk); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) +vsha256h2q_u32 (uint32x4_t __hash_efgh, uint32x4_t __hash_abcd, uint32x4_t __wk) { - return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); + return __builtin_aarch64_crypto_sha256h2v4si_uuuu (__hash_efgh, __hash_abcd, + __wk); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) +vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) { - return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c); + return __builtin_aarch64_crypto_sha256su0v4si_uuu (__w0_3, __w4_7); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhs_s32 (int32_t __a, int32_t __b) +vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) { - return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b); + return __builtin_aarch64_crypto_sha256su1v4si_uuuu (__tw0_3, __w8_11, + __w12_15); } -__extension__ extern __inline int32_t +__extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) +vmull_p64 (poly64_t __a, poly64_t __b) { - return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); + return + __builtin_aarch64_crypto_pmulldi_ppp (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) { - return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c); + return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b); } -/* vqrshl */ +#pragma GCC pop_options + +/* vshl */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_s8 (int8x8_t __a, int8x8_t __b) +vshl_n_s8 (int8x8_t __a, const int __b) { - return __builtin_aarch64_sqrshlv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_s16 (int16x4_t __a, int16x4_t __b) +vshl_n_s16 (int16x4_t __a, const int __b) { - return __builtin_aarch64_sqrshlv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_s32 (int32x2_t __a, int32x2_t __b) +vshl_n_s32 (int32x2_t 
__a, const int __b) { - return __builtin_aarch64_sqrshlv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_s64 (int64x1_t __a, int64x1_t __b) +vshl_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])}; + return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_u8 (uint8x8_t __a, int8x8_t __b) +vshl_n_u8 (uint8x8_t __a, const int __b) { - return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b); + return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_u16 (uint16x4_t __a, int16x4_t __b) +vshl_n_u16 (uint16x4_t __a, const int __b) { - return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); + return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_u32 (uint32x2_t __a, int32x2_t __b) +vshl_n_u32 (uint32x2_t __a, const int __b) { - return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); + return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshl_u64 (uint64x1_t __a, int64x1_t __b) +vshl_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])}; + return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)}; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_s8 (int8x16_t __a, int8x16_t __b) +vshlq_n_s8 (int8x16_t __a, const int __b) { - return __builtin_aarch64_sqrshlv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_s16 (int16x8_t __a, int16x8_t __b) +vshlq_n_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_sqrshlv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_s32 (int32x4_t __a, int32x4_t __b) +vshlq_n_s32 (int32x4_t __a, const int __b) { - return __builtin_aarch64_sqrshlv4si (__a, __b); + return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_s64 (int64x2_t __a, int64x2_t __b) +vshlq_n_s64 (int64x2_t __a, const int __b) { - return __builtin_aarch64_sqrshlv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) +vshlq_n_u8 (uint8x16_t __a, const int __b) { - return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); + return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) +vshlq_n_u16 (uint16x8_t __a, const int 
__b) { - return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); + return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) +vshlq_n_u32 (uint32x4_t __a, const int __b) { - return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); + return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) +vshlq_n_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); + return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b); } -__extension__ extern __inline int8_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlb_s8 (int8_t __a, int8_t __b) +vshld_n_s64 (int64_t __a, const int __b) { - return __builtin_aarch64_sqrshlqi (__a, __b); + return __builtin_aarch64_ashldi (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlh_s16 (int16_t __a, int16_t __b) +vshld_n_u64 (uint64_t __a, const int __b) { - return __builtin_aarch64_sqrshlhi (__a, __b); + return (uint64_t) __builtin_aarch64_ashldi (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshls_s32 (int32_t __a, int32_t __b) +vshl_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_sqrshlsi (__a, __b); + return __builtin_aarch64_sshlv8qi (__a, __b); } -__extension__ extern __inline int64_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshld_s64 (int64_t __a, int64_t __b) +vshl_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_sqrshldi (__a, __b); + return __builtin_aarch64_sshlv4hi (__a, __b); } -__extension__ extern __inline uint8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlb_u8 (uint8_t __a, uint8_t __b) +vshl_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_uqrshlqi_uus (__a, __b); + return __builtin_aarch64_sshlv2si (__a, __b); } -__extension__ extern __inline uint16_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlh_u16 (uint16_t __a, uint16_t __b) +vshl_s64 (int64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_uqrshlhi_uus (__a, __b); + return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])}; } -__extension__ extern __inline uint32_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshls_u32 (uint32_t __a, uint32_t __b) +vshl_u8 (uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_uqrshlsi_uus (__a, __b); + return __builtin_aarch64_ushlv8qi_uus (__a, __b); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshld_u64 (uint64_t __a, uint64_t __b) +vshl_u16 (uint16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_uqrshldi_uus (__a, __b); + return __builtin_aarch64_ushlv4hi_uus (__a, __b); } -/* vqrshrn */ +__extension__ extern __inline uint32x2_t +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_ushlv2si_uus (__a, __b); +} -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrn_n_s16 (int16x8_t __a, const int __b) +vshl_u64 (uint64x1_t __a, int64x1_t __b) { - return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b); + return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])}; } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrn_n_s32 (int32x4_t __a, const int __b) +vshlq_s8 (int8x16_t __a, int8x16_t __b) { - return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b); + return __builtin_aarch64_sshlv16qi (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrn_n_s64 (int64x2_t __a, const int __b) +vshlq_s16 (int16x8_t __a, int16x8_t __b) { - return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b); + return __builtin_aarch64_sshlv8hi (__a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrn_n_u16 (uint16x8_t __a, const int __b) +vshlq_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); + return __builtin_aarch64_sshlv4si (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrn_n_u32 (uint32x4_t __a, const int __b) +vshlq_s64 (int64x2_t __a, int64x2_t __b) { - return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); + return __builtin_aarch64_sshlv2di (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrn_n_u64 (uint64x2_t __a, const int __b) +vshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); + return __builtin_aarch64_ushlv16qi_uus (__a, __b); } -__extension__ extern __inline int8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrnh_n_s16 (int16_t __a, const int __b) +vshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); + return __builtin_aarch64_ushlv8hi_uus (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrns_n_s32 (int32_t __a, const int __b) +vshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); + return __builtin_aarch64_ushlv4si_uus (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrnd_n_s64 (int64_t __a, const int __b) +vshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); + return __builtin_aarch64_ushlv2di_uus (__a, __b); } -__extension__ extern __inline uint8_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrnh_n_u16 (uint16_t __a, const int __b) 
+vshld_s64 (int64_t __a, int64_t __b) { - return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); + return __builtin_aarch64_sshldi (__a, __b); } -__extension__ extern __inline uint16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrns_n_u32 (uint32_t __a, const int __b) +vshld_u64 (uint64_t __a, int64_t __b) { - return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); + return __builtin_aarch64_ushldi_uus (__a, __b); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrnd_n_u64 (uint64_t __a, const int __b) +vshll_high_n_s8 (int8x16_t __a, const int __b) { - return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); + return __builtin_aarch64_sshll2_nv16qi (__a, __b); } -/* vqrshrun */ +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vshll_high_n_s16 (int16x8_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv8hi (__a, __b); +} -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vshll_high_n_s32 (int32x4_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv4si (__a, __b); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vshll_high_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vshll_high_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vshll_high_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b); +} + +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrun_n_s16 (int16x8_t __a, const int __b) +vshll_n_s8 (int8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b); + return __builtin_aarch64_sshll_nv8qi (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrun_n_s32 (int32x4_t __a, const int __b) +vshll_n_s16 (int16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b); + return __builtin_aarch64_sshll_nv4hi (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrun_n_s64 (int64x2_t __a, const int __b) +vshll_n_s32 (int32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); + return __builtin_aarch64_sshll_nv2si (__a, __b); } -__extension__ extern __inline int8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrunh_n_s16 (int16_t __a, const int __b) +vshll_n_u8 (uint8x8_t __a, const int __b) { - return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); + return __builtin_aarch64_ushll_nv8qi_uus (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline 
uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshruns_n_s32 (int32_t __a, const int __b) +vshll_n_u16 (uint16x4_t __a, const int __b) { - return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); + return __builtin_aarch64_ushll_nv4hi_uus (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshrund_n_s64 (int64_t __a, const int __b) +vshll_n_u32 (uint32x2_t __a, const int __b) { - return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); + return __builtin_aarch64_ushll_nv2si_uus (__a, __b); } -/* vqshl */ +/* vshr */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_s8 (int8x8_t __a, int8x8_t __b) +vshr_n_s8 (int8x8_t __a, const int __b) { - return __builtin_aarch64_sqshlv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_s16 (int16x4_t __a, int16x4_t __b) +vshr_n_s16 (int16x4_t __a, const int __b) { - return __builtin_aarch64_sqshlv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_s32 (int32x2_t __a, int32x2_t __b) +vshr_n_s32 (int32x2_t __a, const int __b) { - return __builtin_aarch64_sqshlv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_s64 (int64x1_t __a, int64x1_t __b) +vshr_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])}; + return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_u8 (uint8x8_t __a, int8x8_t __b) +vshr_n_u8 (uint8x8_t __a, const int __b) { - return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); + return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_u16 (uint16x4_t __a, int16x4_t __b) +vshr_n_u16 (uint16x4_t __a, const int __b) { - return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); + return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_u32 (uint32x2_t __a, int32x2_t __b) +vshr_n_u32 (uint32x2_t __a, const int __b) { - return __builtin_aarch64_uqshlv2si_uus ( __a, __b); + return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_u64 (uint64x1_t __a, int64x1_t __b) +vshr_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])}; + return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)}; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_s8 (int8x16_t __a, int8x16_t __b) +vshrq_n_s8 (int8x16_t __a, const int __b) { - return __builtin_aarch64_sqshlv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); } 
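(Usage note, not part of the patch.)  The definitions above wire the constant-count shifts to plain shift builtins: vshl_n_*/vshld_n_* to ashl*, the widening vshll(_high)_n_* to sshll/ushll, and vshr_n_* to ashr* for signed types or lshr* for unsigned ones.  A short sketch of how these wrappers are used, with invented function names, assuming the same kind of standalone test file as above:

#include <arm_neon.h>

/* vshr_n_u8 expands to the logical-shift builtin (lshrv8qi), so the
   vacated high bits are zero-filled; the signed vshr_n_s8 variant uses
   the arithmetic ashrv8qi builtin instead.  */
uint8x8_t
quarter_u8 (uint8x8_t px)
{
  return vshr_n_u8 (px, 2);
}

/* vshll_n_u8 widens each byte to 16 bits and shifts it left, via the
   ushll_nv8qi builtin used in the definition above.  */
uint16x8_t
widen_and_scale_u8 (uint8x8_t px)
{
  return vshll_n_u8 (px, 4);
}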
__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_s16 (int16x8_t __a, int16x8_t __b) +vshrq_n_s16 (int16x8_t __a, const int __b) { - return __builtin_aarch64_sqshlv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_s32 (int32x4_t __a, int32x4_t __b) +vshrq_n_s32 (int32x4_t __a, const int __b) { - return __builtin_aarch64_sqshlv4si (__a, __b); + return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_s64 (int64x2_t __a, int64x2_t __b) +vshrq_n_s64 (int64x2_t __a, const int __b) { - return __builtin_aarch64_sqshlv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_u8 (uint8x16_t __a, int8x16_t __b) +vshrq_n_u8 (uint8x16_t __a, const int __b) { - return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); + return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_u16 (uint16x8_t __a, int16x8_t __b) +vshrq_n_u16 (uint16x8_t __a, const int __b) { - return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); + return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_u32 (uint32x4_t __a, int32x4_t __b) +vshrq_n_u32 (uint32x4_t __a, const int __b) { - return __builtin_aarch64_uqshlv4si_uus ( __a, __b); + return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_u64 (uint64x2_t __a, int64x2_t __b) +vshrq_n_u64 (uint64x2_t __a, const int __b) { - return __builtin_aarch64_uqshlv2di_uus ( __a, __b); + return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); } -__extension__ extern __inline int8_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlb_s8 (int8_t __a, int8_t __b) +vshrd_n_s64 (int64_t __a, const int __b) { - return __builtin_aarch64_sqshlqi (__a, __b); + return __builtin_aarch64_ashr_simddi (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlh_s16 (int16_t __a, int16_t __b) +vshrd_n_u64 (uint64_t __a, const int __b) { - return __builtin_aarch64_sqshlhi (__a, __b); + return __builtin_aarch64_lshr_simddi_uus (__a, __b); } -__extension__ extern __inline int32_t +/* vsli */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshls_s32 (int32_t __a, int32_t __b) +vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { - return __builtin_aarch64_sqshlsi (__a, __b); + return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c); } -__extension__ extern __inline int64_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshld_s64 (int64_t __a, int64_t __b) +vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) { - return __builtin_aarch64_sqshldi 
(__a, __b); + return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c); } -__extension__ extern __inline uint8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlb_u8 (uint8_t __a, uint8_t __b) +vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) { - return __builtin_aarch64_uqshlqi_uus (__a, __b); + return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c); } -__extension__ extern __inline uint16_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlh_u16 (uint16_t __a, uint16_t __b) +vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) { - return __builtin_aarch64_uqshlhi_uus (__a, __b); + return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)}; } -__extension__ extern __inline uint32_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshls_u32 (uint32_t __a, uint32_t __b) +vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return __builtin_aarch64_uqshlsi_uus (__a, __b); + return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshld_u64 (uint64_t __a, uint64_t __b) +vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return __builtin_aarch64_uqshldi_uus (__a, __b); + return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_s8 (int8x8_t __a, const int __b) +vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b); + return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_s16 (int16x4_t __a, const int __b) +vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b); + return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)}; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_s32 (int32x2_t __a, const int __b) +vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) { - return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b); + return (poly64x1_t) {__builtin_aarch64_ssli_ndi_ppps (__a[0], __b[0], __c)}; } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_s64 (int64x1_t __a, const int __b) +vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) { - return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)}; + return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_u8 (uint8x8_t __a, const int __b) +vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) { - return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); + return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c); } -__extension__ extern __inline uint16x4_t 
+__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_u16 (uint16x4_t __a, const int __b) +vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) { - return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); + return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_u32 (uint32x2_t __a, const int __b) +vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) { - return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); + return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshl_n_u64 (uint64x1_t __a, const int __b) +vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)}; + return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_n_s8 (int8x16_t __a, const int __b) +vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) { - return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b); + return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_n_s16 (int16x8_t __a, const int __b) +vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b); + return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_n_s32 (int32x4_t __a, const int __b) +vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b); + return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_n_s64 (int64x2_t __a, const int __b) +vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) { - return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b); + return __builtin_aarch64_ssli_nv2di_ppps (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_n_u8 (uint8x16_t __a, const int __b) +vslid_n_s64 (int64_t __a, int64_t __b, const int __c) { - return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); + return __builtin_aarch64_ssli_ndi (__a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_n_u16 (uint16x8_t __a, const int __b) +vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c) { - return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b); + return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +/* vsqadd */ + +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-vqshlq_n_u32 (uint32x4_t __a, const int __b) +vsqadd_u8 (uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); + return __builtin_aarch64_usqaddv8qi_uus (__a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlq_n_u64 (uint64x2_t __a, const int __b) +vsqadd_u16 (uint16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_usqaddv4hi_uus (__a, __b); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsqadd_u32 (uint32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_usqaddv2si_uus (__a, __b); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsqadd_u64 (uint64x1_t __a, int64x1_t __b) { - return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); + return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])}; } -__extension__ extern __inline int8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlb_n_s8 (int8_t __a, const int __b) +vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) { - return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b); + return __builtin_aarch64_usqaddv16qi_uus (__a, __b); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlh_n_s16 (int16_t __a, const int __b) +vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) { - return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b); + return __builtin_aarch64_usqaddv8hi_uus (__a, __b); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshls_n_s32 (int32_t __a, const int __b) +vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) { - return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b); + return __builtin_aarch64_usqaddv4si_uus (__a, __b); } -__extension__ extern __inline int64_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshld_n_s64 (int64_t __a, const int __b) +vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) { - return __builtin_aarch64_sqshl_ndi (__a, __b); + return __builtin_aarch64_usqaddv2di_uus (__a, __b); } __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlb_n_u8 (uint8_t __a, const int __b) +vsqaddb_u8 (uint8_t __a, int8_t __b) { - return __builtin_aarch64_uqshl_nqi_uus (__a, __b); + return __builtin_aarch64_usqaddqi_uus (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlh_n_u16 (uint16_t __a, const int __b) +vsqaddh_u16 (uint16_t __a, int16_t __b) { - return __builtin_aarch64_uqshl_nhi_uus (__a, __b); + return __builtin_aarch64_usqaddhi_uus (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshls_n_u32 (uint32_t __a, const int __b) +vsqadds_u32 (uint32_t __a, int32_t __b) { - return __builtin_aarch64_uqshl_nsi_uus (__a, __b); + return __builtin_aarch64_usqaddsi_uus (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshld_n_u64 (uint64_t __a, const int __b) +vsqaddd_u64 (uint64_t __a, int64_t __b) { - return __builtin_aarch64_uqshl_ndi_uus (__a, __b); 
+ return __builtin_aarch64_usqadddi_uus (__a, __b); } -/* vqshlu */ - -__extension__ extern __inline uint8x8_t +/* vsqrt */ +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlu_n_s8 (int8x8_t __a, const int __b) +vsqrt_f32 (float32x2_t __a) { - return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); + return __builtin_aarch64_sqrtv2sf (__a); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlu_n_s16 (int16x4_t __a, const int __b) +vsqrtq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); + return __builtin_aarch64_sqrtv4sf (__a); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlu_n_s32 (int32x2_t __a, const int __b) +vsqrt_f64 (float64x1_t __a) { - return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); + return (float64x1_t) { __builtin_aarch64_sqrtdf (__a[0]) }; } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlu_n_s64 (int64x1_t __a, const int __b) +vsqrtq_f64 (float64x2_t __a) { - return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)}; + return __builtin_aarch64_sqrtv2df (__a); } -__extension__ extern __inline uint8x16_t +/* vsra */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshluq_n_s8 (int8x16_t __a, const int __b) +vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { - return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); + return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshluq_n_s16 (int16x8_t __a, const int __b) +vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) { - return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); + return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshluq_n_s32 (int32x4_t __a, const int __b) +vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) { - return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); + return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshluq_n_s64 (int64x2_t __a, const int __b) +vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) { - return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); + return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)}; } -__extension__ extern __inline int8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlub_n_s8 (int8_t __a, const int __b) +vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); + return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshluh_n_s16 (int16_t __a, const int __b) 
+vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); + return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlus_n_s32 (int32_t __a, const int __b) +vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); + return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlud_n_s64 (int64_t __a, const int __b) +vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); + return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)}; } -/* vqshrn */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrn_n_s16 (int16x8_t __a, const int __b) +vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) { - return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b); + return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrn_n_s32 (int32x4_t __a, const int __b) +vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) { - return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b); + return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrn_n_s64 (int64x2_t __a, const int __b) +vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) { - return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b); + return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrn_n_u16 (uint16x8_t __a, const int __b) +vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) { - return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); + return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrn_n_u32 (uint32x4_t __a, const int __b) +vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); + return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrn_n_u64 (uint64x2_t __a, const int __b) +vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) { - return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); + return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c); } -__extension__ extern __inline int8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrnh_n_s16 (int16_t __a, const int __b) +vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return 
(int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b); + return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrns_n_s32 (int32_t __a, const int __b) +vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b); + return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrnd_n_s64 (int64_t __a, const int __b) +vsrad_n_s64 (int64_t __a, int64_t __b, const int __c) { - return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b); + return __builtin_aarch64_ssra_ndi (__a, __b, __c); } -__extension__ extern __inline uint8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrnh_n_u16 (uint16_t __a, const int __b) +vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) { - return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); + return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); } -__extension__ extern __inline uint16_t +/* vsri */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrns_n_u32 (uint32_t __a, const int __b) +vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { - return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); + return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c); } -__extension__ extern __inline uint32_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrnd_n_u64 (uint64_t __a, const int __b) +vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) { - return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); + return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c); } -/* vqshrun */ - -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrun_n_s16 (int16x8_t __a, const int __b) +vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) { - return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b); + return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrun_n_s32 (int32x4_t __a, const int __b) +vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) { - return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b); + return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)}; } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrun_n_s64 (int64x2_t __a, const int __b) +vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); + return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c); } -__extension__ extern __inline int8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrunh_n_s16 (int16_t __a, const int __b) +vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b); + return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c); } 
-__extension__ extern __inline int16_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshruns_n_s32 (int32_t __a, const int __b) +vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b); + return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshrund_n_s64 (int64_t __a, const int __b) +vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b); + return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)}; } -/* vqsub */ - -__extension__ extern __inline int8_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqsubb_s8 (int8_t __a, int8_t __b) +vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) { - return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); + return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c); } -__extension__ extern __inline int16_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqsubh_s16 (int16_t __a, int16_t __b) +vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) { - return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); + return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c); } -__extension__ extern __inline int32_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqsubs_s32 (int32_t __a, int32_t __b) +vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) { - return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); + return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c); } -__extension__ extern __inline int64_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqsubd_s64 (int64_t __a, int64_t __b) +vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) { - return __builtin_aarch64_sqsubdi (__a, __b); + return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c); } -__extension__ extern __inline uint8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqsubb_u8 (uint8_t __a, uint8_t __b) +vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); + return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c); } -__extension__ extern __inline uint16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqsubh_u16 (uint16_t __a, uint16_t __b) +vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) { - return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); + return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c); } -__extension__ extern __inline uint32_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqsubs_u32 (uint32_t __a, uint32_t __b) +vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); + return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-vqsubd_u64 (uint64_t __a, uint64_t __b) +vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return __builtin_aarch64_uqsubdi_uuu (__a, __b); + return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c); } -/* vqtbl2 */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) +vsrid_n_s64 (int64_t __a, int64_t __b, const int __c) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); - return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); + return __builtin_aarch64_ssri_ndi (__a, __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) +vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); + return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); } -__extension__ extern __inline poly8x8_t +/* vst1 */ + +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) +vst1_f16 (float16_t *__a, float16x4_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); + __builtin_aarch64_st1v4hf (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) +vst1_f32 (float32_t *__a, float32x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); + __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) __a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) +vst1_f64 (float64_t *__a, float64x1_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); + *__a = __b[0]; } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) +vst1_p8 (poly8_t *__a, poly8x8_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, + (int8x8_t) __b); } -/* vqtbl3 */ - -__extension__ extern __inline 
int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) +vst1_p16 (poly16_t *__a, poly16x4_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, + (int16x4_t) __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) +vst1_p64 (poly64_t *__a, poly64x1_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); + *__a = __b[0]; } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) +vst1_s8 (int8_t *__a, int8x8_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) +vst1_s16 (int16_t *__a, int16x4_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) +vst1_s32 (int32_t *__a, int32x2_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) +vst1_s64 (int64_t *__a, int64x1_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t)tab.val[2], 2); - return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); + *__a = __b[0]; } -/* vqtbl4 */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) +vst1_u8 (uint8_t *__a, uint8x8_t __b) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, + (int8x8_t) __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) +vst1_u16 (uint16_t *__a, uint16x4_t __b) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, + (int16x4_t) __b); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) +vst1_u32 (uint32_t *__a, uint32x2_t __b) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a, + (int32x2_t) __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) +vst1_u64 (uint64_t *__a, uint64x1_t __b) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); + *__a = __b[0]; } -__extension__ extern __inline uint8x16_t +/* vst1q */ + +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) +vst1q_f16 (float16_t *__a, float16x8_t __b) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return 
(uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); + __builtin_aarch64_st1v8hf (__a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) +vst1q_f32 (float32_t *__a, float32x4_t __b) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); + __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) __a, __b); } - -/* vqtbx2 */ -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) +vst1q_f64 (float64_t *__a, float64x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); - return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx); + __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) __a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) +vst1q_p8 (poly8_t *__a, poly8x16_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, - (int8x8_t)idx); + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, + (int8x16_t) __b); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) +vst1q_p16 (poly16_t *__a, poly16x8_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, - (int8x8_t)idx); + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, + (int16x8_t) __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) +vst1q_p64 (poly64_t *__a, poly64x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); - return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx); + __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a, + (poly64x2_t) __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) +vst1q_s8 (int8_t *__a, int8x16_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, 
(int8x16_t)tab.val[1], 1); - return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, - (int8x16_t)idx); + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) +vst1q_s16 (int16_t *__a, int16x8_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); - return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, - (int8x16_t)idx); + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, __b); } -/* vqtbx3 */ -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) +vst1q_s32 (int32_t *__a, int32x4_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); - return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx); + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) +vst1q_s64 (int64_t *__a, int64x2_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, - (int8x8_t)idx); + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a, __b); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) +vst1q_u8 (uint8_t *__a, uint8x16_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, - (int8x8_t)idx); + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, + (int8x16_t) __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) +vst1q_u16 (uint16_t *__a, uint16x8_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); - return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx); + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, + (int16x8_t) __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, 
uint8x16_t idx) +vst1q_u32 (uint32_t *__a, uint32x4_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, - (int8x16_t)idx); + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a, + (int32x4_t) __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) +vst1q_u64 (uint64_t *__a, uint64x2_t __b) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); - return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, - (int8x16_t)idx); + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a, + (int64x2_t) __b); } -/* vqtbx4 */ +/* vst1_lane */ -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) +vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); - return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) +vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, - (int8x8_t)idx); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) +vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, - (int8x8_t)idx); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, 
uint8x16_t idx) +vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); - return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) +vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, - (int8x16_t)idx); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) +vst1_lane_p64 (poly64_t *__a, poly64x1_t __b, const int __lane) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); - return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, - (int8x16_t)idx); + *__a = __aarch64_vget_lane_any (__b, __lane); } -/* vrbit */ - -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrbit_p8 (poly8x8_t __a) +vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane) { - return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrbit_s8 (int8x8_t __a) +vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane) { - return __builtin_aarch64_rbitv8qi (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrbit_u8 (uint8x8_t __a) +vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane) { - return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrbitq_p8 (poly8x16_t __a) +vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane) { - return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrbitq_s8 (int8x16_t __a) 
+vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane) { - return __builtin_aarch64_rbitv16qi (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrbitq_u8 (uint8x16_t __a) +vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane) { - return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -/* vrecpe */ - -__extension__ extern __inline uint32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpe_u32 (uint32x2_t __a) +vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane) { - return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpeq_u32 (uint32x4_t __a) +vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane) { - return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float32_t +/* vst1q_lane */ + +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpes_f32 (float32_t __a) +vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane) { - return __builtin_aarch64_frecpesf (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecped_f64 (float64_t __a) +vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane) { - return __builtin_aarch64_frecpedf (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpe_f32 (float32x2_t __a) +vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane) { - return __builtin_aarch64_frecpev2sf (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpe_f64 (float64x1_t __a) +vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane) { - return (float64x1_t) { vrecped_f64 (vget_lane_f64 (__a, 0)) }; + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpeq_f32 (float32x4_t __a) +vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane) { - return __builtin_aarch64_frecpev4sf (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpeq_f64 (float64x2_t __a) +vst1q_lane_p64 (poly64_t *__a, poly64x2_t __b, const int __lane) { - return __builtin_aarch64_frecpev2df (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -/* vrecps */ - -__extension__ extern __inline float32_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpss_f32 (float32_t __a, float32_t __b) +vst1q_lane_s8 
(int8_t *__a, int8x16_t __b, const int __lane) { - return __builtin_aarch64_frecpssf (__a, __b); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpsd_f64 (float64_t __a, float64_t __b) +vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane) { - return __builtin_aarch64_frecpsdf (__a, __b); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecps_f32 (float32x2_t __a, float32x2_t __b) +vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane) { - return __builtin_aarch64_frecpsv2sf (__a, __b); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecps_f64 (float64x1_t __a, float64x1_t __b) +vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane) { - return (float64x1_t) { vrecpsd_f64 (vget_lane_f64 (__a, 0), - vget_lane_f64 (__b, 0)) }; + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane) { - return __builtin_aarch64_frecpsv4sf (__a, __b); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpsq_f64 (float64x2_t __a, float64x2_t __b) +vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane) { - return __builtin_aarch64_frecpsv2df (__a, __b); + *__a = __aarch64_vget_lane_any (__b, __lane); } -/* vrecpx */ - -__extension__ extern __inline float32_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpxs_f32 (float32_t __a) +vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane) { - return __builtin_aarch64_frecpxsf (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpxd_f64 (float64_t __a) +vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) { - return __builtin_aarch64_frecpxdf (__a); + *__a = __aarch64_vget_lane_any (__b, __lane); } +/* vst1x2 */ -/* vrev */ - -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev16_p8 (poly8x8_t a) +vst1_s64_x2 (int64_t * __a, int64x1x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + __builtin_aarch64_simd_oi __o; + int64x2x2_t __temp; + __temp.val[0] + = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[1] + = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vrev16_s8 (int8x8_t a) +vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + __builtin_aarch64_simd_oi __o; + uint64x2x2_t __temp; + __temp.val[0] + = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] + = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev16_u8 (uint8x8_t a) +vst1_f64_x2 (float64_t * __a, float64x1x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + __builtin_aarch64_simd_oi __o; + float64x2x2_t __temp; + __temp.val[0] + = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] + = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev16q_p8 (poly8x16_t a) +vst1_s8_x2 (int8_t * __a, int8x8x2_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); + __builtin_aarch64_simd_oi __o; + int8x16x2_t __temp; + __temp.val[0] + = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[1] + = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev16q_s8 (int8x16_t a) +vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); + __builtin_aarch64_simd_oi __o; + poly8x16x2_t __temp; + __temp.val[0] + = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] + = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev16q_u8 (uint8x16_t a) +vst1_s16_x2 (int16_t * __a, int16x4x2_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); + __builtin_aarch64_simd_oi __o; + int16x8x2_t __temp; + __temp.val[0] + = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[1] + = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = 
__builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32_p8 (poly8x8_t a) +vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + __builtin_aarch64_simd_oi __o; + poly16x8x2_t __temp; + __temp.val[0] + = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] + = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32_p16 (poly16x4_t a) +vst1_s32_x2 (int32_t * __a, int32x2x2_t __val) { - return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); + __builtin_aarch64_simd_oi __o; + int32x4x2_t __temp; + __temp.val[0] + = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[1] + = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32_s8 (int8x8_t a) +vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + __builtin_aarch64_simd_oi __o; + uint8x16x2_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32_s16 (int16x4_t a) +vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __val) { - return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); + __builtin_aarch64_simd_oi __o; + uint16x8x2_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32_u8 (uint8x8_t a) +vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + __builtin_aarch64_simd_oi __o; + uint32x4x2_t __temp; + 
__temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32_u16 (uint16x4_t a) +vst1_f16_x2 (float16_t * __a, float16x4x2_t __val) { - return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); + __builtin_aarch64_simd_oi __o; + float16x8x2_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); + __builtin_aarch64_st1x2v4hf (__a, __o); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32q_p8 (poly8x16_t a) +vst1_f32_x2 (float32_t * __a, float32x2x2_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); + __builtin_aarch64_simd_oi __o; + float32x4x2_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32q_p16 (poly16x8_t a) +vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __val) { - return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + __builtin_aarch64_simd_oi __o; + poly64x2x2_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32q_s8 (int8x16_t a) +vst1q_s8_x2 (int8_t * __a, int8x16x2_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32q_s16 (int16x8_t a) +vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __val) { - return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + 
__builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32q_u8 (uint8x16_t a) +vst1q_s16_x2 (int16_t * __a, int16x8x2_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev32q_u16 (uint16x8_t a) +vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __val) { - return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_f16 (float16x4_t __a) +vst1q_s32_x2 (int32_t * __a, int32x4x2_t __val) { - return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_f32 (float32x2_t a) +vst1q_s64_x2 (int64_t * __a, int64x2x2_t __val) { - return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_p8 (poly8x8_t a) +vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_p16 (poly16x4_t a) +vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __val) { - return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + 
__builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_s8 (int8x8_t a) +vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_s16 (int16x4_t a) +vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __val) { - return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_s32 (int32x2_t a) +vst1q_f16_x2 (float16_t * __a, float16x8x2_t __val) { - return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); + __builtin_aarch64_st1x2v8hf (__a, __o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_u8 (uint8x8_t a) +vst1q_f32_x2 (float32_t * __a, float32x4x2_t __val) { - return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); + __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_u16 (uint16x4_t a) +vst1q_f64_x2 (float64_t * __a, float64x2x2_t __val) { - return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64_u32 (uint32x2_t a) +vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __val) { - return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline float16x8_t +/* vst1x3 */ + +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_f16 (float16x8_t __a) +vst1_s64_x3 (int64_t * 
__a, int64x1x3_t __val) { - return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + __builtin_aarch64_simd_ci __o; + int64x2x3_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_f32 (float32x4_t a) +vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __val) { - return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); + __builtin_aarch64_simd_ci __o; + uint64x2x3_t __temp; + __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_p8 (poly8x16_t a) +vst1_f64_x3 (float64_t * __a, float64x1x3_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); + __builtin_aarch64_simd_ci __o; + float64x2x3_t __temp; + __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_p16 (poly16x8_t a) +vst1_s8_x3 (int8_t * __a, int8x8x3_t __val) { - return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + __builtin_aarch64_simd_ci __o; + int8x16x3_t __temp; + __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline 
void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_s8 (int8x16_t a) +vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); + __builtin_aarch64_simd_ci __o; + poly8x16x3_t __temp; + __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_s16 (int16x8_t a) +vst1_s16_x3 (int16_t * __a, int16x4x3_t __val) { - return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + __builtin_aarch64_simd_ci __o; + int16x8x3_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_s32 (int32x4_t a) +vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __val) { - return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); + __builtin_aarch64_simd_ci __o; + poly16x8x3_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_u8 (uint8x16_t a) +vst1_s32_x3 (int32_t * __a, int32x2x3_t __val) { - return __builtin_shuffle (a, - (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); + __builtin_aarch64_simd_ci __o; + int32x4x3_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + 
__builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_u16 (uint16x8_t a) +vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __val) { - return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + __builtin_aarch64_simd_ci __o; + uint8x16x3_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrev64q_u32 (uint32x4_t a) +vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __val) { - return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); + __builtin_aarch64_simd_ci __o; + uint16x8x3_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -/* vrnd */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrnd_f32 (float32x2_t __a) +vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __val) { - return __builtin_aarch64_btruncv2sf (__a); + __builtin_aarch64_simd_ci __o; + uint32x4x3_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrnd_f64 (float64x1_t __a) +vst1_f16_x3 (float16_t * __a, float16x4x3_t __val) { - return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); + __builtin_aarch64_simd_ci __o; + float16x8x3_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) 
__temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndq_f32 (float32x4_t __a) +vst1_f32_x3 (float32_t * __a, float32x2x3_t __val) { - return __builtin_aarch64_btruncv4sf (__a); + __builtin_aarch64_simd_ci __o; + float32x4x3_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndq_f64 (float64x2_t __a) +vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __val) { - return __builtin_aarch64_btruncv2df (__a); + __builtin_aarch64_simd_ci __o; + poly64x2x3_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); } -/* vrnda */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrnda_f32 (float32x2_t __a) +vst1q_s8_x3 (int8_t * __a, int8x16x3_t __val) { - return __builtin_aarch64_roundv2sf (__a); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrnda_f64 (float64x1_t __a) +vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __val) { - return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndaq_f32 (float32x4_t __a) +vst1q_s16_x3 (int16_t * __a, int16x8x3_t __val) { - return __builtin_aarch64_roundv4sf (__a); + 
__builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
+  __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
-__extension__ extern __inline float64x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndaq_f64 (float64x2_t __a)
+vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __val)
 {
-  return __builtin_aarch64_roundv2df (__a);
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
+  __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
-/* vrndi */
-
-__extension__ extern __inline float32x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndi_f32 (float32x2_t __a)
+vst1q_s32_x3 (int32_t * __a, int32x4x3_t __val)
 {
-  return __builtin_aarch64_nearbyintv2sf (__a);
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
+  __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o);
 }
-__extension__ extern __inline float64x1_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndi_f64 (float64x1_t __a)
+vst1q_s64_x3 (int64_t * __a, int64x2x3_t __val)
 {
-  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2);
+  __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
-__extension__ extern __inline float32x4_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndiq_f32 (float32x4_t __a)
+vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __val)
 {
-  return __builtin_aarch64_nearbyintv4sf (__a);
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
+  __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
-__extension__ extern __inline float64x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndiq_f64 (float64x2_t __a)
+vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __val)
 {
-  return __builtin_aarch64_nearbyintv2df (__a);
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
+  __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
-/* vrndm */
-
-__extension__ extern __inline float32x2_t
+__extension__ extern __inline void
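Similarly, a caller-side sketch for the three-vector contiguous stores defined in this hunk (illustrative only; store_triple_u16 is an invented name):

#include <arm_neon.h>

/* Write 24 uint16_t values (three Q registers) starting at dst.  */
static inline void
store_triple_u16 (uint16_t *dst, uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8x3_t t = { { a, b, c } };
  vst1q_u16_x3 (dst, t);
}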
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndm_f32 (float32x2_t __a) +vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __val) { - return __builtin_aarch64_floorv2sf (__a); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndm_f64 (float64x1_t __a) +vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __val) { - return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndmq_f32 (float32x4_t __a) +vst1q_f16_x3 (float16_t * __a, float16x8x3_t __val) { - return __builtin_aarch64_floorv4sf (__a); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); + __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndmq_f64 (float64x2_t __a) +vst1q_f32_x3 (float32_t * __a, float32x4x3_t __val) { - return __builtin_aarch64_floorv2df (__a); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); + __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } -/* vrndn */ - -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndn_f32 (float32x2_t __a) +vst1q_f64_x3 (float64_t * __a, float64x2x3_t __val) { - return __builtin_aarch64_frintnv2sf (__a); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); + __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndn_f64 (float64x1_t __a) +vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val) { - return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])}; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __val.val[1], 1); + __o = 
__builtin_aarch64_set_qregciv2di_ssps (__o,
+                                         (poly64x2_t) __val.val[2], 2);
+  __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
-__extension__ extern __inline float32x4_t
+/* vst1(q)_x4. */
+
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndnq_f32 (float32x4_t __a)
+vst1_s8_x4 (int8_t * __a, int8x8x4_t val)
 {
-  return __builtin_aarch64_frintnv4sf (__a);
+  union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
 }
-__extension__ extern __inline float64x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndnq_f64 (float64x2_t __a)
+vst1q_s8_x4 (int8_t * __a, int8x16x4_t val)
 {
-  return __builtin_aarch64_frintnv2df (__a);
+  union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
 }
-/* vrndp */
-
-__extension__ extern __inline float32x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndp_f32 (float32x2_t __a)
+vst1_s16_x4 (int16_t * __a, int16x4x4_t val)
 {
-  return __builtin_aarch64_ceilv2sf (__a);
+  union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
 }
-__extension__ extern __inline float64x1_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndp_f64 (float64x1_t __a)
+vst1q_s16_x4 (int16_t * __a, int16x8x4_t val)
 {
-  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
+  union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
 }
-__extension__ extern __inline float32x4_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndpq_f32 (float32x4_t __a)
+vst1_s32_x4 (int32_t * __a, int32x2x4_t val)
 {
-  return __builtin_aarch64_ceilv4sf (__a);
+  union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o);
 }
-__extension__ extern __inline float64x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndpq_f64 (float64x2_t __a)
+vst1q_s32_x4 (int32_t * __a, int32x4x4_t val)
 {
-  return __builtin_aarch64_ceilv2df (__a);
+  union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o);
 }
-/* vrndx */
-
-__extension__ extern __inline float32x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndx_f32 (float32x2_t __a)
+vst1_u8_x4 (uint8_t * __a, uint8x8x4_t val)
 {
-  return __builtin_aarch64_rintv2sf (__a);
+  union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
 }
-__extension__ extern __inline float64x1_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrndx_f64 (float64x1_t __a)
+vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t val)
 {
-  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
+  union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
+
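Note that the x4 definitions above reinterpret the tuple type through a union to obtain the backend XI value, instead of the set_qreg* builtins used for the x2/x3 forms. A caller-side sketch (illustrative only; store_quad_s32 is an invented name):

#include <arm_neon.h>

/* Write 16 int32_t values (four Q registers, 64 bytes) contiguously.  */
static inline void
store_quad_s32 (int32_t *dst, int32x4_t v0, int32x4_t v1,
                int32x4_t v2, int32x4_t v3)
{
  int32x4x4_t quad = { { v0, v1, v2, v3 } };
  vst1q_s32_x4 (dst, quad);
}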
__builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndxq_f32 (float32x4_t __a) +vst1_u16_x4 (uint16_t * __a, uint16x4x4_t val) { - return __builtin_aarch64_rintv4sf (__a); + union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndxq_f64 (float64x2_t __a) +vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t val) { - return __builtin_aarch64_rintv2df (__a); + union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); } -/* vrshl */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_s8 (int8x8_t __a, int8x8_t __b) +vst1_u32_x4 (uint32_t * __a, uint32x2x4_t val) { - return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b); + union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_s16 (int16x4_t __a, int16x4_t __b) +vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t val) { - return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b); + union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_s32 (int32x2_t __a, int32x2_t __b) +vst1_f16_x4 (float16_t * __a, float16x4x4_t val) { - return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b); + union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_s64 (int64x1_t __a, int64x1_t __b) +vst1q_f16_x4 (float16_t * __a, float16x8x4_t val) { - return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])}; + union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_u8 (uint8x8_t __a, int8x8_t __b) +vst1_f32_x4 (float32_t * __a, float32x2x4_t val) { - return __builtin_aarch64_urshlv8qi_uus (__a, __b); + union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_u16 (uint16x4_t __a, int16x4_t __b) +vst1q_f32_x4 (float32_t * __a, float32x4x4_t val) { - return __builtin_aarch64_urshlv4hi_uus (__a, __b); + union { float32x4x4_t __i; __builtin_aarch64_simd_xi 
__o; } __u = { val }; + __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_u32 (uint32x2_t __a, int32x2_t __b) +vst1_p8_x4 (poly8_t * __a, poly8x8x4_t val) { - return __builtin_aarch64_urshlv2si_uus (__a, __b); + union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshl_u64 (uint64x1_t __a, int64x1_t __b) +vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t val) { - return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])}; + union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_s8 (int8x16_t __a, int8x16_t __b) +vst1_p16_x4 (poly16_t * __a, poly16x4x4_t val) { - return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b); + union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_s16 (int16x8_t __a, int16x8_t __b) +vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t val) { - return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b); + union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_s32 (int32x4_t __a, int32x4_t __b) +vst1_s64_x4 (int64_t * __a, int64x1x4_t val) { - return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b); + union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_s64 (int64x2_t __a, int64x2_t __b) +vst1_u64_x4 (uint64_t * __a, uint64x1x4_t val) { - return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b); + union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_u8 (uint8x16_t __a, int8x16_t __b) +vst1_p64_x4 (poly64_t * __a, poly64x1x4_t val) { - return __builtin_aarch64_urshlv16qi_uus (__a, __b); + union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_u16 (uint16x8_t __a, int16x8_t __b) +vst1q_s64_x4 (int64_t * __a, int64x2x4_t val) { - return __builtin_aarch64_urshlv8hi_uus (__a, __b); + 
union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_u32 (uint32x4_t __a, int32x4_t __b) +vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t val) { - return __builtin_aarch64_urshlv4si_uus (__a, __b); + union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshlq_u64 (uint64x2_t __a, int64x2_t __b) +vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t val) { - return __builtin_aarch64_urshlv2di_uus (__a, __b); + union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); } -__extension__ extern __inline int64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshld_s64 (int64_t __a, int64_t __b) +vst1_f64_x4 (float64_t * __a, float64x1x4_t val) { - return __builtin_aarch64_srshldi (__a, __b); + union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __u.__o); } -__extension__ extern __inline uint64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshld_u64 (uint64_t __a, int64_t __b) +vst1q_f64_x4 (float64_t * __a, float64x2x4_t val) { - return __builtin_aarch64_urshldi_uus (__a, __b); + union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __u.__o); } -/* vrshr */ +/* vstn */ -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_s8 (int8x8_t __a, const int __b) +vst2_s64 (int64_t * __a, int64x1x2_t __val) { - return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b); + __builtin_aarch64_simd_oi __o; + int64x2x2_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_s16 (int16x4_t __a, const int __b) +vst2_u64 (uint64_t * __a, uint64x1x2_t __val) { - return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b); + __builtin_aarch64_simd_oi __o; + uint64x2x2_t __temp; + __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline void __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_s32 (int32x2_t __a, const int __b) +vst2_f64 (float64_t * __a, float64x1x2_t __val) { - return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b); + __builtin_aarch64_simd_oi __o; + float64x2x2_t __temp; + __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); + __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_s64 (int64x1_t __a, const int __b) +vst2_s8 (int8_t * __a, int8x8x2_t __val) { - return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)}; + __builtin_aarch64_simd_oi __o; + int8x16x2_t __temp; + __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_u8 (uint8x8_t __a, const int __b) +vst2_p8 (poly8_t * __a, poly8x8x2_t __val) { - return __builtin_aarch64_urshr_nv8qi_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + poly8x16x2_t __temp; + __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_u16 (uint16x4_t __a, const int __b) +vst2_s16 (int16_t * __a, int16x4x2_t __val) { - return __builtin_aarch64_urshr_nv4hi_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + int16x8x2_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_u32 (uint32x2_t __a, const int __b) +vst2_p16 (poly16_t * __a, poly16x4x2_t __val) { - return __builtin_aarch64_urshr_nv2si_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + poly16x8x2_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, 
(int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshr_n_u64 (uint64x1_t __a, const int __b) +vst2_s32 (int32_t * __a, int32x2x2_t __val) { - return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)}; + __builtin_aarch64_simd_oi __o; + int32x4x2_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_s8 (int8x16_t __a, const int __b) +vst2_u8 (uint8_t * __a, uint8x8x2_t __val) { - return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b); + __builtin_aarch64_simd_oi __o; + uint8x16x2_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_s16 (int16x8_t __a, const int __b) +vst2_u16 (uint16_t * __a, uint16x4x2_t __val) { - return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b); + __builtin_aarch64_simd_oi __o; + uint16x8x2_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_s32 (int32x4_t __a, const int __b) +vst2_u32 (uint32_t * __a, uint32x2x2_t __val) { - return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b); + __builtin_aarch64_simd_oi __o; + uint32x4x2_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_s64 (int64x2_t __a, const int __b) +vst2_f16 (float16_t * __a, float16x4x2_t __val) { - return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b); + __builtin_aarch64_simd_oi __o; + float16x8x2_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 
(__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); + __builtin_aarch64_st2v4hf (__a, __o); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_u8 (uint8x16_t __a, const int __b) +vst2_f32 (float32_t * __a, float32x2x2_t __val) { - return __builtin_aarch64_urshr_nv16qi_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + float32x4x2_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_u16 (uint16x8_t __a, const int __b) +vst2_p64 (poly64_t * __a, poly64x1x2_t __val) { - return __builtin_aarch64_urshr_nv8hi_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + poly64x2x2_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __temp.val[1], 1); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_u32 (uint32x4_t __a, const int __b) +vst2q_s8 (int8_t * __a, int8x16x2_t __val) { - return __builtin_aarch64_urshr_nv4si_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrq_n_u64 (uint64x2_t __a, const int __b) +vst2q_p8 (poly8_t * __a, poly8x16x2_t __val) { - return __builtin_aarch64_urshr_nv2di_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrshrd_n_s64 (int64_t __a, const int __b) +vst2q_s16 (int16_t * __a, int16x8x2_t __val) { - return __builtin_aarch64_srshr_ndi (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint64_t +__extension__ extern __inline void __attribute__ 
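Unlike the vst1_*_x2 forms earlier in this hunk, the vst2_* definitions here are interleaving stores: lane 0 of each input vector is written first, then lane 1, and so on. A sketch using vst2_f32 from just above (illustrative only; store_complex2_f32 is an invented name):

#include <arm_neon.h>

/* Writes re[0], im[0], re[1], im[1] at dst, i.e. an interleaved
   complex layout, rather than the two vectors back to back.  */
static inline void
store_complex2_f32 (float32_t *dst, float32x2_t re, float32x2_t im)
{
  float32x2x2_t ri = { { re, im } };
  vst2_f32 (dst, ri);
}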
((__always_inline__, __gnu_inline__, __artificial__)) -vrshrd_n_u64 (uint64_t __a, const int __b) +vst2q_p16 (poly16_t * __a, poly16x8x2_t __val) { - return __builtin_aarch64_urshr_ndi_uus (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -/* vrsqrte. */ - -__extension__ extern __inline float32_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrtes_f32 (float32_t __a) +vst2q_s32 (int32_t * __a, int32x4x2_t __val) { - return __builtin_aarch64_rsqrtesf (__a); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline float64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrted_f64 (float64_t __a) +vst2q_s64 (int64_t * __a, int64x2x2_t __val) { - return __builtin_aarch64_rsqrtedf (__a); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrte_f32 (float32x2_t __a) +vst2q_u8 (uint8_t * __a, uint8x16x2_t __val) { - return __builtin_aarch64_rsqrtev2sf (__a); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrte_f64 (float64x1_t __a) +vst2q_u16 (uint16_t * __a, uint16x8x2_t __val) { - return (float64x1_t) {vrsqrted_f64 (vget_lane_f64 (__a, 0))}; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrteq_f32 (float32x4_t __a) +vst2q_u32 (uint32_t * __a, uint32x4x2_t __val) { - return __builtin_aarch64_rsqrtev4sf (__a); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrteq_f64 (float64x2_t __a) +vst2q_u64 (uint64_t * __a, uint64x2x2_t __val) { - return __builtin_aarch64_rsqrtev2df (__a); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); + __o = 
__builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); } -/* vrsqrts. */ - -__extension__ extern __inline float32_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrtss_f32 (float32_t __a, float32_t __b) +vst2q_f16 (float16_t * __a, float16x8x2_t __val) { - return __builtin_aarch64_rsqrtssf (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); + __builtin_aarch64_st2v8hf (__a, __o); } -__extension__ extern __inline float64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrtsd_f64 (float64_t __a, float64_t __b) +vst2q_f32 (float32_t * __a, float32x4x2_t __val) { - return __builtin_aarch64_rsqrtsdf (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); + __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrts_f32 (float32x2_t __a, float32x2_t __b) +vst2q_f64 (float64_t * __a, float64x2x2_t __val) { - return __builtin_aarch64_rsqrtsv2sf (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); + __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline float64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrts_f64 (float64x1_t __a, float64x1_t __b) +vst2q_p64 (poly64_t * __a, poly64x2x2_t __val) { - return (float64x1_t) {vrsqrtsd_f64 (vget_lane_f64 (__a, 0), - vget_lane_f64 (__b, 0))}; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, + (poly64x2_t) __val.val[1], 1); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b) +vst3_s64 (int64_t * __a, int64x1x3_t __val) { - return __builtin_aarch64_rsqrtsv4sf (__a, __b); + __builtin_aarch64_simd_ci __o; + int64x2x3_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrtsq_f64 (float64x2_t __a, float64x2_t __b) +vst3_u64 (uint64_t * __a, uint64x1x3_t __val) { - return 
__builtin_aarch64_rsqrtsv2df (__a, __b);
+  __builtin_aarch64_simd_ci __o;
+  uint64x2x3_t __temp;
+  __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
+  __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
+  __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
+  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
+  __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
 }
-/* vrsra */
-
-__extension__ extern __inline int8x8_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+vst3_f64 (float64_t * __a, float64x1x3_t __val)
 {
-  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
+  __builtin_aarch64_simd_ci __o;
+  float64x2x3_t __temp;
+  __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
+  __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
+  __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
+  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2);
+  __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
 }
-__extension__ extern __inline int16x4_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+vst3_s8 (int8_t * __a, int8x8x3_t __val)
 {
-  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
+  __builtin_aarch64_simd_ci __o;
+  int8x16x3_t __temp;
+  __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
+  __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
+  __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
+  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
-__extension__ extern __inline int32x2_t
+__extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+vst3_p8 (poly8_t * __a, poly8x8x3_t __val)
 {
-  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
+  __builtin_aarch64_simd_ci __o;
+  poly8x16x3_t __temp;
+  __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
+  __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
+  __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
+  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
+  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
-__extension__ extern __inline int64x1_t
+__extension__ extern __inline void
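The vst3_* forms follow the same pattern with three-way interleaving; a common use is writing planar channels out as packed pixels. A sketch using vst3_u8, defined a few lines further down in this hunk (illustrative only; store_rgb8 is an invented name):

#include <arm_neon.h>

/* Pack three 8-lane colour planes into RGBRGB... order; 24 bytes
   are written at dst.  */
static inline void
store_rgb8 (uint8_t *dst, uint8x8_t r, uint8x8_t g, uint8x8_t b)
{
  uint8x8x3_t rgb = { { r, g, b } };
  vst3_u8 (dst, rgb);
}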
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +vst3_s16 (int16_t * __a, int16x4x3_t __val) { - return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)}; + __builtin_aarch64_simd_ci __o; + int16x8x3_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +vst3_p16 (poly16_t * __a, poly16x4x3_t __val) { - return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + poly16x8x3_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +vst3_s32 (int32_t * __a, int32x2x3_t __val) { - return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + int32x4x3_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +vst3_u8 (uint8_t * __a, uint8x8x3_t __val) { - return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + uint8x16x3_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +vst3_u16 (uint16_t * __a, uint16x4x3_t __val) { - return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)}; + __builtin_aarch64_simd_ci __o; + uint16x8x3_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +vst3_u32 (uint32_t * __a, uint32x2x3_t __val) { - return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + uint32x4x3_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +vst3_f16 (float16_t * __a, float16x4x3_t __val) { - return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + float16x8x3_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +vst3_f32 (float32_t * __a, float32x2x3_t __val) { - return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + float32x4x3_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 
(__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); + __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +vst3_p64 (poly64_t * __a, poly64x1x3_t __val) { - return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + poly64x2x3_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __temp.val[2], 2); + __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +vst3q_s8 (int8_t * __a, int8x16x3_t __val) { - return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +vst3q_p8 (poly8_t * __a, poly8x16x3_t __val) { - return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +vst3q_s16 (int16_t * __a, int16x8x3_t __val) { - return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +vst3q_p16 (poly16_t * __a, poly16x8x3_t __val) { - return 
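[Usage note, not part of the patch: the D-register vst3 forms above interleave three 8-lane vectors. A small sketch with vst3_u8 packing planar R/G/B bytes into RGB triplets, assuming <arm_neon.h>; the buffer names are illustrative.]

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint8_t r[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  uint8_t g[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  uint8_t b[8] = { 20, 21, 22, 23, 24, 25, 26, 27 };
  uint8_t rgb[24];

  uint8x8x3_t px;
  px.val[0] = vld1_u8 (r);
  px.val[1] = vld1_u8 (g);
  px.val[2] = vld1_u8 (b);
  vst3_u8 (rgb, px);		/* rgb = {0,10,20, 1,11,21, ...} */

  for (int i = 0; i < 24; i++)
    printf ("%u ", rgb[i]);
  printf ("\n");
  return 0;
}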
__builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c) +vst3q_s32 (int32_t * __a, int32x4x3_t __val) { - return __builtin_aarch64_srsra_ndi (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) +vst3q_s64 (int64_t * __a, int64x2x3_t __val) { - return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); } -#pragma GCC push_options -#pragma GCC target ("+nothing+crypto") - -/* vsha1 */ - -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +vst3q_u8 (uint8_t * __a, uint8x16x3_t __val) { - return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +vst3q_u16 (uint16_t * __a, uint16x8x3_t __val) { - return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +vst3q_u32 (uint32_t * __a, uint32x4x3_t __val) { - return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); + 
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint32_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha1h_u32 (uint32_t hash_e) +vst3q_u64 (uint64_t * __a, uint64x2x3_t __val) { - return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) +vst3q_f16 (float16_t * __a, float16x8x3_t __val) { - return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) +vst3q_f32 (float32_t * __a, float32x4x3_t __val) { - return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); + __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) +vst3q_f64 (float64_t * __a, float64x2x3_t __val) { - return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) +vst3q_p64 (poly64_t * __a, poly64x2x3_t __val) { - return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, + (poly64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2di 
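[Usage note, not part of the patch: the Q-register vst3q forms write their three vectors element-interleaved, one element from each register per group. A sketch with vst3q_f32 storing x/y/z components, assuming <arm_neon.h>.]

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  float x[4] = { 0, 1, 2, 3 };
  float y[4] = { 10, 11, 12, 13 };
  float z[4] = { 20, 21, 22, 23 };
  float xyz[12];

  float32x4x3_t v;
  v.val[0] = vld1q_f32 (x);
  v.val[1] = vld1q_f32 (y);
  v.val[2] = vld1q_f32 (z);
  vst3q_f32 (xyz, v);		/* xyz = {0,10,20, 1,11,21, 2,12,22, 3,13,23} */

  for (int i = 0; i < 12; i++)
    printf ("%g ", xyz[i]);
  printf ("\n");
  return 0;
}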
((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) +vst4_s64 (int64_t * __a, int64x1x4_t __val) { - return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); + __builtin_aarch64_simd_xi __o; + int64x2x4_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s64 (__val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) +vst4_u64 (uint64_t * __a, uint64x1x4_t __val) { - return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); + __builtin_aarch64_simd_xi __o; + uint64x2x4_t __temp; + __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u64 (__val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline poly128_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_p64 (poly64_t a, poly64_t b) +vst4_f64 (float64_t * __a, float64x1x4_t __val) { - return - __builtin_aarch64_crypto_pmulldi_ppp (a, b); + __builtin_aarch64_simd_xi __o; + float64x2x4_t __temp; + __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f64 (__val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline poly128_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_p64 (poly64x2_t a, poly64x2_t b) +vst4_s8 (int8_t * __a, int8x8x4_t 
__val) { - return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); + __builtin_aarch64_simd_xi __o; + int8x16x4_t __temp; + __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s8 (__val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -#pragma GCC pop_options - -/* vshl */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_s8 (int8x8_t __a, const int __b) +vst4_p8 (poly8_t * __a, poly8x8x4_t __val) { - return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b); + __builtin_aarch64_simd_xi __o; + poly8x16x4_t __temp; + __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p8 (__val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_s16 (int16x4_t __a, const int __b) +vst4_s16 (int16_t * __a, int16x4x4_t __val) { - return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b); + __builtin_aarch64_simd_xi __o; + int16x8x4_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s16 (__val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_s32 (int32x2_t __a, const int __b) +vst4_p16 (poly16_t * __a, poly16x4x4_t __val) { - return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b); + __builtin_aarch64_simd_xi __o; + poly16x8x4_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = 
vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p16 (__val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_s64 (int64x1_t __a, const int __b) +vst4_s32 (int32_t * __a, int32x2x4_t __val) { - return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)}; + __builtin_aarch64_simd_xi __o; + int32x4x4_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s32 (__val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_u8 (uint8x8_t __a, const int __b) +vst4_u8 (uint8_t * __a, uint8x8x4_t __val) { - return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b); + __builtin_aarch64_simd_xi __o; + uint8x16x4_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u8 (__val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_u16 (uint16x4_t __a, const int __b) +vst4_u16 (uint16_t * __a, uint16x4x4_t __val) { - return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b); + __builtin_aarch64_simd_xi __o; + uint16x8x4_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u16 (__val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + 
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_u32 (uint32x2_t __a, const int __b) +vst4_u32 (uint32_t * __a, uint32x2x4_t __val) { - return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b); + __builtin_aarch64_simd_xi __o; + uint32x4x4_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u32 (__val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_n_u64 (uint64x1_t __a, const int __b) +vst4_f16 (float16_t * __a, float16x4x4_t __val) { - return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)}; + __builtin_aarch64_simd_xi __o; + float16x8x4_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f16 (__val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_s8 (int8x16_t __a, const int __b) +vst4_f32 (float32_t * __a, float32x2x4_t __val) { - return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b); + __builtin_aarch64_simd_xi __o; + float32x4x4_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f32 (__val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3); + __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline int16x8_t 
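[Usage note, not part of the patch: the D-register vst4 forms above pad each 64-bit input up to a Q register with vcombine before the underlying st4, but from the caller's side they simply interleave four short vectors. A sketch with vst4_s32 (two lanes per vector), assuming <arm_neon.h>.]

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int32_t a[2] = { 0, 1 };
  int32_t b[2] = { 10, 11 };
  int32_t c[2] = { 20, 21 };
  int32_t d[2] = { 30, 31 };
  int32_t out[8];

  int32x2x4_t v;
  v.val[0] = vld1_s32 (a);
  v.val[1] = vld1_s32 (b);
  v.val[2] = vld1_s32 (c);
  v.val[3] = vld1_s32 (d);
  vst4_s32 (out, v);		/* out = {0,10,20,30, 1,11,21,31} */

  for (int i = 0; i < 8; i++)
    printf ("%d ", out[i]);
  printf ("\n");
  return 0;
}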
+__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_s16 (int16x8_t __a, const int __b) +vst4_p64 (poly64_t * __a, poly64x1x4_t __val) { - return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b); + __builtin_aarch64_simd_xi __o; + poly64x2x4_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p64 (__val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_s32 (int32x4_t __a, const int __b) +vst4q_s8 (int8_t * __a, int8x16x4_t __val) { - return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_s64 (int64x2_t __a, const int __b) +vst4q_p8 (poly8_t * __a, poly8x16x4_t __val) { - return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_u8 (uint8x16_t __a, const int __b) +vst4q_s16 (int16_t * __a, int16x8x4_t __val) { - return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_u16 (uint16x8_t __a, const int __b) +vst4q_p16 (poly16_t * __a, poly16x8x4_t __val) { - return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b); + 
__builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_u32 (uint32x4_t __a, const int __b) +vst4q_s32 (int32_t * __a, int32x4x4_t __val) { - return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_n_u64 (uint64x2_t __a, const int __b) +vst4q_s64 (int64_t * __a, int64x2x4_t __val) { - return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshld_n_s64 (int64_t __a, const int __b) +vst4q_u8 (uint8_t * __a, uint8x16x4_t __val) { - return __builtin_aarch64_ashldi (__a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } -__extension__ extern __inline uint64_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshld_n_u64 (uint64_t __a, const int __b) +vst4q_u16 (uint16_t * __a, uint16x8x4_t __val) { - return (uint64_t) __builtin_aarch64_ashldi (__a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_s8 (int8x8_t __a, int8x8_t __b) +vst4q_u32 (uint32_t * __a, uint32x4x4_t __val) { - return __builtin_aarch64_sshlv8qi (__a, __b); + 
__builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_s16 (int16x4_t __a, int16x4_t __b) +vst4q_u64 (uint64_t * __a, uint64x2x4_t __val) { - return __builtin_aarch64_sshlv4hi (__a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_s32 (int32x2_t __a, int32x2_t __b) +vst4q_f16 (float16_t * __a, float16x8x4_t __val) { - return __builtin_aarch64_sshlv2si (__a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_s64 (int64x1_t __a, int64x1_t __b) +vst4q_f32 (float32_t * __a, float32x4x4_t __val) { - return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])}; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[3], 3); + __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_u8 (uint8x8_t __a, int8x8_t __b) +vst4q_f64 (float64_t * __a, float64x2x4_t __val) { - return __builtin_aarch64_ushlv8qi_uus (__a, __b); + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[3], 3); + __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_u16 (uint16x4_t __a, int16x4_t __b) +vst4q_p64 (poly64_t * __a, poly64x2x4_t __val) { - return __builtin_aarch64_ushlv4hi_uus (__a, __b); + 
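[Usage note, not part of the patch: a common pattern for the Q-register vst4q stores is a de-interleaving load / re-interleaving store round trip. The sketch below swaps the R and B channels of 16 RGBA pixels with vld4q_u8 and vst4q_u8, assuming <arm_neon.h>; the pixel layout is illustrative.]

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint8_t rgba[64];
  for (int i = 0; i < 64; i++)
    rgba[i] = (uint8_t) i;

  uint8x16x4_t px = vld4q_u8 (rgba);	/* px.val[c] holds channel c of 16 pixels */
  uint8x16_t tmp = px.val[0];		/* swap the R and B planes */
  px.val[0] = px.val[2];
  px.val[2] = tmp;
  vst4q_u8 (rgba, px);			/* re-interleave back into memory */

  for (int i = 0; i < 8; i++)
    printf ("%u ", rgba[i]);		/* first two pixels: 2 1 0 3 6 5 4 7 */
  printf ("\n");
  return 0;
}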
__builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, + (poly64x2_t) __val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_u32 (uint32x2_t __a, int32x2_t __b) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) { - return __builtin_aarch64_ushlv2si_uus (__a, __b); + *__ptr = __val; } -__extension__ extern __inline uint64x1_t +/* vsub */ + +__extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshl_u64 (uint64x1_t __a, int64x1_t __b) +vsubd_s64 (int64_t __a, int64_t __b) { - return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])}; + return __a - __b; } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_s8 (int8x16_t __a, int8x16_t __b) +vsubd_u64 (uint64_t __a, uint64_t __b) { - return __builtin_aarch64_sshlv16qi (__a, __b); + return __a - __b; } -__extension__ extern __inline int16x8_t +/* vtbx1 */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_s16 (int16x8_t __a, int16x8_t __b) +vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) { - return __builtin_aarch64_sshlv8hi (__a, __b); + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (8)); + int8x8_t __tbl = vtbl1_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_s32 (int32x4_t __a, int32x4_t __b) +vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_sshlv4si (__a, __b); + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_s64 (int64x2_t __a, int64x2_t __b) +vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_sshlv2di (__a, __b); + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); } -__extension__ extern __inline uint8x16_t +/* vtbx3 */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_u8 (uint8x16_t __a, int8x16_t __b) +vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) { - return __builtin_aarch64_ushlv16qi_uus (__a, __b); + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (24)); + int8x8_t __tbl = vtbl3_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_u16 (uint16x8_t __a, int16x8_t __b) +vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_ushlv8hi_uus (__a, __b); + 
uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_u32 (uint32x4_t __a, int32x4_t __b) +vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_ushlv4si_uus (__a, __b); + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); } -__extension__ extern __inline uint64x2_t +/* vtbx4 */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshlq_u64 (uint64x2_t __a, int64x2_t __b) +vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) { - return __builtin_aarch64_ushlv2di_uus (__a, __b); + int8x8_t __result; + int8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); + return __result; } -__extension__ extern __inline int64_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshld_s64 (int64_t __a, int64_t __b) +vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_sshldi (__a, __b); + uint8x8_t __result; + uint8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); + return __result; } -__extension__ extern __inline uint64_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshld_u64 (uint64_t __a, uint64_t __b) +vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_ushldi_uus (__a, __b); + poly8x8_t __result; + poly8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; + __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); + __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, + (int8x16_t) __temp.val[1], 1); + __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); + return __result; } -__extension__ extern __inline int16x8_t +/* vtrn */ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_high_n_s8 (int8x16_t __a, const int __b) +vtrn1_f16 (float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_sshll2_nv16qi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); +#endif } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_high_n_s16 
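[Usage note, not part of the patch: the vtbx1 implementation above (vclt_u8 mask, vtbl1, vbsl) means lanes whose index is out of range keep the destination value rather than becoming zero. A sketch with vtbx1_u8 showing that behaviour, assuming <arm_neon.h>; the byte values are illustrative.]

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint8_t r_bytes[8]   = { 90, 91, 92, 93, 94, 95, 96, 97 };
  uint8_t tab_bytes[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  uint8_t idx_bytes[8] = { 0, 7, 8, 255, 3, 3, 200, 1 };
  uint8_t out[8];

  uint8x8_t r = vld1_u8 (r_bytes);
  uint8x8_t tab = vld1_u8 (tab_bytes);
  uint8x8_t idx = vld1_u8 (idx_bytes);

  /* Lanes with index < 8 come from TAB; the rest keep R.  */
  vst1_u8 (out, vtbx1_u8 (r, tab, idx));

  for (int i = 0; i < 8; i++)
    printf ("%u ", out[i]);	/* 10 17 92 93 13 13 96 11 */
  printf ("\n");
  return 0;
}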
(int16x8_t __a, const int __b) +vtrn1_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_sshll2_nv8hi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline int64x2_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_high_n_s32 (int32x4_t __a, const int __b) +vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) { - return __builtin_aarch64_sshll2_nv4si (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_high_n_u8 (uint8x16_t __a, const int __b) +vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) { - return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); +#endif } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_high_n_u16 (uint16x8_t __a, const int __b) +vtrn1_s8 (int8x8_t __a, int8x8_t __b) { - return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_high_n_u32 (uint32x4_t __a, const int __b) +vtrn1_s16 (int16x4_t __a, int16x4_t __b) { - return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); +#endif } -__extension__ extern __inline int16x8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_n_s8 (int8x8_t __a, const int __b) +vtrn1_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_sshll_nv8qi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_n_s16 (int16x4_t __a, const int __b) +vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_sshll_nv4hi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_n_s32 (int32x2_t __a, const int __b) +vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_sshll_nv2si (__a, __b); +#ifdef __AARCH64EB__ + return 
__builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); +#endif } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_n_u8 (uint8x8_t __a, const int __b) +vtrn1_u32 (uint32x2_t __a, uint32x2_t __b) { - return __builtin_aarch64_ushll_nv8qi_uus (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_n_u16 (uint16x4_t __a, const int __b) +vtrn1q_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_ushll_nv4hi_uus (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshll_n_u32 (uint32x2_t __a, const int __b) +vtrn1q_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_ushll_nv2si_uus (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); +#endif } -/* vshr */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_s8 (int8x8_t __a, const int __b) +vtrn1q_f64 (float64x2_t __a, float64x2_t __b) { - return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline int16x4_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_s16 (int16x4_t __a, const int __b) +vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) { - return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); +#endif } -__extension__ extern __inline int32x2_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_s32 (int32x2_t __a, const int __b) +vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) { - return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif } -__extension__ extern __inline int64x1_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_s64 (int64x1_t __a, const int __b) +vtrn1q_s8 (int8x16_t __a, int8x16_t __b) { - return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 
15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); +#endif } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_u8 (uint8x8_t __a, const int __b) +vtrn1q_s16 (int16x8_t __a, int16x8_t __b) { - return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_u16 (uint16x4_t __a, const int __b) +vtrn1q_s32 (int32x4_t __a, int32x4_t __b) { - return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); +#endif } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_u32 (uint32x2_t __a, const int __b) +vtrn1q_s64 (int64x2_t __a, int64x2_t __b) { - return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshr_n_u64 (uint64x1_t __a, const int __b) +vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); +#endif } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_s8 (int8x16_t __a, const int __b) +vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) { - return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_s16 (int16x8_t __a, const int __b) +vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) { - return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); +#endif } -__extension__ extern __inline int32x4_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_s32 (int32x4_t __a, const int __b) +vtrn1q_p64 (poly64x2_t __a, poly64x2_t __b) { - return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else 
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_s64 (int64x2_t __a, const int __b) +vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) { - return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_u8 (uint8x16_t __a, const int __b) +vtrn2_f16 (float16x4_t __a, float16x4_t __b) { - return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); +#endif } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_u16 (uint16x8_t __a, const int __b) +vtrn2_f32 (float32x2_t __a, float32x2_t __b) { - return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_u32 (uint32x4_t __a, const int __b) +vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) { - return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrq_n_u64 (uint64x2_t __a, const int __b) +vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) { - return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); +#endif } -__extension__ extern __inline int64_t +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrd_n_s64 (int64_t __a, const int __b) +vtrn2_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_ashr_simddi (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif } -__extension__ extern __inline uint64_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshrd_n_u64 (uint64_t __a, const int __b) +vtrn2_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_lshr_simddi_uus (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); +#endif } -/* vsli */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline int32x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +vtrn2_s32 (int32x2_t __a, int32x2_t __b) { - return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +vtrn2_u8 (uint8x8_t __a, uint8x8_t __b) { - return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) { - return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); +#endif } -__extension__ extern __inline int64x1_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) { - return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +vtrn2q_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +vtrn2q_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); +#endif } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +vtrn2q_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_u64 (uint64x1_t __a, 
uint64x1_t __b, const int __c) +vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) { - return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); +#endif } -__extension__ extern __inline poly64x1_t +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) { - return (poly64x1_t) {__builtin_aarch64_ssli_ndi_ppps (__a[0], __b[0], __c)}; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +vtrn2q_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); +#endif } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +vtrn2q_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +vtrn2q_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); +#endif } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +vtrn2q_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); +#endif } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_u16 
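[Illustrative note, not part of the patch.] The trn1/trn2 masks above select the even-indexed lanes (trn1) or odd-indexed lanes (trn2) of each input and interleave them; the separate __AARCH64EB__ masks are needed because __builtin_shuffle numbers elements in GCC generic-vector order, which runs opposite to the architectural lane order on big-endian AArch64. A minimal usage sketch, assuming an AArch64 toolchain with this arm_neon.h and a hosted environment:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int32_t da[4] = {0, 1, 2, 3};
  int32_t db[4] = {4, 5, 6, 7};
  int32_t out[4];

  /* TRN1: even lanes of each input, interleaved -> 0 4 2 6.  */
  vst1q_s32 (out, vtrn1q_s32 (vld1q_s32 (da), vld1q_s32 (db)));
  printf ("trn1: %d %d %d %d\n", out[0], out[1], out[2], out[3]);

  /* TRN2: odd lanes of each input, interleaved -> 1 5 3 7.  */
  vst1q_s32 (out, vtrn2q_s32 (vld1q_s32 (da), vld1q_s32 (db)));
  printf ("trn2: %d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}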
(uint16x8_t __a, uint16x8_t __b, const int __c) +vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); +#endif } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) -{ - return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); -} - -__extension__ extern __inline poly64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) { - return __builtin_aarch64_ssli_nv2di_ppps (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vslid_n_s64 (int64_t __a, int64_t __b, const int __c) -{ - return __builtin_aarch64_ssli_ndi (__a, __b, __c); -} -__extension__ extern __inline uint64_t +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c) +vtrn2q_p64 (poly64x2_t __a, poly64x2_t __b) { - return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif } -/* vsqadd */ - -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqadd_u8 (uint8x8_t __a, int8x8_t __b) +vtrn_f16 (float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_usqaddv8qi_uus (__a, __b); + return (float16x4x2_t) {vtrn1_f16 (__a, __b), vtrn2_f16 (__a, __b)}; } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqadd_u16 (uint16x4_t __a, int16x4_t __b) +vtrn_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_usqaddv4hi_uus (__a, __b); + return (float32x2x2_t) {vtrn1_f32 (__a, __b), vtrn2_f32 (__a, __b)}; } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline poly8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqadd_u32 (uint32x2_t __a, int32x2_t __b) +vtrn_p8 (poly8x8_t __a, poly8x8_t __b) { - return __builtin_aarch64_usqaddv2si_uus (__a, __b); + return (poly8x8x2_t) {vtrn1_p8 (__a, __b), vtrn2_p8 (__a, __b)}; } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline poly16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqadd_u64 (uint64x1_t __a, int64x1_t __b) +vtrn_p16 (poly16x4_t __a, poly16x4_t __b) { - return 
(uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])}; + return (poly16x4x2_t) {vtrn1_p16 (__a, __b), vtrn2_p16 (__a, __b)}; } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) +vtrn_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_usqaddv16qi_uus (__a, __b); + return (int8x8x2_t) {vtrn1_s8 (__a, __b), vtrn2_s8 (__a, __b)}; } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) +vtrn_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_usqaddv8hi_uus (__a, __b); + return (int16x4x2_t) {vtrn1_s16 (__a, __b), vtrn2_s16 (__a, __b)}; } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) +vtrn_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_usqaddv4si_uus (__a, __b); + return (int32x2x2_t) {vtrn1_s32 (__a, __b), vtrn2_s32 (__a, __b)}; } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline uint8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) +vtrn_u8 (uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_usqaddv2di_uus (__a, __b); + return (uint8x8x2_t) {vtrn1_u8 (__a, __b), vtrn2_u8 (__a, __b)}; } -__extension__ extern __inline uint8_t +__extension__ extern __inline uint16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqaddb_u8 (uint8_t __a, int8_t __b) +vtrn_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_usqaddqi_uus (__a, __b); + return (uint16x4x2_t) {vtrn1_u16 (__a, __b), vtrn2_u16 (__a, __b)}; } -__extension__ extern __inline uint16_t +__extension__ extern __inline uint32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqaddh_u16 (uint16_t __a, int16_t __b) +vtrn_u32 (uint32x2_t __a, uint32x2_t __b) { - return __builtin_aarch64_usqaddhi_uus (__a, __b); + return (uint32x2x2_t) {vtrn1_u32 (__a, __b), vtrn2_u32 (__a, __b)}; } -__extension__ extern __inline uint32_t +__extension__ extern __inline float16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqadds_u32 (uint32_t __a, int32_t __b) +vtrnq_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_usqaddsi_uus (__a, __b); + return (float16x8x2_t) {vtrn1q_f16 (__a, __b), vtrn2q_f16 (__a, __b)}; } -__extension__ extern __inline uint64_t +__extension__ extern __inline float32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqaddd_u64 (uint64_t __a, int64_t __b) +vtrnq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_usqadddi_uus (__a, __b); + return (float32x4x2_t) {vtrn1q_f32 (__a, __b), vtrn2q_f32 (__a, __b)}; } -/* vsqrt */ -__extension__ extern __inline float32x2_t +__extension__ extern __inline poly8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqrt_f32 (float32x2_t a) +vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) { - return __builtin_aarch64_sqrtv2sf (a); + return (poly8x16x2_t) {vtrn1q_p8 (__a, __b), vtrn2q_p8 (__a, __b)}; } -__extension__ extern __inline float32x4_t +__extension__ extern __inline poly16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vsqrtq_f32 (float32x4_t a) +vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) { - return __builtin_aarch64_sqrtv4sf (a); + return (poly16x8x2_t) {vtrn1q_p16 (__a, __b), vtrn2q_p16 (__a, __b)}; } -__extension__ extern __inline float64x1_t +__extension__ extern __inline int8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqrt_f64 (float64x1_t a) +vtrnq_s8 (int8x16_t __a, int8x16_t __b) { - return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) }; + return (int8x16x2_t) {vtrn1q_s8 (__a, __b), vtrn2q_s8 (__a, __b)}; } -__extension__ extern __inline float64x2_t +__extension__ extern __inline int16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqrtq_f64 (float64x2_t a) +vtrnq_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_aarch64_sqrtv2df (a); + return (int16x8x2_t) {vtrn1q_s16 (__a, __b), vtrn2q_s16 (__a, __b)}; } -/* vsra */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline int32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +vtrnq_s32 (int32x4_t __a, int32x4_t __b) { - return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c); + return (int32x4x2_t) {vtrn1q_s32 (__a, __b), vtrn2q_s32 (__a, __b)}; } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c); + return (uint8x16x2_t) {vtrn1q_u8 (__a, __b), vtrn2q_u8 (__a, __b)}; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c); + return (uint16x8x2_t) {vtrn1q_u16 (__a, __b), vtrn2q_u16 (__a, __b)}; } -__extension__ extern __inline int64x1_t +__extension__ extern __inline uint32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)}; + return (uint32x4x2_t) {vtrn1q_u32 (__a, __b), vtrn2q_u32 (__a, __b)}; } +/* vtst */ + __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +vtst_s8 (int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c); + return (uint8x8_t) ((__a & __b) != 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +vtst_s16 (int16x4_t __a, int16x4_t __b) { - return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c); + return (uint16x4_t) ((__a & __b) != 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +vtst_s32 (int32x2_t __a, int32x2_t __b) { - return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c); + return (uint32x2_t) ((__a & __b) != 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, 
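[Illustrative note, not part of the patch.] The combined vtrn_*/vtrnq_* wrappers above just pair the two halves, so .val[0] carries the trn1 result and .val[1] the trn2 result. A sketch with a hypothetical helper (name and sample data are mine) that transposes every 2x2 tile spanning two rows:

#include <arm_neon.h>
#include <stdio.h>

/* Hypothetical helper: transpose each 2x2 tile held across two rows,
   using the combined vtrnq_u8 form defined above.  */
static void
transpose_2x2_tiles (uint8x16_t *row0, uint8x16_t *row1)
{
  uint8x16x2_t t = vtrnq_u8 (*row0, *row1);
  *row0 = t.val[0];   /* the vtrn1q_u8 half */
  *row1 = t.val[1];   /* the vtrn2q_u8 half */
}

int
main (void)
{
  uint8_t r0[16], r1[16], o0[16], o1[16];
  for (int i = 0; i < 16; i++)
    {
      r0[i] = i;          /* 0 1 2 3 ...  */
      r1[i] = 100 + i;    /* 100 101 102 ...  */
    }
  uint8x16_t a = vld1q_u8 (r0), b = vld1q_u8 (r1);
  transpose_2x2_tiles (&a, &b);
  vst1q_u8 (o0, a);
  vst1q_u8 (o1, b);
  /* o0 begins 0 100 2 102, o1 begins 1 101 3 103.  */
  printf ("%d %d %d %d / %d %d %d %d\n",
	  o0[0], o0[1], o0[2], o0[3], o1[0], o1[1], o1[2], o1[3]);
  return 0;
}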
__artificial__)) -vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +vtst_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)}; + return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +vtst_u8 (uint8x8_t __a, uint8x8_t __b) { - return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c); + return ((__a & __b) != 0); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +vtst_u16 (uint16x4_t __a, uint16x4_t __b) { - return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c); + return ((__a & __b) != 0); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +vtst_u32 (uint32x2_t __a, uint32x2_t __b) { - return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c); + return ((__a & __b) != 0); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +vtst_u64 (uint64x1_t __a, uint64x1_t __b) { - return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c); + return ((__a & __b) != __AARCH64_UINT64_C (0)); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +vtstq_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c); + return (uint8x16_t) ((__a & __b) != 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +vtstq_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c); + return (uint16x8_t) ((__a & __b) != 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +vtstq_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c); + return (uint32x4_t) ((__a & __b) != 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +vtstq_s64 (int64x2_t __a, int64x2_t __b) { - return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c); + return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0)); } -__extension__ extern __inline int64_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsrad_n_s64 (int64_t __a, int64_t __b, const int __c) +vtstq_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_ssra_ndi (__a, __b, __c); + return ((__a & __b) != 0); } -__extension__ extern __inline uint64_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) +vtstq_u16 (uint16x8_t __a, 
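[Illustrative note, not part of the patch.] As the open-coded (__a & __b) != 0 above shows, vtst* sets a lane to all ones whenever the two inputs share at least one set bit and to zero otherwise. A small sketch, assuming a hosted AArch64 environment:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8_t da[8] = {0x01, 0x02, 0x0f, 0x00, 0x80, 0x10, 0xff, 0x00};
  uint8_t db[8] = {0x01, 0x04, 0x03, 0xff, 0x80, 0x01, 0x00, 0x00};
  uint8_t out[8];

  /* Lanes with a common set bit become 0xff, the rest 0x00:
     expected ff 00 ff 00 ff 00 00 00.  */
  vst1_u8 (out, vtst_u8 (vld1_u8 (da), vld1_u8 (db)));
  for (int i = 0; i < 8; i++)
    printf ("%02x ", out[i]);
  printf ("\n");
  return 0;
}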
uint16x8_t __b) { - return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); + return ((__a & __b) != 0); } -/* vsri */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +vtstq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c); + return ((__a & __b) != 0); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +vtstq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c); + return ((__a & __b) != __AARCH64_UINT64_C (0)); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +vtstd_s64 (int64_t __a, int64_t __b) { - return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c); + return (__a & __b) ? -1ll : 0ll; } -__extension__ extern __inline int64x1_t +__extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +vtstd_u64 (uint64_t __a, uint64_t __b) { - return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)}; + return (__a & __b) ? -1ll : 0ll; } -__extension__ extern __inline uint8x8_t +/* vuqadd */ + +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +vuqadd_s8 (int8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c); + return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +vuqadd_s16 (int16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c); + return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +vuqadd_s32 (int32x2_t __a, uint32x2_t __b) { - return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c); + return __builtin_aarch64_suqaddv2si_ssu (__a, __b); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +vuqadd_s64 (int64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)}; + return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])}; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) { - return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c); + return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_s16 
(int16x8_t __a, int16x8_t __b, const int __c) +vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) { - return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c); + return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) { - return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c); + return __builtin_aarch64_suqaddv4si_ssu (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) -{ - return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) { - return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c); + return __builtin_aarch64_suqaddv2di_ssu (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +vuqaddb_s8 (int8_t __a, uint8_t __b) { - return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c); + return __builtin_aarch64_suqaddqi_ssu (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +vuqaddh_s16 (int16_t __a, uint16_t __b) { - return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c); + return __builtin_aarch64_suqaddhi_ssu (__a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +vuqadds_s32 (int32_t __a, uint32_t __b) { - return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c); + return __builtin_aarch64_suqaddsi_ssu (__a, __b); } __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsrid_n_s64 (int64_t __a, int64_t __b, const int __c) +vuqaddd_s64 (int64_t __a, uint64_t __b) { - return __builtin_aarch64_ssri_ndi (__a, __b, __c); + return __builtin_aarch64_suqadddi_ssu (__a, __b); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c) -{ - return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); -} +#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ + __extension__ extern __inline rettype \ + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ + v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ + { \ + return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ + v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ + } -/* vst1 */ +#define __INTERLEAVE_LIST(op) \ + __DEFINTERLEAVE (op, float16x4x2_t, float16x4_t, f16,) \ + __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ + __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ + __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ + __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ + __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ + __DEFINTERLEAVE (op, int32x2x2_t, 
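[Illustrative note, not part of the patch.] The vuqadd* family maps to SUQADD: an unsigned addend is accumulated into a signed value with signed saturation. A scalar sketch, assuming a hosted AArch64 environment:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* 100 + 100 exceeds the int8_t range, so the result saturates to 127.  */
  printf ("%d\n", vuqaddb_s8 (100, 100));
  /* -100 + 50 fits, so the result is the plain sum, -50.  */
  printf ("%d\n", vuqaddb_s8 (-100, 50));
  return 0;
}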
int32x2_t, s32,) \ + __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ + __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ + __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ + __DEFINTERLEAVE (op, float16x8x2_t, float16x8_t, f16, q) \ + __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ + __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ + __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ + __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ + __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ + __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ + __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ + __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ + __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f16 (float16_t *__a, float16x4_t __b) -{ - __builtin_aarch64_st1v4hf (__a, __b); -} +/* vuzp */ -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f32 (float32_t *a, float32x2_t b) +vuzp1_f16 (float16x4_t __a, float16x4_t __b) { - __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f64 (float64_t *a, float64x1_t b) +vuzp1_f32 (float32x2_t __a, float32x2_t __b) { - *a = b[0]; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p8 (poly8_t *a, poly8x8_t b) +vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) { - __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, - (int8x8_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p16 (poly16_t *a, poly16x4_t b) +vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) { - __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, - (int16x4_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p64 (poly64_t *a, poly64x1_t b) +vuzp1_s8 (int8x8_t __a, int8x8_t __b) { - *a = b[0]; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s8 (int8_t *a, int8x8_t b) +vuzp1_s16 (int16x4_t __a, int16x4_t __b) { - __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) 
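[Illustrative note, not part of the patch.] __DEFINTERLEAVE together with __INTERLEAVE_LIST generates the combined two-register forms from the corresponding *1/*2 halves. For example, when the list is instantiated with op=uzp further on, the s8 entry expands to roughly:

__extension__ extern __inline int8x8x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vuzp_s8 (int8x8_t a, int8x8_t b)
{
  return (int8x8x2_t) {vuzp1_s8 (a, b), vuzp2_s8 (a, b)};
}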
a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s16 (int16_t *a, int16x4_t b) +vuzp1_s32 (int32x2_t __a, int32x2_t __b) { - __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s32 (int32_t *a, int32x2_t b) +vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) { - __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s64 (int64_t *a, int64x1_t b) +vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) { - *a = b[0]; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u8 (uint8_t *a, uint8x8_t b) +vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) { - __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, - (int8x8_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u16 (uint16_t *a, uint16x4_t b) +vuzp1q_f16 (float16x8_t __a, float16x8_t __b) { - __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, - (int16x4_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u32 (uint32_t *a, uint32x2_t b) +vuzp1q_f32 (float32x4_t __a, float32x4_t __b) { - __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, - (int32x2_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u64 (uint64_t *a, uint64x1_t b) +vuzp1q_f64 (float64x2_t __a, float64x2_t __b) { - *a = b[0]; +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -/* vst1q */ - -__extension__ extern __inline void +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vst1q_f16 (float16_t *__a, float16x8_t __b) +vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) { - __builtin_aarch64_st1v8hf (__a, __b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f32 (float32_t *a, float32x4_t b) +vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) { - __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f64 (float64_t *a, float64x2_t b) +vuzp1q_s8 (int8x16_t __a, int8x16_t __b) { - __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p8 (poly8_t *a, poly8x16_t b) +vuzp1q_s16 (int16x8_t __a, int16x8_t __b) { - __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, - (int8x16_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p16 (poly16_t *a, poly16x8_t b) +vuzp1q_s32 (int32x4_t __a, int32x4_t __b) { - __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, - (int16x8_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p64 (poly64_t *a, poly64x2_t b) +vuzp1q_s64 (int64x2_t __a, int64x2_t __b) { - __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) a, - (poly64x2_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s8 (int8_t *a, int8x16_t b) +vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) { - __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif } -__extension__ extern __inline void 
+__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s16 (int16_t *a, int16x8_t b) +vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) { - __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s32 (int32_t *a, int32x4_t b) +vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) { - __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s64 (int64_t *a, int64x2_t b) +vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) { - __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u8 (uint8_t *a, uint8x16_t b) +vuzp1q_p64 (poly64x2_t __a, poly64x2_t __b) { - __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, - (int8x16_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u16 (uint16_t *a, uint16x8_t b) +vuzp2_f16 (float16x4_t __a, float16x4_t __b) { - __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, - (int16x8_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u32 (uint32_t *a, uint32x4_t b) +vuzp2_f32 (float32x2_t __a, float32x2_t __b) { - __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, - (int32x4_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u64 (uint64_t *a, uint64x2_t b) +vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) { - __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, - (int64x2_t) b); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -/* vst1_lane */ - -__extension__ extern __inline void +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane) 
+vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane) +vuzp2_s8 (int8x8_t __a, int8x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane) +vuzp2_s16 (int16x4_t __a, int16x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane) +vuzp2_s32 (int32x2_t __a, int32x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane) +vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_p64 (poly64_t *__a, poly64x1_t __b, const int __lane) +vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane) +vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane) +vuzp2q_f16 (float16x8_t __a, float16x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + 
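[Illustrative note, not part of the patch.] The uzp1/uzp2 masks above de-interleave: uzp1 keeps the even-indexed elements of the concatenation a:b and uzp2 the odd-indexed ones, which is the usual way to split interleaved data back into separate streams. A small sketch with sample (x, y) pairs of my own choosing:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* Interleaved (x, y) pairs: x = 0 2 4 6, y = 1 3 5 7.  */
  int16_t pairs[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  int16_t xs[4], ys[4];

  int16x4_t lo = vld1_s16 (pairs);       /* 0 1 2 3 */
  int16x4_t hi = vld1_s16 (pairs + 4);   /* 4 5 6 7 */
  vst1_s16 (xs, vuzp1_s16 (lo, hi));     /* even positions: 0 2 4 6 */
  vst1_s16 (ys, vuzp2_s16 (lo, hi));     /* odd positions:  1 3 5 7 */

  for (int i = 0; i < 4; i++)
    printf ("(%d, %d) ", xs[i], ys[i]);
  printf ("\n");
  return 0;
}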
return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane) +vuzp2q_f32 (float32x4_t __a, float32x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane) +vuzp2q_f64 (float64x2_t __a, float64x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane) +vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane) +vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane) +vuzp2q_s8 (int8x16_t __a, int8x16_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane) +vuzp2q_s16 (int16x8_t __a, int16x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -/* vst1q_lane */ - -__extension__ extern __inline void +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane) +vuzp2q_s32 (int32x4_t __a, int32x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return 
__builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane) +vuzp2q_s64 (int64x2_t __a, int64x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane) +vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane) +vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane) +vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_p64 (poly64_t *__a, poly64x2_t __b, const int __lane) +vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane) +vuzp2q_p64 (poly64x2_t __a, poly64x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__INTERLEAVE_LIST (uzp) + +/* vzip */ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane) +vzip1_f16 (float16x4_t __a, float16x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return 
__builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane) +vzip1_f32 (float32x2_t __a, float32x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane) +vzip1_p8 (poly8x8_t __a, poly8x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane) +vzip1_p16 (poly16x4_t __a, poly16x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane) +vzip1_s8 (int8x8_t __a, int8x8_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane) +vzip1_s16 (int16x4_t __a, int16x4_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) +vzip1_s32 (int32x2_t __a, int32x2_t __b) { - *__a = __aarch64_vget_lane_any (__b, __lane); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -/* vstn */ - -__extension__ extern __inline void +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_s64 (int64_t * __a, int64x1x2_t val) +vzip1_u8 (uint8x8_t __a, uint8x8_t __b) { - __builtin_aarch64_simd_oi __o; - int64x2x2_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); - 
__builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_u64 (uint64_t * __a, uint64x1x2_t val) +vzip1_u16 (uint16x4_t __a, uint16x4_t __b) { - __builtin_aarch64_simd_oi __o; - uint64x2x2_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); - __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_f64 (float64_t * __a, float64x1x2_t val) +vzip1_u32 (uint32x2_t __a, uint32x2_t __b) { - __builtin_aarch64_simd_oi __o; - float64x2x2_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); - __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_s8 (int8_t * __a, int8x8x2_t val) +vzip1q_f16 (float16x8_t __a, float16x8_t __b) { - __builtin_aarch64_simd_oi __o; - int8x16x2_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); - __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint16x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_p8 (poly8_t * __a, poly8x8x2_t val) +vzip1q_f32 (float32x4_t __a, float32x4_t __b) { - __builtin_aarch64_simd_oi __o; - poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); - __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, 
__b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_s16 (int16_t * __a, int16x4x2_t val) +vzip1q_f64 (float64x2_t __a, float64x2_t __b) { - __builtin_aarch64_simd_oi __o; - int16x8x2_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); - __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_p16 (poly16_t * __a, poly16x4x2_t val) +vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) { - __builtin_aarch64_simd_oi __o; - poly16x8x2_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); - __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_s32 (int32_t * __a, int32x2x2_t val) +vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) { - __builtin_aarch64_simd_oi __o; - int32x4x2_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); - __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_u8 (uint8_t * __a, uint8x8x2_t val) +vzip1q_s8 (int8x16_t __a, int8x16_t __b) { - __builtin_aarch64_simd_oi __o; - uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); - __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 
13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_u16 (uint16_t * __a, uint16x4x2_t val) +vzip1q_s16 (int16x8_t __a, int16x8_t __b) { - __builtin_aarch64_simd_oi __o; - uint16x8x2_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); - __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_u32 (uint32_t * __a, uint32x2x2_t val) +vzip1q_s32 (int32x4_t __a, int32x4_t __b) { - __builtin_aarch64_simd_oi __o; - uint32x4x2_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); - __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_f16 (float16_t * __a, float16x4x2_t val) +vzip1q_s64 (int64x2_t __a, int64x2_t __b) { - __builtin_aarch64_simd_oi __o; - float16x8x2_t temp; - temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1); - __builtin_aarch64_st2v4hf (__a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_f32 (float32_t * __a, float32x2x2_t val) +vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) { - __builtin_aarch64_simd_oi __o; - float32x4x2_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); - __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 
18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2_p64 (poly64_t * __a, poly64x1x2_t val) +vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) { - __builtin_aarch64_simd_oi __o; - poly64x2x2_t temp; - temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) temp.val[1], 1); - __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_s8 (int8_t * __a, int8x16x2_t val) +vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); - __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_p8 (poly8_t * __a, poly8x16x2_t val) +vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); - __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_s16 (int16_t * __a, int16x8x2_t val) +vzip1q_p64 (poly64x2_t __a, poly64x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); - __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_p16 (poly16_t * __a, poly16x8x2_t val) +vzip2_f16 (float16x4_t __a, float16x4_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); - __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 
3, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_s32 (int32_t * __a, int32x4x2_t val) +vzip2_f32 (float32x2_t __a, float32x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); - __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_s64 (int64_t * __a, int64x2x2_t val) +vzip2_p8 (poly8x8_t __a, poly8x8_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); - __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_u8 (uint8_t * __a, uint8x16x2_t val) +vzip2_p16 (poly16x4_t __a, poly16x4_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); - __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_u16 (uint16_t * __a, uint16x8x2_t val) +vzip2_s8 (int8x8_t __a, int8x8_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); - __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_u32 (uint32_t * __a, uint32x4x2_t val) +vzip2_s16 (int16x4_t __a, int16x4_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); - __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_u64 (uint64_t * __a, uint64x2x2_t val) 
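(Illustration, not part of the patch: the interleave intrinsics above are being rewritten in terms of GCC's generic __builtin_shuffle. With two N-lane operands, selector indices 0..N-1 pick lanes from the first argument and N..2N-1 from the second, so a constant selector spells out the ZIP/UZP permutation and the AArch64 back end can match it to a single zip1/zip2/uzp1/uzp2 instruction. The __AARCH64EB__ variants use mirrored selectors because GCC numbers vector lanes in the opposite order on big-endian AArch64. A minimal stand-alone sketch of the little-endian vzip1 selector, using only generic vector extensions and illustrative names:)

#include <stdio.h>

typedef unsigned int v4si __attribute__ ((vector_size (16)));

int
main (void)
{
  v4si a = {10, 11, 12, 13};
  v4si b = {20, 21, 22, 23};
  /* Same selector the patch uses for vzip1q_s32 on little-endian:
     interleave the low halves of the two inputs.  */
  v4si z = __builtin_shuffle (a, b, (v4si) {0, 4, 1, 5});
  printf ("%u %u %u %u\n", z[0], z[1], z[2], z[3]);  /* 10 20 11 21 */
  return 0;
}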
+vzip2_s32 (int32x2_t __a, int32x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); - __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_f16 (float16_t * __a, float16x8x2_t val) +vzip2_u8 (uint8x8_t __a, uint8x8_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1); - __builtin_aarch64_st2v8hf (__a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_f32 (float32_t * __a, float32x4x2_t val) +vzip2_u16 (uint16x4_t __a, uint16x4_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); - __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_f64 (float64_t * __a, float64x2x2_t val) +vzip2_u32 (uint32x2_t __a, uint32x2_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); - __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst2q_p64 (poly64_t * __a, poly64x2x2_t val) +vzip2q_f16 (float16x8_t __a, float16x8_t __b) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) val.val[1], 1); - __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_s64 (int64_t * __a, int64x1x3_t val) +vzip2q_f32 (float32x4_t __a, float32x4_t __b) { - __builtin_aarch64_simd_ci __o; - int64x2x3_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], 
vcreate_s64 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); - __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_u64 (uint64_t * __a, uint64x1x3_t val) +vzip2q_f64 (float64x2_t __a, float64x2_t __b) { - __builtin_aarch64_simd_ci __o; - uint64x2x3_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); - __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_f64 (float64_t * __a, float64x1x3_t val) +vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) { - __builtin_aarch64_simd_ci __o; - float64x2x3_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); - __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_s8 (int8_t * __a, int8x8x3_t val) +vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) { - __builtin_aarch64_simd_ci __o; - int8x16x3_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return 
__builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_p8 (poly8_t * __a, poly8x8x3_t val) +vzip2q_s8 (int8x16_t __a, int8x16_t __b) { - __builtin_aarch64_simd_ci __o; - poly8x16x3_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_s16 (int16_t * __a, int16x4x3_t val) +vzip2q_s16 (int16x8_t __a, int16x8_t __b) { - __builtin_aarch64_simd_ci __o; - int16x8x3_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_p16 (poly16_t * __a, poly16x4x3_t val) +vzip2q_s32 (int32x4_t __a, int32x4_t __b) { - __builtin_aarch64_simd_ci __o; - poly16x8x3_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_s32 (int32_t * __a, int32x2x3_t val) +vzip2q_s64 (int64x2_t __a, int64x2_t __b) { - __builtin_aarch64_simd_ci 
__o; - int32x4x3_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); - __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_u8 (uint8_t * __a, uint8x8x3_t val) +vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) { - __builtin_aarch64_simd_ci __o; - uint8x16x3_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_u16 (uint16_t * __a, uint16x4x3_t val) +vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) { - __builtin_aarch64_simd_ci __o; - uint16x8x3_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_u32 (uint32_t * __a, uint32x2x3_t val) +vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) { - __builtin_aarch64_simd_ci __o; - uint32x4x3_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); - __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); - __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_f16 (float16_t * __a, float16x4x3_t val) +vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) { - __builtin_aarch64_simd_ci __o; - float16x8x3_t temp; - temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2); - __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_f32 (float32_t * __a, float32x2x3_t val) +vzip2q_p64 (poly64x2_t __a, poly64x2_t __b) { - __builtin_aarch64_simd_ci __o; - float32x4x3_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); - __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif } -__extension__ extern __inline void +__INTERLEAVE_LIST (zip) + +#undef __INTERLEAVE_LIST +#undef __DEFINTERLEAVE + +/* End of optimal implementations in approved order. */ + +#pragma GCC pop_options + +/* ARMv8.2-A FP16 intrinsics. */ + +#include "arm_fp16.h" + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+fp16") + +/* ARMv8.2-A FP16 one operand vector intrinsics. 
*/ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3_p64 (poly64_t * __a, poly64x1x3_t val) +vabs_f16 (float16x4_t __a) { - __builtin_aarch64_simd_ci __o; - poly64x2x3_t temp; - temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) temp.val[2], 2); - __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_absv4hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_s8 (int8_t * __a, int8x16x3_t val) +vabsq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_absv8hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_p8 (poly8_t * __a, poly8x16x3_t val) +vceqz_f16 (float16x4_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_s16 (int16_t * __a, int16x8x3_t val) +vceqzq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_p16 (poly16_t * __a, poly16x8x3_t val) +vcgez_f16 (float16x4_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_s32 (int32_t * __a, int32x4x3_t val) +vcgezq_f16 (float16x8_t __a) { - 
__builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); - __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); + return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_s64 (int64_t * __a, int64x2x3_t val) +vcgtz_f16 (float16x4_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_u8 (uint8_t * __a, uint8x16x3_t val) +vcgtzq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_u16 (uint16_t * __a, uint16x8x3_t val) +vclez_f16 (float16x4_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_u32 (uint32_t * __a, uint32x4x3_t val) +vclezq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); - __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); + return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_u64 (uint64_t * __a, uint64x2x3_t val) +vcltz_f16 (float16x4_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern 
__inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_f16 (float16_t * __a, float16x8x3_t val) +vcltzq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2); - __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f)); } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_f32 (float32_t * __a, float32x4x3_t val) +vcvt_f16_s16 (int16x4_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); - __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + return __builtin_aarch64_floatv4hiv4hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_f64 (float64_t * __a, float64x2x3_t val) +vcvtq_f16_s16 (int16x8_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); - __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); + return __builtin_aarch64_floatv8hiv8hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst3q_p64 (poly64_t * __a, poly64x2x3_t val) +vcvt_f16_u16 (uint16x4_t __a) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a); } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_s64 (int64_t * __a, int64x1x4_t val) +vcvtq_f16_u16 (uint16x8_t __a) { - __builtin_aarch64_simd_xi __o; - int64x2x4_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); - temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a); } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t 
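(Illustration, not part of the patch: the hunk above opens the ARMv8.2-A FP16 section. arm_fp16.h supplies the scalar __fp16 intrinsics, and the vector intrinsics that follow are wrapped in #pragma GCC target ("arch=armv8.2-a+fp16"), mostly expanding directly to aarch64 builtins, e.g. vabs_f16 to __builtin_aarch64_absv4hf and vceqz_f16 to a compare against a vdup_n_f16 (0.0f) splat. Code that calls them just needs the same feature enabled, for example via a target attribute on a hypothetical helper:)

#include <arm_neon.h>

/* Count how many lanes of x are exactly 0.0, using the newly added
   vceqz_f16; vshr_n_u16 and vaddv_u16 are pre-existing intrinsics.  */
__attribute__ ((target ("arch=armv8.2-a+fp16")))
unsigned
count_zero_lanes (float16x4_t x)
{
  uint16x4_t m = vceqz_f16 (x);            /* 0xffff in each zero lane */
  return vaddv_u16 (vshr_n_u16 (m, 15));   /* 1 per zero lane, summed  */
}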
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_u64 (uint64_t * __a, uint64x1x4_t val) +vcvt_s16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - uint64x2x4_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_lbtruncv4hfv4hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_f64 (float64_t * __a, float64x1x4_t val) +vcvtq_s16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - float64x2x4_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3); - __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); + return __builtin_aarch64_lbtruncv8hfv8hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_s8 (int8_t * __a, int8x8x4_t val) +vcvt_u16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - int8x16x4_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); - temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_p8 (poly8_t * __a, poly8x8x4_t val) +vcvtq_u16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - poly8x16x4_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_p8 
(val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_s16 (int16_t * __a, int16x4x4_t val) +vcvta_s16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - int16x8x4_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); - temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_lroundv4hfv4hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_p16 (poly16_t * __a, poly16x4x4_t val) +vcvtaq_s16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - poly16x8x4_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_lroundv8hfv8hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_s32 (int32_t * __a, int32x2x4_t val) +vcvta_u16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - int32x4x4_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); - temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); - __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); + return __builtin_aarch64_lrounduv4hfv4hi_us 
(__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_u8 (uint8_t * __a, uint8x8x4_t val) +vcvtaq_u16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - uint8x16x4_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_lrounduv8hfv8hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_u16 (uint16_t * __a, uint16x4x4_t val) +vcvtm_s16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - uint16x8x4_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_lfloorv4hfv4hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_u32 (uint32_t * __a, uint32x2x4_t val) +vcvtmq_s16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - uint32x4x4_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); - __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); + return __builtin_aarch64_lfloorv8hfv8hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_f16 (float16_t * __a, float16x4x4_t val) +vcvtm_u16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - float16x8x4_t temp; - temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - temp.val[2] = 
vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3); - __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); + return __builtin_aarch64_lflooruv4hfv4hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_f32 (float32_t * __a, float32x2x4_t val) +vcvtmq_u16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - float32x4x4_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); - __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + return __builtin_aarch64_lflooruv8hfv8hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4_p64 (poly64_t * __a, poly64x1x4_t val) +vcvtn_s16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - poly64x2x4_t temp; - temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); - temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - temp.val[3] = vcombine_p64 (val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_lfrintnv4hfv4hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_s8 (int8_t * __a, int8x16x4_t val) +vcvtnq_s16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_lfrintnv8hfv8hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_p8 (poly8_t * __a, poly8x16x4_t 
val) +vcvtn_u16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_s16 (int16_t * __a, int16x8x4_t val) +vcvtnq_u16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_p16 (poly16_t * __a, poly16x8x4_t val) +vcvtp_s16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + return __builtin_aarch64_lceilv4hfv4hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_s32 (int32_t * __a, int32x4x4_t val) +vcvtpq_s16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); - __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); + return __builtin_aarch64_lceilv8hfv8hi (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_s64 (int64_t * __a, int64x2x4_t val) +vcvtp_u16_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_lceiluv4hfv4hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_u8 (uint8_t * __a, uint8x16x4_t val) +vcvtpq_u16_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) 
val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + return __builtin_aarch64_lceiluv8hfv8hi_us (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_u16 (uint16_t * __a, uint16x8x4_t val) +vneg_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + return -__a; } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_u32 (uint32_t * __a, uint32x4x4_t val) +vnegq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); - __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); + return -__a; } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_u64 (uint64_t * __a, uint64x2x4_t val) +vrecpe_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_frecpev4hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_f16 (float16_t * __a, float16x8x4_t val) +vrecpeq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3); - __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + return __builtin_aarch64_frecpev8hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_f32 (float32_t * __a, float32x4x4_t val) +vrnd_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4sf (__o, 
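A minimal usage sketch for the directed-rounding conversions added above (illustrative only, not part of the patch; the wrapper names are invented). vcvtn* rounds to nearest with ties to even, vcvtp* toward +Inf, and vcvtm* (from the same family) toward -Inf; the rounding direction is encoded in the instruction rather than taken from the current rounding mode.

#include <arm_neon.h>

/* Convert four half-precision lanes to signed 16-bit integers with an
   explicit rounding direction.  Requires an ARMv8.2-A FP16 target,
   e.g. -march=armv8.2-a+fp16.  */
int16x4_t cvt_nearest_even (float16x4_t v) { return vcvtn_s16_f16 (v); }
int16x4_t cvt_toward_pinf  (float16x4_t v) { return vcvtp_s16_f16 (v); }
int16x4_t cvt_toward_minf  (float16x4_t v) { return vcvtm_s16_f16 (v); }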
(float32x4_t) val.val[3], 3); - __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + return __builtin_aarch64_btruncv4hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_f64 (float64_t * __a, float64x2x4_t val) +vrndq_f16 (float16x8_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); - __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); + return __builtin_aarch64_btruncv8hf (__a); } -__extension__ extern __inline void +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst4q_p64 (poly64_t * __a, poly64x2x4_t val) +vrnda_f16 (float16x4_t __a) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + return __builtin_aarch64_roundv4hf (__a); } -/* vsub */ - -__extension__ extern __inline int64_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsubd_s64 (int64_t __a, int64_t __b) +vrndaq_f16 (float16x8_t __a) { - return __a - __b; + return __builtin_aarch64_roundv8hf (__a); } -__extension__ extern __inline uint64_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsubd_u64 (uint64_t __a, uint64_t __b) +vrndi_f16 (float16x4_t __a) { - return __a - __b; + return __builtin_aarch64_nearbyintv4hf (__a); } -/* vtbx1 */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) +vrndiq_f16 (float16x8_t __a) { - uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), - vmov_n_u8 (8)); - int8x8_t __tbl = vtbl1_s8 (__tab, __idx); - - return vbsl_s8 (__mask, __tbl, __r); + return __builtin_aarch64_nearbyintv8hf (__a); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) +vrndm_f16 (float16x4_t __a) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); - uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); - - return vbsl_u8 (__mask, __tbl, __r); + return __builtin_aarch64_floorv4hf (__a); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) +vrndmq_f16 (float16x8_t __a) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); - poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); - - return vbsl_p8 (__mask, __tbl, __r); + return __builtin_aarch64_floorv8hf (__a); } -/* vtbx3 */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline float16x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) +vrndn_f16 (float16x4_t __a) { - uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), - vmov_n_u8 (24)); - int8x8_t __tbl = vtbl3_s8 (__tab, __idx); - - return vbsl_s8 (__mask, __tbl, __r); + return __builtin_aarch64_frintnv4hf (__a); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) +vrndnq_f16 (float16x8_t __a) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); - uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); - - return vbsl_u8 (__mask, __tbl, __r); + return __builtin_aarch64_frintnv8hf (__a); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) +vrndp_f16 (float16x4_t __a) { - uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); - poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); - - return vbsl_p8 (__mask, __tbl, __r); + return __builtin_aarch64_ceilv4hf (__a); } -/* vtbx4 */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) +vrndpq_f16 (float16x8_t __a) { - int8x8_t result; - int8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); - temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); - return result; + return __builtin_aarch64_ceilv8hf (__a); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) +vrndx_f16 (float16x4_t __a) { - uint8x8_t result; - uint8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); - temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); - return result; + return __builtin_aarch64_rintv4hf (__a); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) +vrndxq_f16 (float16x8_t __a) { - poly8x8_t result; - poly8x16x2_t temp; - __builtin_aarch64_simd_oi __o; - temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); - temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) temp.val[1], 1); - result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); - return result; + return __builtin_aarch64_rintv8hf (__a); } -/* vtrn */ - __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vtrn1_f16 (float16x4_t __a, float16x4_t __b) +vrsqrte_f16 (float16x4_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -#endif + return __builtin_aarch64_rsqrtev4hf (__a); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_f32 (float32x2_t __a, float32x2_t __b) +vrsqrteq_f16 (float16x8_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_rsqrtev8hf (__a); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) +vsqrt_f16 (float16x4_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -#endif + return __builtin_aarch64_sqrtv4hf (__a); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) +vsqrtq_f16 (float16x8_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -#endif + return __builtin_aarch64_sqrtv8hf (__a); } -__extension__ extern __inline int8x8_t +/* ARMv8.2-A FP16 two operands vector intrinsics. */ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_s8 (int8x8_t __a, int8x8_t __b) +vadd_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -#endif + return __a + __b; } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_s16 (int16x4_t __a, int16x4_t __b) +vaddq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -#endif + return __a + __b; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_s32 (int32x2_t __a, int32x2_t __b) +vabd_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_fabdv4hf (__a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) +vabdq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -#endif + return __builtin_aarch64_fabdv8hf (__a, __b); } __extension__ 
extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) +vcage_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -#endif + return __builtin_aarch64_facgev4hf_uss (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1_u32 (uint32x2_t __a, uint32x2_t __b) +vcageq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_facgev8hf_uss (__a, __b); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_f16 (float16x8_t __a, float16x8_t __b) +vcagt_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -#endif + return __builtin_aarch64_facgtv4hf_uss (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_f32 (float32x4_t __a, float32x4_t __b) +vcagtq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -#endif + return __builtin_aarch64_facgtv8hf_uss (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_f64 (float64x2_t __a, float64x2_t __b) +vcale_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_faclev4hf_uss (__a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) +vcaleq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -#endif + return __builtin_aarch64_faclev8hf_uss (__a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) +vcalt_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -#endif + return __builtin_aarch64_facltv4hf_uss (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_s8 (int8x16_t __a, 
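The FP16 two-operand intrinsics introduced above are thin wrappers: vadd_f16 expands to the plain vector + operator, while vabd_f16 and the vcag*/vcal* absolute comparisons map to single FABD/FACGT-style instructions. A short sketch (illustrative only; the function names are invented):

#include <arm_neon.h>

float16x4_t sum (float16x4_t a, float16x4_t b)
{
  return vadd_f16 (a, b);        /* same as a + b, lane by lane */
}

float16x4_t absdiff (float16x4_t a, float16x4_t b)
{
  return vabd_f16 (a, b);        /* |a - b| per lane (FABD) */
}

uint16x4_t magnitude_gt (float16x4_t a, float16x4_t b)
{
  return vcagt_f16 (a, b);       /* |a| > |b| ? all-ones : 0, per lane */
}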
int8x16_t __b) +vcaltq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -#endif + return __builtin_aarch64_facltv8hf_uss (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_s16 (int16x8_t __a, int16x8_t __b) +vceq_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -#endif + return __builtin_aarch64_cmeqv4hf_uss (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_s32 (int32x4_t __a, int32x4_t __b) +vceqq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -#endif + return __builtin_aarch64_cmeqv8hf_uss (__a, __b); +} + +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcge_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_cmgev4hf_uss (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_s64 (int64x2_t __a, int64x2_t __b) +vcgeq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_cmgev8hf_uss (__a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) +vcgt_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -#endif + return __builtin_aarch64_cmgtv4hf_uss (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) +vcgtq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -#endif + return __builtin_aarch64_cmgtv8hf_uss (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) +vcle_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -#endif + return __builtin_aarch64_cmlev4hf_uss (__a, __b); } -__extension__ extern __inline uint64x2_t 
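The FP16 comparisons above return a uint16x4_t/uint16x8_t whose lanes are all-ones where the predicate holds and zero otherwise, so the result can drive a bitwise select. Sketch (illustrative only; it assumes vbsl_f16 is available alongside the FP16 comparisons, as ACLE specifies):

#include <arm_neon.h>

/* Pick, lane by lane, whichever of a and b is larger.  */
float16x4_t select_larger (float16x4_t a, float16x4_t b)
{
  uint16x4_t a_gt_b = vcgt_f16 (a, b);   /* 0xffff where a > b, else 0 */
  return vbsl_f16 (a_gt_b, a, b);        /* mask set -> a, clear -> b */
}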
+__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) +vcleq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_cmlev8hf_uss (__a, __b); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_f16 (float16x4_t __a, float16x4_t __b) +vclt_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -#endif + return __builtin_aarch64_cmltv4hf_uss (__a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_f32 (float32x2_t __a, float32x2_t __b) +vcltq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_cmltv8hf_uss (__a, __b); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) +vcvt_n_f16_s16 (int16x4_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -#endif + return __builtin_aarch64_scvtfv4hi (__a, __b); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) +vcvtq_n_f16_s16 (int16x8_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -#endif + return __builtin_aarch64_scvtfv8hi (__a, __b); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_s8 (int8x8_t __a, int8x8_t __b) +vcvt_n_f16_u16 (uint16x4_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -#endif + return __builtin_aarch64_ucvtfv4hi_sus (__a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_s16 (int16x4_t __a, int16x4_t __b) +vcvtq_n_f16_u16 (uint16x8_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -#endif + return __builtin_aarch64_ucvtfv8hi_sus (__a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_s32 (int32x2_t __a, int32x2_t __b) +vcvt_n_s16_f16 (float16x4_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle 
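vcvt_n_f16_s16 and its relatives above convert between half precision and 16-bit fixed point: the constant second argument is the number of fractional bits, so the integer-to-float direction divides by 2^n. A small sketch (illustrative only; the Q8.8 interpretation is just an example):

#include <arm_neon.h>

/* Treat four int16_t lanes as Q8.8 fixed point and widen them to
   half precision: each result lane is input / 256.0.  */
float16x4_t q8_8_to_f16 (int16x4_t q)
{
  return vcvt_n_f16_s16 (q, 8);
}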
(__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_fcvtzsv4hf (__a, __b); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_u8 (uint8x8_t __a, uint8x8_t __b) +vcvtq_n_s16_f16 (float16x8_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -#endif + return __builtin_aarch64_fcvtzsv8hf (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) -{ -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -#endif -} - -__extension__ extern __inline uint32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) +vcvt_n_u16_f16 (float16x4_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_fcvtzuv4hf_uss (__a, __b); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_f16 (float16x8_t __a, float16x8_t __b) +vcvtq_n_u16_f16 (float16x8_t __a, const int __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -#endif + return __builtin_aarch64_fcvtzuv8hf_uss (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_f32 (float32x4_t __a, float32x4_t __b) +vdiv_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -#endif + return __a / __b; } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_f64 (float64x2_t __a, float64x2_t __b) +vdivq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __a / __b; } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) +vmax_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -#endif + return __builtin_aarch64_smax_nanv4hf (__a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) +vmaxq_f16 
(float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -#endif + return __builtin_aarch64_smax_nanv8hf (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_s8 (int8x16_t __a, int8x16_t __b) +vmaxnm_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -#endif + return __builtin_aarch64_fmaxv4hf (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_s16 (int16x8_t __a, int16x8_t __b) +vmaxnmq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -#endif + return __builtin_aarch64_fmaxv8hf (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_s32 (int32x4_t __a, int32x4_t __b) +vmin_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -#endif + return __builtin_aarch64_smin_nanv4hf (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_s64 (int64x2_t __a, int64x2_t __b) +vminq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __builtin_aarch64_smin_nanv8hf (__a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) +vminnm_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -#endif + return __builtin_aarch64_fminv4hf (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) +vminnmq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -#endif + return __builtin_aarch64_fminv8hf (__a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b) +vmul_f16 (float16x4_t __a, 
float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -#endif + return __a * __b; } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) +vmulq_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __a * __b; } -__extension__ extern __inline float16x4x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_f16 (float16x4_t __a, float16x4_t __b) +vmulx_f16 (float16x4_t __a, float16x4_t __b) { - return (float16x4x2_t) {vtrn1_f16 (__a, __b), vtrn2_f16 (__a, __b)}; + return __builtin_aarch64_fmulxv4hf (__a, __b); } -__extension__ extern __inline float32x2x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_f32 (float32x2_t a, float32x2_t b) +vmulxq_f16 (float16x8_t __a, float16x8_t __b) { - return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; + return __builtin_aarch64_fmulxv8hf (__a, __b); } -__extension__ extern __inline poly8x8x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_p8 (poly8x8_t a, poly8x8_t b) +vpadd_f16 (float16x4_t __a, float16x4_t __b) { - return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; + return __builtin_aarch64_faddpv4hf (__a, __b); } -__extension__ extern __inline poly16x4x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_p16 (poly16x4_t a, poly16x4_t b) +vpaddq_f16 (float16x8_t __a, float16x8_t __b) { - return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; + return __builtin_aarch64_faddpv8hf (__a, __b); } -__extension__ extern __inline int8x8x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_s8 (int8x8_t a, int8x8_t b) +vpmax_f16 (float16x4_t __a, float16x4_t __b) { - return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; + return __builtin_aarch64_smax_nanpv4hf (__a, __b); } -__extension__ extern __inline int16x4x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_s16 (int16x4_t a, int16x4_t b) +vpmaxq_f16 (float16x8_t __a, float16x8_t __b) { - return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; + return __builtin_aarch64_smax_nanpv8hf (__a, __b); } -__extension__ extern __inline int32x2x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_s32 (int32x2_t a, int32x2_t b) +vpmaxnm_f16 (float16x4_t __a, float16x4_t __b) { - return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; + return __builtin_aarch64_smaxpv4hf (__a, __b); } -__extension__ extern __inline uint8x8x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_u8 (uint8x8_t a, uint8x8_t b) +vpmaxnmq_f16 (float16x8_t __a, float16x8_t __b) { - return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; + return __builtin_aarch64_smaxpv8hf (__a, __b); } -__extension__ extern __inline uint16x4x2_t +__extension__ extern __inline 
float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_u16 (uint16x4_t a, uint16x4_t b) +vpmin_f16 (float16x4_t __a, float16x4_t __b) { - return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; + return __builtin_aarch64_smin_nanpv4hf (__a, __b); } -__extension__ extern __inline uint32x2x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrn_u32 (uint32x2_t a, uint32x2_t b) +vpminq_f16 (float16x8_t __a, float16x8_t __b) { - return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; + return __builtin_aarch64_smin_nanpv8hf (__a, __b); } -__extension__ extern __inline float16x8x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_f16 (float16x8_t __a, float16x8_t __b) +vpminnm_f16 (float16x4_t __a, float16x4_t __b) { - return (float16x8x2_t) {vtrn1q_f16 (__a, __b), vtrn2q_f16 (__a, __b)}; + return __builtin_aarch64_sminpv4hf (__a, __b); } -__extension__ extern __inline float32x4x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_f32 (float32x4_t a, float32x4_t b) +vpminnmq_f16 (float16x8_t __a, float16x8_t __b) { - return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; + return __builtin_aarch64_sminpv8hf (__a, __b); } -__extension__ extern __inline poly8x16x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_p8 (poly8x16_t a, poly8x16_t b) +vrecps_f16 (float16x4_t __a, float16x4_t __b) { - return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; + return __builtin_aarch64_frecpsv4hf (__a, __b); } -__extension__ extern __inline poly16x8x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_p16 (poly16x8_t a, poly16x8_t b) +vrecpsq_f16 (float16x8_t __a, float16x8_t __b) { - return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; + return __builtin_aarch64_frecpsv8hf (__a, __b); } -__extension__ extern __inline int8x16x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_s8 (int8x16_t a, int8x16_t b) +vrsqrts_f16 (float16x4_t __a, float16x4_t __b) { - return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; + return __builtin_aarch64_rsqrtsv4hf (__a, __b); } -__extension__ extern __inline int16x8x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_s16 (int16x8_t a, int16x8_t b) +vrsqrtsq_f16 (float16x8_t __a, float16x8_t __b) { - return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; + return __builtin_aarch64_rsqrtsv8hf (__a, __b); } -__extension__ extern __inline int32x4x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_s32 (int32x4_t a, int32x4_t b) +vsub_f16 (float16x4_t __a, float16x4_t __b) { - return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; + return __a - __b; } -__extension__ extern __inline uint8x16x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_u8 (uint8x16_t a, uint8x16_t b) +vsubq_f16 (float16x8_t __a, float16x8_t __b) { - return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; + return __a - __b; } -__extension__ extern __inline uint16x8x2_t +/* ARMv8.2-A FP16 three operands vector 
intrinsics. */ + +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_u16 (uint16x8_t a, uint16x8_t b) +vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) { - return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; + return __builtin_aarch64_fmav4hf (__b, __c, __a); } -__extension__ extern __inline uint32x4x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtrnq_u32 (uint32x4_t a, uint32x4_t b) +vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; + return __builtin_aarch64_fmav8hf (__b, __c, __a); } -/* vtst */ - -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_s8 (int8x8_t __a, int8x8_t __b) +vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) { - return (uint8x8_t) ((__a & __b) != 0); + return __builtin_aarch64_fnmav4hf (__b, __c, __a); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_s16 (int16x4_t __a, int16x4_t __b) +vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return (uint16x4_t) ((__a & __b) != 0); + return __builtin_aarch64_fnmav8hf (__b, __c, __a); } -__extension__ extern __inline uint32x2_t +/* ARMv8.2-A FP16 lane vector intrinsics. */ + +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_s32 (int32x2_t __a, int32x2_t __b) +vfmah_lane_f16 (float16_t __a, float16_t __b, + float16x4_t __c, const int __lane) { - return (uint32x2_t) ((__a & __b) != 0); + return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_s64 (int64x1_t __a, int64x1_t __b) +vfmah_laneq_f16 (float16_t __a, float16_t __b, + float16x8_t __c, const int __lane) { - return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); + return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_u8 (uint8x8_t __a, uint8x8_t __b) +vfma_lane_f16 (float16x4_t __a, float16x4_t __b, + float16x4_t __c, const int __lane) { - return ((__a & __b) != 0); + return vfma_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_u16 (uint16x4_t __a, uint16x4_t __b) +vfmaq_lane_f16 (float16x8_t __a, float16x8_t __b, + float16x4_t __c, const int __lane) { - return ((__a & __b) != 0); + return vfmaq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_u32 (uint32x2_t __a, uint32x2_t __b) +vfma_laneq_f16 (float16x4_t __a, float16x4_t __b, + float16x8_t __c, const int __lane) { - return ((__a & __b) != 0); + return vfma_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); } -__extension__ extern __inline uint64x1_t +__extension__ extern __inline float16x8_t 
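Note the operand order in the fused multiply-add wrappers above: the first argument is the addend, so vfma_f16 (a, b, c) computes a + b * c with a single rounding and vfms_f16 (a, b, c) computes a - b * c (the builtin is called with the operands rotated accordingly). Sketch (illustrative only):

#include <arm_neon.h>

float16x4_t fma_acc (float16x4_t acc, float16x4_t x, float16x4_t y)
{
  return vfma_f16 (acc, x, y);   /* acc + x * y, fused */
}

float16x4_t fms_acc (float16x4_t acc, float16x4_t x, float16x4_t y)
{
  return vfms_f16 (acc, x, y);   /* acc - x * y, fused */
}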
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtst_u64 (uint64x1_t __a, uint64x1_t __b) +vfmaq_laneq_f16 (float16x8_t __a, float16x8_t __b, + float16x8_t __c, const int __lane) { - return ((__a & __b) != __AARCH64_UINT64_C (0)); + return vfmaq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_s8 (int8x16_t __a, int8x16_t __b) +vfma_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) { - return (uint8x16_t) ((__a & __b) != 0); + return vfma_f16 (__a, __b, vdup_n_f16 (__c)); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_s16 (int16x8_t __a, int16x8_t __b) +vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) { - return (uint16x8_t) ((__a & __b) != 0); + return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c)); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_s32 (int32x4_t __a, int32x4_t __b) +vfmsh_lane_f16 (float16_t __a, float16_t __b, + float16x4_t __c, const int __lane) { - return (uint32x4_t) ((__a & __b) != 0); + return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_s64 (int64x2_t __a, int64x2_t __b) +vfmsh_laneq_f16 (float16_t __a, float16_t __b, + float16x8_t __c, const int __lane) { - return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0)); + return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_u8 (uint8x16_t __a, uint8x16_t __b) +vfms_lane_f16 (float16x4_t __a, float16x4_t __b, + float16x4_t __c, const int __lane) { - return ((__a & __b) != 0); + return vfms_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_u16 (uint16x8_t __a, uint16x8_t __b) +vfmsq_lane_f16 (float16x8_t __a, float16x8_t __b, + float16x4_t __c, const int __lane) { - return ((__a & __b) != 0); + return vfmsq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_u32 (uint32x4_t __a, uint32x4_t __b) +vfms_laneq_f16 (float16x4_t __a, float16x4_t __b, + float16x8_t __c, const int __lane) { - return ((__a & __b) != 0); + return vfms_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstq_u64 (uint64x2_t __a, uint64x2_t __b) +vfmsq_laneq_f16 (float16x8_t __a, float16x8_t __b, + float16x8_t __c, const int __lane) { - return ((__a & __b) != __AARCH64_UINT64_C (0)); + return vfmsq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); } -__extension__ extern __inline uint64_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -vtstd_s64 (int64_t __a, int64_t __b) +vfms_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) { - return (__a & __b) ? -1ll : 0ll; + return vfms_f16 (__a, __b, vdup_n_f16 (__c)); } -__extension__ extern __inline uint64_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vtstd_u64 (uint64_t __a, uint64_t __b) +vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) { - return (__a & __b) ? -1ll : 0ll; + return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c)); } -/* vuqadd */ - -__extension__ extern __inline int8x8_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqadd_s8 (int8x8_t __a, uint8x8_t __b) +vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) { - return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqadd_s16 (int16x4_t __a, uint16x4_t __b) +vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) { - return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); + return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqadd_s32 (int32x2_t __a, uint32x2_t __b) +vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) { - return __builtin_aarch64_suqaddv2si_ssu (__a, __b); + return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); } -__extension__ extern __inline int64x1_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqadd_s64 (int64x1_t __a, uint64x1_t __b) +vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) { - return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])}; + return __a * __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) +vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) { - return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); + return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) +vmulq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) { - return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); + return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) +vmul_n_f16 (float16x4_t __a, float16_t __b) { - return __builtin_aarch64_suqaddv4si_ssu (__a, __b); + return vmul_lane_f16 (__a, vdup_n_f16 (__b), 0); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) +vmulq_n_f16 (float16x8_t __a, float16_t __b) { - return 
__builtin_aarch64_suqaddv2di_ssu (__a, __b); + return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0); } -__extension__ extern __inline int8_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqaddb_s8 (int8_t __a, uint8_t __b) +vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) { - return __builtin_aarch64_suqaddqi_ssu (__a, __b); + return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane)); } -__extension__ extern __inline int16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqaddh_s16 (int16_t __a, uint16_t __b) +vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) { - return __builtin_aarch64_suqaddhi_ssu (__a, __b); + return vmulx_f16 (__a, __aarch64_vdup_lane_f16 (__b, __lane)); } -__extension__ extern __inline int32_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqadds_s32 (int32_t __a, uint32_t __b) +vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) { - return __builtin_aarch64_suqaddsi_ssu (__a, __b); + return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane)); } -__extension__ extern __inline int64_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuqaddd_s64 (int64_t __a, uint64_t __b) +vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) { - return __builtin_aarch64_suqadddi_ssu (__a, __b); + return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane)); } -#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ - __extension__ extern __inline rettype \ - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ - v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ - { \ - return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ - v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ - } - -#define __INTERLEAVE_LIST(op) \ - __DEFINTERLEAVE (op, float16x4x2_t, float16x4_t, f16,) \ - __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ - __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ - __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ - __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ - __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ - __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ - __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ - __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ - __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ - __DEFINTERLEAVE (op, float16x8x2_t, float16x8_t, f16, q) \ - __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ - __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ - __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ - __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ - __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ - __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ - __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ - __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ - __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) - -/* vuzp */ - __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_f16 (float16x4_t __a, float16x4_t __b) +vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 
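The lane and _n multiply forms above broadcast a single element (or scalar) across the vector before multiplying; vmul_lane_f16 (a, b, 2), for example, scales every lane of a by b[2]. Sketch (illustrative only; the function names are invented):

#include <arm_neon.h>

float16x4_t scale_by_lane2 (float16x4_t a, float16x4_t coeffs)
{
  return vmul_lane_f16 (a, coeffs, 2);   /* a[i] * coeffs[2] for each i */
}

float16x4_t scale_by_scalar (float16x4_t a, float16_t s)
{
  return vmul_n_f16 (a, s);              /* a[i] * s for each i */
}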
2, 4, 6}); -#endif + return vmulx_f16 (__a, __aarch64_vdup_laneq_f16 (__b, __lane)); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_f32 (float32x2_t __a, float32x2_t __b) +vmulxq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return vmulxq_f16 (__a, __aarch64_vdupq_laneq_f16 (__b, __lane)); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) +vmulx_n_f16 (float16x4_t __a, float16_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -#endif + return vmulx_f16 (__a, vdup_n_f16 (__b)); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) +vmulxq_n_f16 (float16x8_t __a, float16_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -#endif + return vmulxq_f16 (__a, vdupq_n_f16 (__b)); } -__extension__ extern __inline int8x8_t +/* ARMv8.2-A FP16 reduction vector intrinsics. */ + +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_s8 (int8x8_t __a, int8x8_t __b) +vmaxv_f16 (float16x4_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -#endif + return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_s16 (int16x4_t __a, int16x4_t __b) +vmaxvq_f16 (float16x8_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -#endif + return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_s32 (int32x2_t __a, int32x2_t __b) +vminv_f16 (float16x4_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) +vminvq_f16 (float16x8_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -#endif + return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float16_t __attribute__ 
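The reductions above collapse a whole FP16 vector to one half-precision scalar; vmaxv_f16/vminv_f16 use the NaN-propagating FMAXV/FMINV reduction, while the vmaxnmv/vminnmv forms defined just below follow the FMAXNM/FMINNM rule that ignores quiet NaNs. Sketch (illustrative only):

#include <arm_neon.h>

float16_t largest_lane  (float16x4_t v) { return vmaxv_f16 (v); }
float16_t smallest_lane (float16x4_t v) { return vminv_f16 (v); }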
((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) +vmaxnmv_f16 (float16x4_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -#endif + return __builtin_aarch64_reduc_smax_scal_v4hf (__a); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) +vmaxnmvq_f16 (float16x8_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_reduc_smax_scal_v8hf (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_f16 (float16x8_t __a, float16x8_t __b) +vminnmv_f16 (float16x4_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -#endif + return __builtin_aarch64_reduc_smin_scal_v4hf (__a); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_f32 (float32x4_t __a, float32x4_t __b) +vminnmvq_f16 (float16x8_t __a) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -#endif + return __builtin_aarch64_reduc_smin_scal_v8hf (__a); } -__extension__ extern __inline float64x2_t +#pragma GCC pop_options + +/* AdvSIMD Dot Product intrinsics. 
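/* Illustrative sketch for the ARMv8.2-A FP16 reduction intrinsics added above
   (vmaxv/vminv/vmaxnmv/vminnmv); assumes -march=armv8.2-a+fp16.  The
   example_* names are invented for the sketch.  */
#include <arm_neon.h>

/* vmaxv_f16 maps to FMAXV, so a NaN lane propagates to the result;
   vmaxnmv_f16 maps to FMAXNMV and prefers the largest numeric lane when NaNs
   are present (note the _nan vs. plain reduc builtins above).  */
float16_t
example_reduce_max (float16x4_t v)
{
  return vmaxv_f16 (v);
}

float16_t
example_reduce_max_numeric (float16x4_t v)
{
  return vmaxnmv_f16 (v);
}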
*/ + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+dotprod") + +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_f64 (float64x2_t __a, float64x2_t __b) +vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) +vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -#endif + return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) +vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -#endif + return __builtin_aarch64_sdotv8qi (__r, __a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_s8 (int8x16_t __a, int8x16_t __b) +vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -#endif + return __builtin_aarch64_sdotv16qi (__r, __a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_s16 (int16x8_t __a, int16x8_t __b) +vdot_lane_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b, const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -#endif + return __builtin_aarch64_udot_lanev8qi_uuuus (__r, __a, __b, __index); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_s32 (int32x4_t __a, int32x4_t __b) -{ -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -#endif +vdot_laneq_u32 (uint32x2_t __r, uint8x8_t __a, uint8x16_t __b, + const int __index) +{ + return __builtin_aarch64_udot_laneqv8qi_uuuus (__r, __a, __b, __index); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_s64 (int64x2_t __a, int64x2_t __b) +vdotq_lane_u32 (uint32x4_t __r, uint8x16_t __a, uint8x8_t __b, + const int __index) { -#ifdef 
__AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_udot_lanev16qi_uuuus (__r, __a, __b, __index); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) +vdotq_laneq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -#endif + return __builtin_aarch64_udot_laneqv16qi_uuuus (__r, __a, __b, __index); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) +vdot_lane_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b, const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -#endif + return __builtin_aarch64_sdot_lanev8qi (__r, __a, __b, __index); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) +vdot_laneq_s32 (int32x2_t __r, int8x8_t __a, int8x16_t __b, const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -#endif + return __builtin_aarch64_sdot_laneqv8qi (__r, __a, __b, __index); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) +vdotq_lane_s32 (int32x4_t __r, int8x16_t __a, int8x8_t __b, const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_sdot_lanev16qi (__r, __a, __b, __index); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_f16 (float16x4_t __a, float16x4_t __b) +vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -#endif + return __builtin_aarch64_sdot_laneqv16qi (__r, __a, __b, __index); } +#pragma GCC pop_options -__extension__ extern __inline float32x2_t +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+sm4") + +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_f32 (float32x2_t __a, float32x2_t __b) +vsm3ss1q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_sm3ss1qv4si_uuuu (__a, __b, __c); } -__extension__ extern __inline 
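/* Illustrative sketch for the AdvSIMD dot-product intrinsics added above;
   assumes -march=armv8.2-a+dotprod.  The example_* names are invented.  */
#include <arm_neon.h>

/* Each 32-bit accumulator lane gains the dot product of its group of four
   8-bit lanes: acc[i] += a[4*i]*b[4*i] + ... + a[4*i+3]*b[4*i+3].  */
uint32x2_t
example_udot (uint32x2_t acc, uint8x8_t a, uint8x8_t b)
{
  return vdot_u32 (acc, a, b);
}

/* The _lane forms broadcast one 32-bit group of b (here group 0) and dot it
   against every group of a.  */
int32x4_t
example_sdot_lane (int32x4_t acc, int8x16_t a, int8x8_t b)
{
  return vdotq_lane_s32 (acc, a, b, 0);
}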
poly8x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) +vsm3tt1aq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -#endif + return __builtin_aarch64_sm3tt1aqv4si_uuuus (__a, __b, __c, __imm2); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) +vsm3tt1bq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -#endif + return __builtin_aarch64_sm3tt1bqv4si_uuuus (__a, __b, __c, __imm2); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_s8 (int8x8_t __a, int8x8_t __b) +vsm3tt2aq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -#endif + return __builtin_aarch64_sm3tt2aqv4si_uuuus (__a, __b, __c, __imm2); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_s16 (int16x4_t __a, int16x4_t __b) +vsm3tt2bq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -#endif + return __builtin_aarch64_sm3tt2bqv4si_uuuus (__a, __b, __c, __imm2); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_s32 (int32x2_t __a, int32x2_t __b) +vsm3partw1q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_sm3partw1qv4si_uuuu (__a, __b, __c); } - -__extension__ extern __inline uint8x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) +vsm3partw2q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -#endif + return __builtin_aarch64_sm3partw2qv4si_uuuu (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) +vsm4eq_u32 (uint32x4_t __a, uint32x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -#endif + return 
__builtin_aarch64_sm4eqv4si_uuu (__a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) +vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_sm4ekeyqv4si_uuu (__a, __b); } -__extension__ extern __inline float16x8_t +#pragma GCC pop_options + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+sha3") + +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_f16 (float16x8_t __a, float16x8_t __b) +vsha512hq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -#endif + return __builtin_aarch64_crypto_sha512hqv2di_uuuu (__a, __b, __c); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_f32 (float32x4_t __a, float32x4_t __b) +vsha512h2q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -#endif + return __builtin_aarch64_crypto_sha512h2qv2di_uuuu (__a, __b, __c); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_f64 (float64x2_t __a, float64x2_t __b) +vsha512su0q_u64 (uint64x2_t __a, uint64x2_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __builtin_aarch64_crypto_sha512su0qv2di_uuu (__a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) +vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -#endif + return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) +veor3q_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -#endif + return __builtin_aarch64_eor3qv16qi_uuuu (__a, __b, __c); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_s8 (int8x16_t __a, int8x16_t __b) +veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle 
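/* Illustrative sketch for the SM4 intrinsics added above; assumes
   -march=armv8.2-a+sm4 and an SM4-capable CPU.  The example_* names and the
   variable roles are ours; see the SM4E/SM4EKEY instruction descriptions for
   the exact round semantics.  */
#include <arm_neon.h>

/* Both instructions process four 32-bit words per call: SM4EKEY advances the
   key schedule, SM4E applies encryption rounds to the state.  */
uint32x4_t
example_sm4_next_round_keys (uint32x4_t prev_keys, uint32x4_t constants)
{
  return vsm4ekeyq_u32 (prev_keys, constants);
}

uint32x4_t
example_sm4_rounds (uint32x4_t state, uint32x4_t round_keys)
{
  return vsm4eq_u32 (state, round_keys);
}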
(__a, __b, - (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -#else - return __builtin_shuffle (__a, __b, - (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -#endif + return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_s16 (int16x8_t __a, int16x8_t __b) +veor3q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -#endif + return __builtin_aarch64_eor3qv4si_uuuu (__a, __b, __c); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_s32 (int32x4_t __a, int32x4_t __b) +veor3q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -#endif + return __builtin_aarch64_eor3qv2di_uuuu (__a, __b, __c); } -__extension__ extern __inline int64x2_t + +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_s64 (int64x2_t __a, int64x2_t __b) -{ -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif +veor3q_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return __builtin_aarch64_eor3qv16qi (__a, __b, __c); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) +veor3q_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -#endif + return __builtin_aarch64_eor3qv8hi (__a, __b, __c); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) +veor3q_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -#endif + return __builtin_aarch64_eor3qv4si (__a, __b, __c); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) +veor3q_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -#endif + return __builtin_aarch64_eor3qv2di (__a, __b, __c); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) +vrax1q_u64 (uint64x2_t __a, uint64x2_t __b) { -#ifdef __AARCH64EB__ - 
return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __builtin_aarch64_rax1qv2di_uuu (__a, __b); } -__INTERLEAVE_LIST (uzp) - -/* vzip */ - -__extension__ extern __inline float16x4_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_f16 (float16x4_t __a, float16x4_t __b) +vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int imm6) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -#endif + return __builtin_aarch64_xarqv2di_uuus (__a, __b,imm6); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_f32 (float32x2_t __a, float32x2_t __b) +vbcaxq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_bcaxqv16qi_uuuu (__a, __b, __c); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_p8 (poly8x8_t __a, poly8x8_t __b) +vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -#endif + return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c); } -__extension__ extern __inline poly16x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_p16 (poly16x4_t __a, poly16x4_t __b) +vbcaxq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -#endif + return __builtin_aarch64_bcaxqv4si_uuuu (__a, __b, __c); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_s8 (int8x8_t __a, int8x8_t __b) +vbcaxq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -#endif + return __builtin_aarch64_bcaxqv2di_uuuu (__a, __b, __c); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_s16 (int16x4_t __a, int16x4_t __b) +vbcaxq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -#endif + return __builtin_aarch64_bcaxqv16qi (__a, __b, __c); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_s32 (int32x2_t __a, int32x2_t __b) +vbcaxq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return 
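/* Illustrative sketch for the SHA-3 helper intrinsics added above (EOR3,
   RAX1, XAR, BCAX); assumes -march=armv8.2-a+sha3.  The combination below is
   made up purely to show operand order, not a real Keccak step:
     veor3q_u64 (a, b, c)  == a ^ b ^ c
     vrax1q_u64 (a, b)     == a ^ rol64 (b, 1)
     vxarq_u64 (a, b, n)   == ror64 (a ^ b, n)   (n must be a constant)
     vbcaxq_u64 (a, b, c)  == a ^ (b & ~c)  */
#include <arm_neon.h>

uint64x2_t
example_sha3_helpers (uint64x2_t a, uint64x2_t b, uint64x2_t c)
{
  uint64x2_t parity = veor3q_u64 (a, b, c);
  uint64x2_t t = vrax1q_u64 (parity, c);
  t = vxarq_u64 (t, b, 6);
  return vbcaxq_u64 (t, a, c);
}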
__builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_bcaxqv8hi (__a, __b, __c); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_u8 (uint8x8_t __a, uint8x8_t __b) +vbcaxq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -#endif + return __builtin_aarch64_bcaxqv4si (__a, __b, __c); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_u16 (uint16x4_t __a, uint16x4_t __b) +vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -#endif + return __builtin_aarch64_bcaxqv2di (__a, __b, __c); } -__extension__ extern __inline uint32x2_t +#pragma GCC pop_options + +/* AdvSIMD Complex numbers intrinsics. */ + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.3-a") + +#pragma GCC push_options +#pragma GCC target ("+fp16") +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1_u32 (uint32x2_t __a, uint32x2_t __b) +vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -#endif + return __builtin_aarch64_fcadd90v4hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_f16 (float16x8_t __a, float16x8_t __b) +vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint16x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -#else - return __builtin_shuffle (__a, __b, - (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -#endif + return __builtin_aarch64_fcadd90v8hf (__a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_f32 (float32x4_t __a, float32x4_t __b) +vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -#endif + return __builtin_aarch64_fcadd270v4hf (__a, __b); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_f64 (float64x2_t __a, float64x2_t __b) +vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_fcadd270v8hf (__a, __b); } -__extension__ extern __inline poly8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) +vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 
30, 14, 31, 15}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -#endif + return __builtin_aarch64_fcmla0v4hf (__r, __a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) +vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) - {12, 4, 13, 5, 14, 6, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -#endif + return __builtin_aarch64_fcmla0v8hf (__r, __a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_s8 (int8x16_t __a, int8x16_t __b) +vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -#endif + return __builtin_aarch64_fcmla_lane0v4hf (__r, __a, __b, __index); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_s16 (int16x8_t __a, int16x8_t __b) +vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) - {12, 4, 13, 5, 14, 6, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -#endif + return __builtin_aarch64_fcmla_laneq0v4hf (__r, __a, __b, __index); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_s32 (int32x4_t __a, int32x4_t __b) +vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -#endif + return __builtin_aarch64_fcmlaq_lane0v8hf (__r, __a, __b, __index); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_s64 (int64x2_t __a, int64x2_t __b) +vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_fcmlaq_lane90v8hf (__r, __a, __b, __index); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) +vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -#endif + return 
__builtin_aarch64_fcmla_laneq90v4hf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) +vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) - {12, 4, 13, 5, 14, 6, 15, 7}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -#endif + return __builtin_aarch64_fcmla_lane90v4hf (__r, __a, __b, __index); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) +vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -#endif + return __builtin_aarch64_fcmla90v8hf (__r, __a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) +vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -#endif + return __builtin_aarch64_fcmla90v4hf (__r, __a, __b); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_f16 (float16x4_t __a, float16x4_t __b) +vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -#endif + return __builtin_aarch64_fcmla_lane0v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq180v4hf (__r, __a, __b, __index); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_f32 (float32x2_t __a, float32x2_t __b) +vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_fcmla_lane180v4hf (__r, __a, __b, __index); } -__extension__ extern __inline poly8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_p8 (poly8x8_t __a, poly8x8_t __b) +vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -#endif + return __builtin_aarch64_fcmla180v8hf (__r, __a, __b); } -__extension__ extern __inline poly16x4_t +__extension__ extern 
__inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_p16 (poly16x4_t __a, poly16x4_t __b) +vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -#endif + return __builtin_aarch64_fcmla180v4hf (__r, __a, __b); } -__extension__ extern __inline int8x8_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_s8 (int8x8_t __a, int8x8_t __b) +vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -#endif + return __builtin_aarch64_fcmla_lane90v8hf (__r, __a, __b, __index); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_s16 (int16x4_t __a, int16x4_t __b) +vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -#endif + return __builtin_aarch64_fcmla_lane270v8hf (__r, __a, __b, __index); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_s32 (int32x2_t __a, int32x2_t __b) +vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return __builtin_aarch64_fcmlaq_lane270v8hf (__r, __a, __b, __index); } -__extension__ extern __inline uint8x8_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_u8 (uint8x8_t __a, uint8x8_t __b) +vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -#else - return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -#endif + return __builtin_aarch64_fcmla_laneq270v4hf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_u16 (uint16x4_t __a, uint16x4_t __b) +vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -#else - return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -#endif + return __builtin_aarch64_fcmla270v8hf (__r, __a, __b); } -__extension__ extern __inline uint32x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2_u32 (uint32x2_t __a, uint32x2_t __b) +vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -#endif + return 
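/* Illustrative sketch for the FP16 complex-arithmetic intrinsics added in
   this region; assumes -march=armv8.3-a+fp16.  Complex numbers are held
   interleaved as { re0, im0, re1, im1 }; the example_* names are ours.  */
#include <arm_neon.h>

/* The usual two-step idiom: the rotation-0 and rotation-90 FCMLA steps
   together accumulate all four partial products of a complex multiply,
   i.e. acc += a * b in complex arithmetic.  */
float16x4_t
example_complex_mla_f16 (float16x4_t acc, float16x4_t a, float16x4_t b)
{
  acc = vcmla_f16 (acc, a, b);
  acc = vcmla_rot90_f16 (acc, a, b);
  return acc;
}

/* FCADD with a 90-degree rotation: per pair, { a.re - b.im, a.im + b.re },
   i.e. a + i*b.  */
float16x4_t
example_complex_add_i_f16 (float16x4_t a, float16x4_t b)
{
  return vcadd_rot90_f16 (a, b);
}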
__builtin_aarch64_fcmla270v4hf (__r, __a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_f16 (float16x8_t __a, float16x8_t __b) +vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, - (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -#else - return __builtin_shuffle (__a, __b, - (uint16x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -#endif + return __builtin_aarch64_fcmla_lane180v8hf (__r, __a, __b, __index); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_f32 (float32x4_t __a, float32x4_t __b) +vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -#endif + return __builtin_aarch64_fcmlaq_lane180v8hf (__r, __a, __b, __index); } -__extension__ extern __inline float64x2_t +__extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_f64 (float64x2_t __a, float64x2_t __b) +vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __builtin_aarch64_fcmla_lane270v4hf (__r, __a, __b, __index); } +#pragma GCC pop_options -__extension__ extern __inline poly8x16_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) +vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -#endif + return __builtin_aarch64_fcadd90v2sf (__a, __b); } -__extension__ extern __inline poly16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) +vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) - {4, 12, 5, 13, 6, 14, 7, 15}); -#endif + return __builtin_aarch64_fcadd90v4sf (__a, __b); } -__extension__ extern __inline int8x16_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_s8 (int8x16_t __a, int8x16_t __b) +vcaddq_rot90_f64 (float64x2_t __a, float64x2_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -#endif + return __builtin_aarch64_fcadd90v2df (__a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_s16 (int16x8_t __a, int16x8_t __b) +vcadd_rot270_f32 (float32x2_t 
__a, float32x2_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) - {4, 12, 5, 13, 6, 14, 7, 15}); -#endif + return __builtin_aarch64_fcadd270v2sf (__a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_s32 (int32x4_t __a, int32x4_t __b) +vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -#endif + return __builtin_aarch64_fcadd270v4sf (__a, __b); } -__extension__ extern __inline int64x2_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_s64 (int64x2_t __a, int64x2_t __b) +vcaddq_rot270_f64 (float64x2_t __a, float64x2_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __builtin_aarch64_fcadd270v2df (__a, __b); } -__extension__ extern __inline uint8x16_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) +vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint8x16_t) - {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -#else - return __builtin_shuffle (__a, __b, (uint8x16_t) - {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -#endif + return __builtin_aarch64_fcmla0v2sf (__r, __a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) +vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -#else - return __builtin_shuffle (__a, __b, (uint16x8_t) - {4, 12, 5, 13, 6, 14, 7, 15}); -#endif + return __builtin_aarch64_fcmla0v4sf (__r, __a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) +vcmlaq_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -#else - return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -#endif + return __builtin_aarch64_fcmla0v2df (__r, __a, __b); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) +vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) { -#ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -#else - return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -#endif + return __builtin_aarch64_fcmla_lane0v2sf (__r, __a, __b, __index); } -__INTERLEAVE_LIST (zip) - -#undef __INTERLEAVE_LIST -#undef __DEFINTERLEAVE - -/* End of optimal implementations in approved order. */ - -#pragma GCC pop_options - -/* ARMv8.2-A FP16 intrinsics. 
*/ - -#include "arm_fp16.h" - -#pragma GCC push_options -#pragma GCC target ("arch=armv8.2-a+fp16") - -/* ARMv8.2-A FP16 one operand vector intrinsics. */ +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq0v2sf (__r, __a, __b, __index); +} -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabs_f16 (float16x4_t __a) +vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) { - return __builtin_aarch64_absv4hf (__a); + return __builtin_aarch64_fcmlaq_lane0v4sf (__r, __a, __b, __index); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabsq_f16 (float16x8_t __a) +vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) { - return __builtin_aarch64_absv8hf (__a); + return __builtin_aarch64_fcmla_lane0v4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqz_f16 (float16x4_t __a) +vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f)); + return __builtin_aarch64_fcmla90v2sf (__r, __a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqzq_f16 (float16x8_t __a) +vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f)); + return __builtin_aarch64_fcmla90v4sf (__r, __a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgez_f16 (float16x4_t __a) +vcmlaq_rot90_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f)); + return __builtin_aarch64_fcmla90v2df (__r, __a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgezq_f16 (float16x8_t __a) +vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) { - return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f)); + return __builtin_aarch64_fcmla_lane90v2sf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtz_f16 (float16x4_t __a) +vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) { - return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f)); + return __builtin_aarch64_fcmla_laneq90v2sf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtzq_f16 (float16x8_t __a) +vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) { - return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f)); + return 
__builtin_aarch64_fcmlaq_lane90v4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclez_f16 (float16x4_t __a) +vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) { - return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f)); + return __builtin_aarch64_fcmla_lane90v4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclezq_f16 (float16x8_t __a) +vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f)); + return __builtin_aarch64_fcmla180v2sf (__r, __a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltz_f16 (float16x4_t __a) +vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f)); + return __builtin_aarch64_fcmla180v4sf (__r, __a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltzq_f16 (float16x8_t __a) +vcmlaq_rot180_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f)); + return __builtin_aarch64_fcmla180v2df (__r, __a, __b); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f16_s16 (int16x4_t __a) +vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) { - return __builtin_aarch64_floatv4hiv4hf (__a); + return __builtin_aarch64_fcmla_lane180v2sf (__r, __a, __b, __index); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_f16_s16 (int16x8_t __a) +vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) { - return __builtin_aarch64_floatv8hiv8hf (__a); + return __builtin_aarch64_fcmla_laneq180v2sf (__r, __a, __b, __index); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_f16_u16 (uint16x4_t __a) +vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) { - return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a); + return __builtin_aarch64_fcmlaq_lane180v4sf (__r, __a, __b, __index); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_f16_u16 (uint16x8_t __a) +vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) { - return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a); + return __builtin_aarch64_fcmla_lane180v4sf (__r, __a, __b, __index); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_s16_f16 (float16x4_t __a) +vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) { - return 
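/* Illustrative sketch for the single-precision complex _laneq forms added
   above; assumes -march=armv8.3-a.  Note that the lane index selects a
   (real, imaginary) pair, not an individual float lane; the example_* name
   is ours.  */
#include <arm_neon.h>

/* Broadcast complex pair 1 of q against both pairs in a; the rotation-0 plus
   rotation-90 steps together accumulate acc += a * (pair 1 of q).  */
float32x4_t
example_complex_mla_by_pair (float32x4_t acc, float32x4_t a, float32x4_t q)
{
  acc = vcmlaq_laneq_f32 (acc, a, q, 1);
  acc = vcmlaq_rot90_laneq_f32 (acc, a, q, 1);
  return acc;
}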
__builtin_aarch64_lbtruncv4hfv4hi (__a); + return __builtin_aarch64_fcmla270v2sf (__r, __a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_s16_f16 (float16x8_t __a) +vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_lbtruncv8hfv8hi (__a); + return __builtin_aarch64_fcmla270v4sf (__r, __a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_u16_f16 (float16x4_t __a) +vcmlaq_rot270_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a); + return __builtin_aarch64_fcmla270v2df (__r, __a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_u16_f16 (float16x8_t __a) +vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) { - return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a); + return __builtin_aarch64_fcmla_lane270v2sf (__r, __a, __b, __index); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvta_s16_f16 (float16x4_t __a) +vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) { - return __builtin_aarch64_lroundv4hfv4hi (__a); + return __builtin_aarch64_fcmla_laneq270v2sf (__r, __a, __b, __index); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtaq_s16_f16 (float16x8_t __a) +vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) { - return __builtin_aarch64_lroundv8hfv8hi (__a); + return __builtin_aarch64_fcmlaq_lane270v4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvta_u16_f16 (float16x4_t __a) +vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) { - return __builtin_aarch64_lrounduv4hfv4hi_us (__a); + return __builtin_aarch64_fcmla_lane270v4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint16x8_t +#pragma GCC pop_options + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+fp16fml") + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtaq_u16_f16 (float16x8_t __a) +vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_lrounduv8hfv8hi_us (__a); + return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtm_s16_f16 (float16x4_t __a) +vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_lfloorv4hfv4hi (__a); + return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmq_s16_f16 (float16x8_t __a) +vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, 
float16x8_t __b) { - return __builtin_aarch64_lfloorv8hfv8hi (__a); + return __builtin_aarch64_fmlalq_lowv4sf (__r, __a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtm_u16_f16 (float16x4_t __a) +vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_lflooruv4hfv4hi_us (__a); + return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtmq_u16_f16 (float16x8_t __a) +vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_lflooruv8hfv8hi_us (__a); + return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtn_s16_f16 (float16x4_t __a) +vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_lfrintnv4hfv4hi (__a); + return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtnq_s16_f16 (float16x8_t __a) +vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_lfrintnv8hfv8hi (__a); + return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtn_u16_f16 (float16x4_t __a) +vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a); + return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtnq_u16_f16 (float16x8_t __a) +vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, + const int __lane) { - return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a); + return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline int16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtp_s16_f16 (float16x4_t __a) +vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, + const int __lane) { - return __builtin_aarch64_lceilv4hfv4hi (__a); + return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline int16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpq_s16_f16 (float16x8_t __a) +vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, + const int __lane) { - return __builtin_aarch64_lceilv8hfv8hi (__a); + return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtp_u16_f16 (float16x4_t __a) +vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, + const int __lane) { - return __builtin_aarch64_lceiluv4hfv4hi_us (__a); + return 
__builtin_aarch64_fmlsl_laneq_lowv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtpq_u16_f16 (float16x8_t __a) +vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, + const int __lane) { - return __builtin_aarch64_lceiluv8hfv8hi_us (__a); + return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vneg_f16 (float16x4_t __a) +vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, + const int __lane) { - return -__a; + return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vnegq_f16 (float16x8_t __a) +vfmlalq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, + const int __lane) { - return -__a; + return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpe_f16 (float16x4_t __a) +vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, + const int __lane) { - return __builtin_aarch64_frecpev4hf (__a); + return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpeq_f16 (float16x8_t __a) +vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, + const int __lane) { - return __builtin_aarch64_frecpev8hf (__a); + return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrnd_f16 (float16x4_t __a) +vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, + const int __lane) { - return __builtin_aarch64_btruncv4hf (__a); + return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndq_f16 (float16x8_t __a) +vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, + const int __lane) { - return __builtin_aarch64_btruncv8hf (__a); + return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrnda_f16 (float16x4_t __a) +vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, + const int __lane) { - return __builtin_aarch64_roundv4hf (__a); + return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndaq_f16 (float16x8_t __a) +vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, + const int __lane) { - return __builtin_aarch64_roundv8hf (__a); + return 
__builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndi_f16 (float16x4_t __a) +vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, + const int __lane) { - return __builtin_aarch64_nearbyintv4hf (__a); + return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndiq_f16 (float16x8_t __a) +vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, + const int __lane) { - return __builtin_aarch64_nearbyintv8hf (__a); + return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndm_f16 (float16x4_t __a) +vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, + const int __lane) { - return __builtin_aarch64_floorv4hf (__a); + return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane); } -__extension__ extern __inline float16x8_t +#pragma GCC pop_options + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.5-a") + +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndmq_f16 (float16x8_t __a) +vrnd32z_f32 (float32x2_t __a) { - return __builtin_aarch64_floorv8hf (__a); + return __builtin_aarch64_frint32zv2sf (__a); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndn_f16 (float16x4_t __a) +vrnd32zq_f32 (float32x4_t __a) { - return __builtin_aarch64_frintnv4hf (__a); + return __builtin_aarch64_frint32zv4sf (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndnq_f16 (float16x8_t __a) +vrnd32z_f64 (float64x1_t __a) { - return __builtin_aarch64_frintnv8hf (__a); + return (float64x1_t) + {__builtin_aarch64_frint32zdf (vget_lane_f64 (__a, 0))}; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndp_f16 (float16x4_t __a) +vrnd32zq_f64 (float64x2_t __a) { - return __builtin_aarch64_ceilv4hf (__a); + return __builtin_aarch64_frint32zv2df (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndpq_f16 (float16x8_t __a) +vrnd32x_f32 (float32x2_t __a) { - return __builtin_aarch64_ceilv8hf (__a); + return __builtin_aarch64_frint32xv2sf (__a); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndx_f16 (float16x4_t __a) +vrnd32xq_f32 (float32x4_t __a) { - return __builtin_aarch64_rintv4hf (__a); + return __builtin_aarch64_frint32xv4sf (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrndxq_f16 (float16x8_t __a) +vrnd32x_f64 (float64x1_t __a) { - return __builtin_aarch64_rintv8hf (__a); + return (float64x1_t) 
{__builtin_aarch64_frint32xdf (vget_lane_f64 (__a, 0))}; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrte_f16 (float16x4_t a) +vrnd32xq_f64 (float64x2_t __a) { - return __builtin_aarch64_rsqrtev4hf (a); + return __builtin_aarch64_frint32xv2df (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrteq_f16 (float16x8_t a) +vrnd64z_f32 (float32x2_t __a) { - return __builtin_aarch64_rsqrtev8hf (a); + return __builtin_aarch64_frint64zv2sf (__a); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqrt_f16 (float16x4_t a) +vrnd64zq_f32 (float32x4_t __a) { - return __builtin_aarch64_sqrtv4hf (a); + return __builtin_aarch64_frint64zv4sf (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsqrtq_f16 (float16x8_t a) +vrnd64z_f64 (float64x1_t __a) { - return __builtin_aarch64_sqrtv8hf (a); + return (float64x1_t) {__builtin_aarch64_frint64zdf (vget_lane_f64 (__a, 0))}; } -/* ARMv8.2-A FP16 two operands vector intrinsics. */ - -__extension__ extern __inline float16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vadd_f16 (float16x4_t __a, float16x4_t __b) +vrnd64zq_f64 (float64x2_t __a) { - return __a + __b; + return __builtin_aarch64_frint64zv2df (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vaddq_f16 (float16x8_t __a, float16x8_t __b) +vrnd64x_f32 (float32x2_t __a) { - return __a + __b; + return __builtin_aarch64_frint64xv2sf (__a); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabd_f16 (float16x4_t a, float16x4_t b) +vrnd64xq_f32 (float32x4_t __a) { - return __builtin_aarch64_fabdv4hf (a, b); + return __builtin_aarch64_frint64xv4sf (__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vabdq_f16 (float16x8_t a, float16x8_t b) +vrnd64x_f64 (float64x1_t __a) { - return __builtin_aarch64_fabdv8hf (a, b); + return (float64x1_t) {__builtin_aarch64_frint64xdf (vget_lane_f64 (__a, 0))}; } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcage_f16 (float16x4_t __a, float16x4_t __b) +vrnd64xq_f64 (float64x2_t __a) { - return __builtin_aarch64_facgev4hf_uss (__a, __b); + return __builtin_aarch64_frint64xv2df (__a); } -__extension__ extern __inline uint16x8_t +#pragma GCC pop_options + +#include "arm_bf16.h" + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+bf16") + +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcageq_f16 (float16x8_t __a, float16x8_t __b) +vset_lane_bf16 (bfloat16_t __elem, bfloat16x4_t __vec, const int __index) { - return __builtin_aarch64_facgev8hf_uss (__a, __b); + return __aarch64_vset_lane_any (__elem, __vec, __index); } -__extension__ extern __inline uint16x4_t 
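/* Illustrative usage sketch (not part of the patch): the complex
   multiply-accumulate, FP16FML widening multiply-add and Armv8.5-A
   frint32/frint64 intrinsics introduced above, as they might appear in
   user code.  Assumes an AArch64 GCC invoked with something like
   -march=armv8.5-a+fp16fml; the example_* names are placeholders.  */

#include <arm_neon.h>

/* Complex MLA: accumulate a * rot270(b) into acc.  */
float32x4_t
example_cmla_rot270 (float32x4_t acc, float32x4_t a, float32x4_t b)
{
  return vcmlaq_rot270_f32 (acc, a, b);
}

/* Widening FP16 multiply-add over the low halves of a and b.  */
float32x2_t
example_fmlal_low (float32x2_t acc, float16x4_t a, float16x4_t b)
{
  return vfmlal_low_f16 (acc, a, b);
}

/* Round toward zero to an integral value in 32-bit signed range (FRINT32Z).  */
float32x4_t
example_rnd32z (float32x4_t x)
{
  return vrnd32zq_f32 (x);
}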
+__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagt_f16 (float16x4_t __a, float16x4_t __b) +vsetq_lane_bf16 (bfloat16_t __elem, bfloat16x8_t __vec, const int __index) { - return __builtin_aarch64_facgtv4hf_uss (__a, __b); + return __aarch64_vset_lane_any (__elem, __vec, __index); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcagtq_f16 (float16x8_t __a, float16x8_t __b) +vget_lane_bf16 (bfloat16x4_t __a, const int __b) { - return __builtin_aarch64_facgtv8hf_uss (__a, __b); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline bfloat16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcale_f16 (float16x4_t __a, float16x4_t __b) +vgetq_lane_bf16 (bfloat16x8_t __a, const int __b) { - return __builtin_aarch64_faclev4hf_uss (__a, __b); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaleq_f16 (float16x8_t __a, float16x8_t __b) +vcreate_bf16 (uint64_t __a) { - return __builtin_aarch64_faclev8hf_uss (__a, __b); + return (bfloat16x4_t) __a; } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcalt_f16 (float16x4_t __a, float16x4_t __b) +vcombine_bf16 (bfloat16x4_t __a, bfloat16x4_t __b) { - return __builtin_aarch64_facltv4hf_uss (__a, __b); + return (bfloat16x8_t)__builtin_aarch64_combinev4bf (__a, __b); } -__extension__ extern __inline uint16x8_t +/* vdup */ + +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcaltq_f16 (float16x8_t __a, float16x8_t __b) +vdup_n_bf16 (bfloat16_t __a) { - return __builtin_aarch64_facltv8hf_uss (__a, __b); + return (bfloat16x4_t) {__a, __a, __a, __a}; } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceq_f16 (float16x4_t __a, float16x4_t __b) +vdupq_n_bf16 (bfloat16_t __a) { - return __builtin_aarch64_cmeqv4hf_uss (__a, __b); + return (bfloat16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vceqq_f16 (float16x8_t __a, float16x8_t __b) +vdup_lane_bf16 (bfloat16x4_t __a, const int __b) { - return __builtin_aarch64_cmeqv8hf_uss (__a, __b); + return vdup_n_bf16 (__aarch64_vget_lane_any (__a, __b)); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcge_f16 (float16x4_t __a, float16x4_t __b) +vdup_laneq_bf16 (bfloat16x8_t __a, const int __b) { - return __builtin_aarch64_cmgev4hf_uss (__a, __b); + return vdup_n_bf16 (__aarch64_vget_lane_any (__a, __b)); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgeq_f16 (float16x8_t __a, float16x8_t __b) +vdupq_lane_bf16 (bfloat16x4_t __a, const int __b) { - return __builtin_aarch64_cmgev8hf_uss (__a, __b); + return vdupq_n_bf16 (__aarch64_vget_lane_any (__a, __b)); } -__extension__ 
extern __inline uint16x4_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgt_f16 (float16x4_t __a, float16x4_t __b) +vdupq_laneq_bf16 (bfloat16x8_t __a, const int __b) { - return __builtin_aarch64_cmgtv4hf_uss (__a, __b); + return vdupq_n_bf16 (__aarch64_vget_lane_any (__a, __b)); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcgtq_f16 (float16x8_t __a, float16x8_t __b) +vduph_lane_bf16 (bfloat16x4_t __a, const int __b) { - return __builtin_aarch64_cmgtv8hf_uss (__a, __b); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline bfloat16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcle_f16 (float16x4_t __a, float16x4_t __b) +vduph_laneq_bf16 (bfloat16x8_t __a, const int __b) { - return __builtin_aarch64_cmlev4hf_uss (__a, __b); + return __aarch64_vget_lane_any (__a, __b); } -__extension__ extern __inline uint16x8_t +/* vld */ + +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcleq_f16 (float16x8_t __a, float16x8_t __b) +vld1_bf16 (const bfloat16_t *__a) { - return __builtin_aarch64_cmlev8hf_uss (__a, __b); + return (bfloat16x4_t) __builtin_aarch64_ld1v4bf (__a); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vclt_f16 (float16x4_t __a, float16x4_t __b) +vld1q_bf16 (const bfloat16_t *__a) { - return __builtin_aarch64_cmltv4hf_uss (__a, __b); + return __builtin_aarch64_ld1v8bf (__a); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcltq_f16 (float16x8_t __a, float16x8_t __b) +vld1_bf16_x2 (const bfloat16_t *__a) { - return __builtin_aarch64_cmltv8hf_uss (__a, __b); + bfloat16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v4bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); + return ret; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_f16_s16 (int16x4_t __a, const int __b) +vld1q_bf16_x2 (const bfloat16_t *__a) { - return __builtin_aarch64_scvtfv4hi (__a, __b); + bfloat16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld1x2v8bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); + return ret; } -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_f16_s16 (int16x8_t __a, const int __b) +vld1_bf16_x3 (const bfloat16_t *__a) { - return __builtin_aarch64_scvtfv8hi (__a, __b); + bfloat16x4x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v4bf ((const __builtin_aarch64_simd_bf *) __a); + __i.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); + __i.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); + __i.val[2] = (bfloat16x4_t) 
__builtin_aarch64_get_dregciv4bf (__o, 2); + return __i; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_f16_u16 (uint16x4_t __a, const int __b) +vld1q_bf16_x3 (const bfloat16_t *__a) { - return __builtin_aarch64_ucvtfv4hi_sus (__a, __b); + bfloat16x8x3_t __i; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld1x3v8bf ((const __builtin_aarch64_simd_bf *) __a); + __i.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); + __i.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); + __i.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); + return __i; } - -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_f16_u16 (uint16x8_t __a, const int __b) +vld1_bf16_x4 (const bfloat16_t *__a) { - return __builtin_aarch64_ucvtfv8hi_sus (__a, __b); + union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v4bf ((const __builtin_aarch64_simd_bf *) __a); + return __au.__i; } -__extension__ extern __inline int16x4_t +__extension__ extern __inline bfloat16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_s16_f16 (float16x4_t __a, const int __b) +vld1q_bf16_x4 (const bfloat16_t *__a) { - return __builtin_aarch64_fcvtzsv4hf (__a, __b); + union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; + __au.__o + = __builtin_aarch64_ld1x4v8bf ((const __builtin_aarch64_simd_bf *) __a); + return __au.__i; } -__extension__ extern __inline int16x8_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_s16_f16 (float16x8_t __a, const int __b) +vld1_lane_bf16 (const bfloat16_t *__src, bfloat16x4_t __vec, const int __lane) { - return __builtin_aarch64_fcvtzsv8hf (__a, __b); + return __aarch64_vset_lane_any (*__src, __vec, __lane); } -__extension__ extern __inline uint16x4_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvt_n_u16_f16 (float16x4_t __a, const int __b) +vld1q_lane_bf16 (const bfloat16_t *__src, bfloat16x8_t __vec, const int __lane) { - return __builtin_aarch64_fcvtzuv4hf_uss (__a, __b); + return __aarch64_vset_lane_any (*__src, __vec, __lane); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vcvtq_n_u16_f16 (float16x8_t __a, const int __b) +vld1_dup_bf16 (const bfloat16_t* __a) { - return __builtin_aarch64_fcvtzuv8hf_uss (__a, __b); + return vdup_n_bf16 (*__a); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdiv_f16 (float16x4_t __a, float16x4_t __b) +vld1q_dup_bf16 (const bfloat16_t* __a) { - return __a / __b; + return vdupq_n_bf16 (*__a); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdivq_f16 (float16x8_t __a, float16x8_t __b) +vld2_bf16 (const bfloat16_t * __a) { - return __a / __b; + bfloat16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4bf (__a); + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); + ret.val[1] = (bfloat16x4_t) 
__builtin_aarch64_get_dregoiv4bf (__o, 1); + return ret; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmax_f16 (float16x4_t __a, float16x4_t __b) +vld2q_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_smax_nanv4hf (__a, __b); + bfloat16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); + return ret; } -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxq_f16 (float16x8_t __a, float16x8_t __b) +vld2_dup_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_smax_nanv8hf (__a, __b); + bfloat16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); + return ret; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnm_f16 (float16x4_t __a, float16x4_t __b) +vld2q_dup_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_fmaxv4hf (__a, __b); + bfloat16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); + return ret; } -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmq_f16 (float16x8_t __a, float16x8_t __b) +vld3_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_fmaxv8hf (__a, __b); + bfloat16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); + ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); + return ret; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmin_f16 (float16x4_t __a, float16x4_t __b) +vld3q_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_smin_nanv4hf (__a, __b); + bfloat16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); + ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); + return ret; } -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminq_f16 (float16x8_t __a, float16x8_t __b) +vld3_dup_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_smin_nanv8hf (__a, __b); + bfloat16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv4bf ((const 
__builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); + ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); + return ret; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnm_f16 (float16x4_t __a, float16x4_t __b) +vld3q_dup_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_fminv4hf (__a, __b); + bfloat16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3rv8bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); + ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); + return ret; } -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmq_f16 (float16x8_t __a, float16x8_t __b) +vld4_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_fminv8hf (__a, __b); + bfloat16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); + ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); + ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); + return ret; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_f16 (float16x4_t __a, float16x4_t __b) +vld4q_bf16 (const bfloat16_t * __a) { - return __a * __b; + bfloat16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); + ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); + ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); + return ret; } -__extension__ extern __inline float16x8_t +__extension__ extern __inline bfloat16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_f16 (float16x8_t __a, float16x8_t __b) +vld4_dup_bf16 (const bfloat16_t * __a) { - return __a * __b; + bfloat16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv4bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); + ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); + ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); + return ret; } -__extension__ extern __inline float16x4_t +__extension__ extern __inline bfloat16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_f16 (float16x4_t __a, float16x4_t __b) +vld4q_dup_bf16 (const bfloat16_t * __a) { - return __builtin_aarch64_fmulxv4hf (__a, __b); + bfloat16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4rv8bf ((const __builtin_aarch64_simd_bf *) __a); + ret.val[0] = (bfloat16x8_t) 
__builtin_aarch64_get_qregxiv8bf (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); + ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); + ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); + return ret; } -__extension__ extern __inline float16x8_t +/* vst */ + +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_f16 (float16x8_t __a, float16x8_t __b) +vst1_bf16 (bfloat16_t *__a, bfloat16x4_t __b) { - return __builtin_aarch64_fmulxv8hf (__a, __b); + __builtin_aarch64_st1v4bf (__a, __b); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpadd_f16 (float16x4_t a, float16x4_t b) +vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __val) { - return __builtin_aarch64_faddpv4hf (a, b); + __builtin_aarch64_simd_oi __o; + bfloat16x8x2_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); + __builtin_aarch64_st1x2v4bf (__a, __o); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpaddq_f16 (float16x8_t a, float16x8_t b) +vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __val) { - return __builtin_aarch64_faddpv8hf (a, b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); + __builtin_aarch64_st1x2v8bf (__a, __o); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmax_f16 (float16x4_t a, float16x4_t b) +vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val) { - return __builtin_aarch64_smax_nanpv4hf (a, b); + __builtin_aarch64_simd_ci __o; + bfloat16x8x3_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxq_f16 (float16x8_t a, float16x8_t b) +vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __val) { - return __builtin_aarch64_smax_nanpv8hf (a, b); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); + __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
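/* Illustrative usage sketch (not part of the patch): basic bfloat16
   vector handling with the new load, duplicate and store intrinsics
   defined above.  Assumes -march=armv8.2-a+bf16 (or an equivalent
   target attribute); the example_* name is a placeholder.  */

#include <arm_neon.h>

void
example_bf16_splat_lane0 (const bfloat16_t *src, bfloat16_t *dst)
{
  bfloat16x4_t v = vld1_bf16 (src);             /* load 4 bf16 elements */
  bfloat16x4_t splat = vdup_lane_bf16 (v, 0);   /* broadcast element 0 */
  vst1_bf16 (dst, splat);                       /* store 4 bf16 elements */
}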
-vpmaxnm_f16 (float16x4_t a, float16x4_t b) +vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t val) { - return __builtin_aarch64_smaxpv4hf (a, b); + union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmaxnmq_f16 (float16x8_t a, float16x8_t b) +vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t val) { - return __builtin_aarch64_smaxpv8hf (a, b); + union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; + __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpmin_f16 (float16x4_t a, float16x4_t b) +vst1q_bf16 (bfloat16_t *__a, bfloat16x8_t __b) { - return __builtin_aarch64_smin_nanpv4hf (a, b); + __builtin_aarch64_st1v8bf (__a, __b); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminq_f16 (float16x8_t a, float16x8_t b) +vst1_lane_bf16 (bfloat16_t *__a, bfloat16x4_t __b, const int __lane) { - return __builtin_aarch64_smin_nanpv8hf (a, b); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminnm_f16 (float16x4_t a, float16x4_t b) +vst1q_lane_bf16 (bfloat16_t *__a, bfloat16x8_t __b, const int __lane) { - return __builtin_aarch64_sminpv4hf (a, b); + *__a = __aarch64_vget_lane_any (__b, __lane); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vpminnmq_f16 (float16x8_t a, float16x8_t b) +vst2_bf16 (bfloat16_t * __a, bfloat16x4x2_t __val) { - return __builtin_aarch64_sminpv8hf (a, b); + __builtin_aarch64_simd_oi __o; + bfloat16x8x2_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); + __builtin_aarch64_st2v4bf (__a, __o); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecps_f16 (float16x4_t __a, float16x4_t __b) +vst2q_bf16 (bfloat16_t * __a, bfloat16x8x2_t __val) { - return __builtin_aarch64_frecpsv4hf (__a, __b); + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); + __builtin_aarch64_st2v8bf (__a, __o); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrecpsq_f16 (float16x8_t __a, float16x8_t __b) +vst3_bf16 (bfloat16_t * __a, bfloat16x4x3_t __val) { - return __builtin_aarch64_frecpsv8hf (__a, __b); + __builtin_aarch64_simd_ci __o; + bfloat16x8x3_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = 
vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrts_f16 (float16x4_t a, float16x4_t b) +vst3q_bf16 (bfloat16_t * __a, bfloat16x8x3_t __val) { - return __builtin_aarch64_rsqrtsv4hf (a, b); + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); } -__extension__ extern __inline float16x8_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrsqrtsq_f16 (float16x8_t a, float16x8_t b) +vst4_bf16 (bfloat16_t * __a, bfloat16x4x4_t __val) { - return __builtin_aarch64_rsqrtsv8hf (a, b); + __builtin_aarch64_simd_xi __o; + bfloat16x8x4_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_bf16 (__val.val[3], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __o); } -__extension__ extern __inline float16x4_t +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsub_f16 (float16x4_t __a, float16x4_t __b) +vst4q_bf16 (bfloat16_t * __a, bfloat16x8x4_t __val) { - return __a - __b; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __o); } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsubq_f16 (float16x8_t __a, float16x8_t __b) +/* vreinterpret */ + +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_u8 (uint8x8_t __a) { - return __a - __b; + return (bfloat16x4_t)__a; } -/* ARMv8.2-A FP16 three operands vector intrinsics. 
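/* Illustrative usage sketch (not part of the patch): the structured
   bfloat16 loads and stores above follow the usual vldN/vstN pattern,
   de-interleaving on load and re-interleaving on store.  Assumes
   -march=armv8.2-a+bf16; the example_* name is a placeholder.  */

#include <arm_neon.h>

void
example_bf16_deinterleave_roundtrip (const bfloat16_t *src, bfloat16_t *dst)
{
  bfloat16x4x2_t pair = vld2_bf16 (src);  /* even lanes in val[0], odd in val[1] */
  vst2_bf16 (dst, pair);                  /* interleave back on store */
}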
*/ +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_u16 (uint16x4_t __a) +{ + return (bfloat16x4_t)__a; +} -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_u32 (uint32x2_t __a) { - return __builtin_aarch64_fmav4hf (__b, __c, __a); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_u64 (uint64x1_t __a) { - return __builtin_aarch64_fmav8hf (__b, __c, __a); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_s8 (int8x8_t __a) { - return __builtin_aarch64_fnmav4hf (__b, __c, __a); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_s16 (int16x4_t __a) { - return __builtin_aarch64_fnmav8hf (__b, __c, __a); + return (bfloat16x4_t)__a; } -/* ARMv8.2-A FP16 lane vector intrinsics. 
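/* Illustrative usage sketch (not part of the patch): bfloat16 has no
   literal syntax, so raw 16-bit patterns are usually moved into bf16
   vectors through the vreinterpret_bf16_* forms above.  Assumes
   -march=armv8.2-a+bf16; the example_* name is a placeholder.  */

#include <arm_neon.h>

bfloat16x4_t
example_bits_to_bf16 (uint16x4_t bits)
{
  return vreinterpret_bf16_u16 (bits);   /* same bits, bf16 element type */
}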
*/ +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_s32 (int32x2_t __a) +{ + return (bfloat16x4_t)__a; +} -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmah_lane_f16 (float16_t __a, float16_t __b, - float16x4_t __c, const int __lane) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_s64 (int64x1_t __a) { - return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmah_laneq_f16 (float16_t __a, float16_t __b, - float16x8_t __c, const int __lane) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_p8 (poly8x8_t __a) { - return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_lane_f16 (float16x4_t __a, float16x4_t __b, - float16x4_t __c, const int __lane) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_p16 (poly16x4_t __a) { - return vfma_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_lane_f16 (float16x8_t __a, float16x8_t __b, - float16x4_t __c, const int __lane) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_p64 (poly64x1_t __a) { - return vfmaq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_laneq_f16 (float16x4_t __a, float16x4_t __b, - float16x8_t __c, const int __lane) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_f16 (float16x4_t __a) { - return vfma_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_laneq_f16 (float16x8_t __a, float16x8_t __b, - float16x8_t __c, const int __lane) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_f32 (float32x2_t __a) { - return vfmaq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfma_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_bf16_f64 (float64x1_t __a) { - return vfma_f16 (__a, __b, vdup_n_f16 (__c)); + return (bfloat16x4_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) +__extension__ extern __inline bfloat16x8_t +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_u8 (uint8x16_t __a) { - return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsh_lane_f16 (float16_t __a, float16_t __b, - float16x4_t __c, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_u16 (uint16x8_t __a) { - return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsh_laneq_f16 (float16_t __a, float16_t __b, - float16x8_t __c, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_u32 (uint32x4_t __a) { - return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_lane_f16 (float16x4_t __a, float16x4_t __b, - float16x4_t __c, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_u64 (uint64x2_t __a) { - return vfms_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_lane_f16 (float16x8_t __a, float16x8_t __b, - float16x4_t __c, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_s8 (int8x16_t __a) { - return vfmsq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_laneq_f16 (float16x4_t __a, float16x4_t __b, - float16x8_t __c, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_s16 (int16x8_t __a) { - return vfms_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_laneq_f16 (float16x8_t __a, float16x8_t __b, - float16x8_t __c, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_s32 (int32x4_t __a) { - return vfmsq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfms_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_s64 (int64x2_t __a) { - return vfms_f16 (__a, __b, vdup_n_f16 (__c)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) +__extension__ extern __inline bfloat16x8_t +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_p8 (poly8x16_t __a) { - return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c)); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_p16 (poly16x8_t __a) { - return __a * __aarch64_vget_lane_any (__b, __lane); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_p64 (poly64x2_t __a) { - return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_p128 (poly128_t __a) { - return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_f16 (float16x8_t __a) { - return __a * __aarch64_vget_lane_any (__b, __lane); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_f32 (float32x4_t __a) { - return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_bf16_f64 (float64x2_t __a) { - return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); + return (bfloat16x8_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmul_n_f16 (float16x4_t __a, float16_t __b) +__extension__ extern __inline int8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_s8_bf16 (bfloat16x4_t __a) { - return vmul_lane_f16 (__a, vdup_n_f16 (__b), 0); + return (int8x8_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulq_n_f16 (float16x8_t __a, float16_t __b) +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_s16_bf16 (bfloat16x4_t __a) { - return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0); + return 
(int16x4_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_s32_bf16 (bfloat16x4_t __a) { - return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane)); + return (int32x2_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_s64_bf16 (bfloat16x4_t __a) { - return vmulx_f16 (__a, __aarch64_vdup_lane_f16 (__b, __lane)); + return (int64x1_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) +__extension__ extern __inline uint8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_u8_bf16 (bfloat16x4_t __a) { - return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane)); + return (uint8x8_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) +__extension__ extern __inline uint16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_u16_bf16 (bfloat16x4_t __a) { - return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane)); + return (uint16x4_t)__a; } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_u32_bf16 (bfloat16x4_t __a) { - return vmulx_f16 (__a, __aarch64_vdup_laneq_f16 (__b, __lane)); + return (uint32x2_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_u64_bf16 (bfloat16x4_t __a) { - return vmulxq_f16 (__a, __aarch64_vdupq_laneq_f16 (__b, __lane)); + return (uint64x1_t)__a; } __extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulx_n_f16 (float16x4_t __a, float16_t __b) +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_f16_bf16 (bfloat16x4_t __a) { - return vmulx_f16 (__a, vdup_n_f16 (__b)); + return (float16x4_t)__a; } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmulxq_n_f16 (float16x8_t __a, float16_t __b) +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_f32_bf16 (bfloat16x4_t __a) { - return vmulxq_f16 (__a, vdupq_n_f16 (__b)); + return (float32x2_t)__a; } -/* ARMv8.2-A FP16 reduction vector intrinsics. 
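/* Illustrative usage sketch (not part of the patch): since bf16 lanes
   share their storage with 16-bit integer lanes, the reverse
   vreinterpret_*_bf16 forms above permit bitwise manipulation, e.g.
   negation by toggling the sign bit.  Assumes -march=armv8.2-a+bf16;
   the example_* name is a placeholder.  */

#include <arm_neon.h>

bfloat16x4_t
example_bf16_negate (bfloat16x4_t x)
{
  uint16x4_t bits = vreinterpret_u16_bf16 (x);   /* view lanes as u16 */
  bits = veor_u16 (bits, vdup_n_u16 (0x8000));   /* flip the sign bits */
  return vreinterpret_bf16_u16 (bits);           /* back to bf16 */
}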
*/ - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxv_f16 (float16x4_t __a) +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_f64_bf16 (bfloat16x4_t __a) { - return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a); + return (float64x1_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxvq_f16 (float16x8_t __a) +__extension__ extern __inline poly8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_p8_bf16 (bfloat16x4_t __a) { - return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a); + return (poly8x8_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminv_f16 (float16x4_t __a) +__extension__ extern __inline poly16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_p16_bf16 (bfloat16x4_t __a) { - return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a); + return (poly16x4_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminvq_f16 (float16x8_t __a) +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpret_p64_bf16 (bfloat16x4_t __a) { - return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a); + return (poly64x1_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmv_f16 (float16x4_t __a) +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_s8_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_reduc_smax_scal_v4hf (__a); + return (int8x16_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmaxnmvq_f16 (float16x8_t __a) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_s16_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_reduc_smax_scal_v8hf (__a); + return (int16x8_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmv_f16 (float16x4_t __a) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_s32_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_reduc_smin_scal_v4hf (__a); + return (int32x4_t)__a; } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vminnmvq_f16 (float16x8_t __a) +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_s64_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_reduc_smin_scal_v8hf (__a); + return (int64x2_t)__a; } -#pragma GCC pop_options +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_u8_bf16 (bfloat16x8_t __a) +{ + return (uint8x16_t)__a; +} -/* AdvSIMD Dot Product intrinsics. 
*/ +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_u16_bf16 (bfloat16x8_t __a) +{ + return (uint16x8_t)__a; +} -#pragma GCC push_options -#pragma GCC target ("arch=armv8.2-a+dotprod") +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_u32_bf16 (bfloat16x8_t __a) +{ + return (uint32x4_t)__a; +} -__extension__ extern __inline uint32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b) +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_u64_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b); + return (uint64x2_t)__a; } -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_f16_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b); + return (float16x8_t)__a; } -__extension__ extern __inline int32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b) +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_f32_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_sdotv8qi (__r, __a, __b); + return (float32x4_t)__a; } -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_f64_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_sdotv16qi (__r, __a, __b); + return (float64x2_t)__a; } -__extension__ extern __inline uint32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdot_lane_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b, const int __index) +__extension__ extern __inline poly8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_p8_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_udot_lanev8qi_uuuus (__r, __a, __b, __index); + return (poly8x16_t)__a; } -__extension__ extern __inline uint32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdot_laneq_u32 (uint32x2_t __r, uint8x8_t __a, uint8x16_t __b, - const int __index) +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_p16_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_udot_laneqv8qi_uuuus (__r, __a, __b, __index); + return (poly16x8_t)__a; } -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdotq_lane_u32 (uint32x4_t __r, uint8x16_t __a, uint8x8_t __b, - const int __index) +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_p64_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_udot_lanev16qi_uuuus (__r, __a, __b, __index); + return (poly64x2_t)__a; } -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-vdotq_laneq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b, - const int __index) +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_p128_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_udot_laneqv16qi_uuuus (__r, __a, __b, __index); + return (poly128_t)__a; } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdot_lane_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b, const int __index) +vbfdot_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x4_t __b) { - return __builtin_aarch64_sdot_lanev8qi (__r, __a, __b, __index); + return __builtin_aarch64_bfdotv2sf (__r, __a, __b); } -__extension__ extern __inline int32x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdot_laneq_s32 (int32x2_t __r, int8x8_t __a, int8x16_t __b, const int __index) +vbfdotq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) { - return __builtin_aarch64_sdot_laneqv8qi (__r, __a, __b, __index); + return __builtin_aarch64_bfdotv4sf (__r, __a, __b); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdotq_lane_s32 (int32x4_t __r, int8x16_t __a, int8x8_t __b, const int __index) +vbfdot_lane_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x4_t __b, + const int __index) { - return __builtin_aarch64_sdot_lanev16qi (__r, __a, __b, __index); + return __builtin_aarch64_bfdot_lanev2sf (__r, __a, __b, __index); } -__extension__ extern __inline int32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index) +vbfdotq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, + const int __index) { - return __builtin_aarch64_sdot_laneqv16qi (__r, __a, __b, __index); + return __builtin_aarch64_bfdot_lanev4sf (__r, __a, __b, __index); } -#pragma GCC pop_options -#pragma GCC push_options -#pragma GCC target ("arch=armv8.2-a+sm4") - -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm3ss1q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +vbfdot_laneq_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x8_t __b, + const int __index) { - return __builtin_aarch64_sm3ss1qv4si_uuuu (__a, __b, __c); + return __builtin_aarch64_bfdot_laneqv2sf (__r, __a, __b, __index); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm3tt1aq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) +vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, + const int __index) { - return __builtin_aarch64_sm3tt1aqv4si_uuuus (__a, __b, __c, __imm2); + return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm3tt1bq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) +vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) + { - return __builtin_aarch64_sm3tt1bqv4si_uuuus (__a, __b, __c, __imm2); + return __builtin_aarch64_bfmmlaqv4sf 
(__r, __a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm3tt2aq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) +vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) { - return __builtin_aarch64_sm3tt2aqv4si_uuuus (__a, __b, __c, __imm2); + return __builtin_aarch64_bfmlalbv4sf (__r, __a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm3tt2bq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2) +vbfmlaltq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) { - return __builtin_aarch64_sm3tt2bqv4si_uuuus (__a, __b, __c, __imm2); + return __builtin_aarch64_bfmlaltv4sf (__r, __a, __b); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm3partw1q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +vbfmlalbq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, + const int __index) { - return __builtin_aarch64_sm3partw1qv4si_uuuu (__a, __b, __c); + return __builtin_aarch64_bfmlalb_lanev4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint32x4_t + +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm3partw2q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +vbfmlaltq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, + const int __index) { - return __builtin_aarch64_sm3partw2qv4si_uuuu (__a, __b, __c); + return __builtin_aarch64_bfmlalt_lanev4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm4eq_u32 (uint32x4_t __a, uint32x4_t __b) +vbfmlalbq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, + const int __index) { - return __builtin_aarch64_sm4eqv4si_uuu (__a, __b); + return __builtin_aarch64_bfmlalb_lane_qv4sf (__r, __a, __b, __index); } -__extension__ extern __inline uint32x4_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b) +vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, + const int __index) { - return __builtin_aarch64_sm4ekeyqv4si_uuu (__a, __b); + return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); } -#pragma GCC pop_options - -#pragma GCC push_options -#pragma GCC target ("arch=armv8.2-a+sha3") - -__extension__ extern __inline uint64x2_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha512hq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +vget_low_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_crypto_sha512hqv2di_uuuu (__a, __b, __c); + return __builtin_aarch64_vget_lo_halfv8bf (__a); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha512h2q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +vget_high_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_crypto_sha512h2qv2di_uuuu (__a, __b, __c); + return __builtin_aarch64_vget_hi_halfv8bf (__a); } -__extension__ extern __inline uint64x2_t 
+__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha512su0q_u64 (uint64x2_t __a, uint64x2_t __b) +vcvt_f32_bf16 (bfloat16x4_t __a) { - return __builtin_aarch64_crypto_sha512su0qv2di_uuu (__a, __b); + return __builtin_aarch64_vbfcvtv4bf (__a); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +vcvtq_low_f32_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c); + return __builtin_aarch64_vbfcvtv8bf (__a); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +vcvtq_high_f32_bf16 (bfloat16x8_t __a) { - return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c); + return __builtin_aarch64_vbfcvt_highv8bf (__a); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vrax1q_u64 (uint64x2_t __a, uint64x2_t __b) +vcvt_bf16_f32 (float32x4_t __a) { - return __builtin_aarch64_rax1qv2di_uuu (__a, __b); + return __builtin_aarch64_bfcvtnv4bf (__a); } -__extension__ extern __inline uint64x2_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int imm6) +vcvtq_low_bf16_f32 (float32x4_t __a) { - return __builtin_aarch64_xarqv2di_uuus (__a, __b,imm6); + return __builtin_aarch64_bfcvtn_qv8bf (__a); } -__extension__ extern __inline uint16x8_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) { - return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c); + return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); } -#pragma GCC pop_options - -#pragma GCC push_options -#pragma GCC target ("arch=armv8.2-a+fp16fml") -__extension__ extern __inline float32x2_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) +vcopy_lane_bf16 (bfloat16x4_t __a, const int __lane1, + bfloat16x4_t __b, const int __lane2) { - return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) +vcopyq_lane_bf16 (bfloat16x8_t __a, const int __lane1, + bfloat16x4_t __b, const int __lane2) { - return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) +vcopy_laneq_bf16 (bfloat16x4_t __a, const int __lane1, + bfloat16x8_t __b, const int __lane2) { - return __builtin_aarch64_fmlalq_lowv4sf 
(__r, __a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) +vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1, + bfloat16x8_t __b, const int __lane2) { - return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b); + return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), + __a, __lane1); } -__extension__ extern __inline float32x2_t +__LD2_LANE_FUNC (bfloat16x4x2_t, bfloat16x4_t, bfloat16x8x2_t, bfloat16_t, v4bf, + v8bf, bf, bf16, bfloat16x8_t) +__LD2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) +__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf, + v8bf, bf, bf16, bfloat16x8_t) +__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) +__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, + v8bf, bf, bf16, bfloat16x8_t) +__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) + +__ST2_LANE_FUNC (bfloat16x4x2_t, bfloat16x8x2_t, bfloat16_t, v4bf, v8bf, bf, + bf16, bfloat16x8_t) +__ST2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16_t, v8bf, bf, bf16) +__ST3_LANE_FUNC (bfloat16x4x3_t, bfloat16x8x3_t, bfloat16_t, v4bf, v8bf, bf, + bf16, bfloat16x8_t) +__ST3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16_t, v8bf, bf, bf16) +__ST4_LANE_FUNC (bfloat16x4x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, v8bf, bf, + bf16, bfloat16x8_t) +__ST4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16_t, v8bf, bf, bf16) + +#pragma GCC pop_options + +/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. 
*/ + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+i8mm") + +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) +vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b); + return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) +vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b); + return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) +vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b, const int __index) { - return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b); + return __builtin_aarch64_usdot_lanev8qi_ssuss (__r, __a, __b, __index); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) +vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a, int8x16_t __b, + const int __index) { - return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b); + return __builtin_aarch64_usdot_laneqv8qi_ssuss (__r, __a, __b, __index); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, - const int __lane) +vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a, int8x8_t __b, + const int __index) { - return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane); + return __builtin_aarch64_usdot_lanev16qi_ssuss (__r, __a, __b, __index); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, - const int __lane) +vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b, + const int __index) { - return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane); + return __builtin_aarch64_usdot_laneqv16qi_ssuss (__r, __a, __b, __index); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, - const int __lane) +vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, uint8x8_t __b, const int __index) { - return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane); + return __builtin_aarch64_sudot_lanev8qi_sssus (__r, __a, __b, __index); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, - const int __lane) +vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a, uint8x16_t __b, + const int __index) { - return __builtin_aarch64_fmlsl_laneq_lowv2sf (__r, 
__a, __b, __lane); + return __builtin_aarch64_sudot_laneqv8qi_sssus (__r, __a, __b, __index); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, - const int __lane) +vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, uint8x8_t __b, + const int __index) { - return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane); + return __builtin_aarch64_sudot_lanev16qi_sssus (__r, __a, __b, __index); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, - const int __lane) +vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b, + const int __index) { - return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane); + return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index); } -__extension__ extern __inline float32x4_t +/* Matrix Multiply-Accumulate. */ + +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, - const int __lane) +vmmlaq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane); + return __builtin_aarch64_simd_smmlav16qi (__r, __a, __b); } -__extension__ extern __inline float32x4_t +__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, - const int __lane) +vmmlaq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane); + return __builtin_aarch64_simd_ummlav16qi_uuuu (__r, __a, __b); } -__extension__ extern __inline float32x2_t +__extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, - const int __lane) +vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane); + return __builtin_aarch64_simd_usmmlav16qi_ssus (__r, __a, __b); } -__extension__ extern __inline float32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, - const int __lane) +#pragma GCC pop_options + +__extension__ extern __inline poly8x8_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p8 (poly8x8_t __a, poly8x8_t __b) { - return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane); + return __a ^ __b; } -__extension__ extern __inline float32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, - const int __lane) +__extension__ extern __inline poly16x4_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p16 (poly16x4_t __a, poly16x4_t __b) { - return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane); + return __a ^ __b; } -__extension__ extern __inline float32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, 
float16x8_t __b, - const int __lane) +__extension__ extern __inline poly64x1_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p64 (poly64x1_t __a, poly64x1_t __b) { - return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane); + return __a ^ __b; } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, - const int __lane) +__extension__ extern __inline poly8x16_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p8 (poly8x16_t __a, poly8x16_t __b) { - return __builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane); + return __a ^ __b; } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, - const int __lane) +__extension__ extern __inline poly16x8_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p16 (poly16x8_t __a, poly16x8_t __b) { - return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane); + return __a ^__b; } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, - const int __lane) +__extension__ extern __inline poly64x2_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p64 (poly64x2_t __a, poly64x2_t __b) { - return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane); + return __a ^ __b; } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, - const int __lane) +__extension__ extern __inline poly128_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p128 (poly128_t __a, poly128_t __b) { - return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane); + return __a ^ __b; } -#pragma GCC pop_options - #undef __aarch64_vget_lane_any #undef __aarch64_vdup_lane_any @@ -32342,4 +34586,17 @@ vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, #undef __aarch64_vdupq_laneq_u32 #undef __aarch64_vdupq_laneq_u64 +#undef __LD2_LANE_FUNC +#undef __LD2Q_LANE_FUNC +#undef __LD3_LANE_FUNC +#undef __LD3Q_LANE_FUNC +#undef __LD4_LANE_FUNC +#undef __LD4Q_LANE_FUNC +#undef __ST2_LANE_FUNC +#undef __ST2Q_LANE_FUNC +#undef __ST3_LANE_FUNC +#undef __ST3Q_LANE_FUNC +#undef __ST4_LANE_FUNC +#undef __ST4Q_LANE_FUNC + #endif diff --git a/gcc/config/aarch64/arm_sve.h b/gcc/config/aarch64/arm_sve.h new file mode 100644 index 0000000000000..708114c67dd91 --- /dev/null +++ b/gcc/config/aarch64/arm_sve.h @@ -0,0 +1,42 @@ +/* AArch64 SVE intrinsics include file. + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
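As an illustration of how the bfloat16 and I8MM intrinsics added to arm_neon.h above are used (the helper functions are hypothetical, not part of the patch, and assume a target with the bf16 and i8mm extensions enabled):

#include <arm_neon.h>

/* Accumulate a bfloat16 dot product into a float32 accumulator (BFDOT).  */
float32x4_t
acc_bfdot (float32x4_t acc, bfloat16x8_t a, bfloat16x8_t b)
{
  return vbfdotq_f32 (acc, a, b);
}

/* Accumulate a signed 8-bit integer matrix multiply (SMMLA).  */
int32x4_t
acc_smmla (int32x4_t acc, int8x16_t a, int8x16_t b)
{
  return vmmlaq_s32 (acc, a, b);
}

Each call maps to a single instruction, which is why the definitions above are thin always_inline wrappers around __builtin_aarch64_* calls.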
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _ARM_SVE_H_ +#define _ARM_SVE_H_ + +#include +#include + +typedef __fp16 float16_t; +typedef float float32_t; +typedef double float64_t; + +/* NOTE: This implementation of arm_sve.h is intentionally short. It does + not define the SVE types and intrinsic functions directly in C and C++ + code, but instead uses the following pragma to tell GCC to insert the + necessary type and function definitions itself. The net effect is the + same, and the file is a complete implementation of arm_sve.h. */ +#pragma GCC aarch64 "arm_sve.h" + +#endif diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index 686e39ff2ee59..7bdb4ba7f5e38 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -1,5 +1,5 @@ ;; Machine description for AArch64 processor synchronization primitives. -;; Copyright (C) 2009-2018 Free Software Foundation, Inc. +;; Copyright (C) 2009-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -20,12 +20,12 @@ ;; Instruction patterns. -(define_expand "atomic_compare_and_swap" +(define_expand "@atomic_compare_and_swap" [(match_operand:SI 0 "register_operand" "") ;; bool out - (match_operand:ALLI 1 "register_operand" "") ;; val out - (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory - (match_operand:ALLI 3 "general_operand" "") ;; expected - (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired + (match_operand:ALLI_TI 1 "register_operand" "") ;; val out + (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory + (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected + (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired (match_operand:SI 5 "const_int_operand") ;; is_weak (match_operand:SI 6 "const_int_operand") ;; mod_s (match_operand:SI 7 "const_int_operand")] ;; mod_f @@ -36,19 +36,25 @@ } ) -(define_insn_and_split "aarch64_compare_and_swap" +(define_mode_attr cas_short_expected_pred + [(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")]) +(define_mode_attr cas_short_expected_imm + [(QI "n") (HI "Uph")]) + +(define_insn_and_split "@aarch64_compare_and_swap" [(set (reg:CC CC_REGNUM) ;; bool out (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) - (set (match_operand:SI 0 "register_operand" "=&r") ;; val out + (set (match_operand:SI 0 "register_operand" "=&r") ;; val out (zero_extend:SI (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory (set (match_dup 1) (unspec_volatile:SHORT - [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected + [(match_operand:SHORT 2 "" + "r") ;; expected (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired - (match_operand:SI 4 "const_int_operand") ;; is_weak - (match_operand:SI 5 "const_int_operand") ;; mod_s - (match_operand:SI 6 "const_int_operand")] ;; mod_f + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f UNSPECV_ATOMIC_CMPSW)) (clobber (match_scratch:SI 7 "=&r"))] "" @@ -61,14 +67,14 @@ } ) -(define_insn_and_split "aarch64_compare_and_swap" 
+(define_insn_and_split "@aarch64_compare_and_swap" [(set (reg:CC CC_REGNUM) ;; bool out (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory (set (match_dup 1) (unspec_volatile:GPI - [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect + [(match_operand:GPI 2 "aarch64_plus_operand" "rIJ") ;; expect (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired (match_operand:SI 4 "const_int_operand") ;; is_weak (match_operand:SI 5 "const_int_operand") ;; mod_s @@ -85,84 +91,135 @@ } ) -(define_insn_and_split "aarch64_compare_and_swap_lse" +(define_insn_and_split "@aarch64_compare_and_swap" [(set (reg:CC CC_REGNUM) ;; bool out (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) - (set (match_operand:SI 0 "register_operand" "=&r") ;; val out - (zero_extend:SI - (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory + (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out + (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory (set (match_dup 1) - (unspec_volatile:SHORT - [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected - (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired - (match_operand:SI 4 "const_int_operand") ;; is_weak - (match_operand:SI 5 "const_int_operand") ;; mod_s - (match_operand:SI 6 "const_int_operand")] ;; mod_f - UNSPECV_ATOMIC_CMPSW))] - "TARGET_LSE" + (unspec_volatile:JUST_TI + [(match_operand:JUST_TI 2 "aarch64_reg_or_zero" "rZ") ;; expect + (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW)) + (clobber (match_scratch:SI 7 "=&r"))] + "" "#" - "&& reload_completed" + "&& epilogue_completed" [(const_int 0)] { - aarch64_gen_atomic_cas (operands[0], operands[1], - operands[2], operands[3], - operands[5]); + aarch64_split_compare_and_swap (operands); DONE; } ) -(define_insn_and_split "aarch64_compare_and_swap_lse" - [(set (reg:CC CC_REGNUM) ;; bool out - (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) - (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out +(define_insn "@aarch64_compare_and_swap_lse" + [(set (match_operand:SI 0 "register_operand" "+r") ;; val out + (zero_extend:SI + (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory + (set (match_dup 1) + (unspec_volatile:SHORT + [(match_dup 0) ;; expected + (match_operand:SHORT 2 "aarch64_reg_or_zero" "rZ") ;; desired + (match_operand:SI 3 "const_int_operand")] ;; mod_s + UNSPECV_ATOMIC_CMPSW))] + "TARGET_LSE" +{ + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "cas\t%0, %2, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "casa\t%0, %2, %1"; + else if (is_mm_release (model)) + return "casl\t%0, %2, %1"; + else + return "casal\t%0, %2, %1"; +}) + +(define_insn "@aarch64_compare_and_swap_lse" + [(set (match_operand:GPI 0 "register_operand" "+r") ;; val out (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory (set (match_dup 1) (unspec_volatile:GPI - [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect - (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired - (match_operand:SI 4 "const_int_operand") ;; is_weak - (match_operand:SI 5 "const_int_operand") ;; mod_s - 
(match_operand:SI 6 "const_int_operand")] ;; mod_f + [(match_dup 0) ;; expected + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") ;; desired + (match_operand:SI 3 "const_int_operand")] ;; mod_s UNSPECV_ATOMIC_CMPSW))] "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] - { - aarch64_gen_atomic_cas (operands[0], operands[1], - operands[2], operands[3], - operands[5]); - DONE; - } -) +{ + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "cas\t%0, %2, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "casa\t%0, %2, %1"; + else if (is_mm_release (model)) + return "casl\t%0, %2, %1"; + else + return "casal\t%0, %2, %1"; +}) + +(define_insn "@aarch64_compare_and_swap_lse" + [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out + (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:JUST_TI + [(match_dup 0) ;; expect + (match_operand:JUST_TI 2 "register_operand" "r") ;; desired + (match_operand:SI 3 "const_int_operand")] ;; mod_s + UNSPECV_ATOMIC_CMPSW))] + "TARGET_LSE" +{ + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "casp\t%0, %R0, %2, %R2, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "caspa\t%0, %R0, %2, %R2, %1"; + else if (is_mm_release (model)) + return "caspl\t%0, %R0, %2, %R2, %1"; + else + return "caspal\t%0, %R0, %2, %R2, %1"; +}) (define_expand "atomic_exchange" - [(match_operand:ALLI 0 "register_operand" "") - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") - (match_operand:ALLI 2 "register_operand" "") - (match_operand:SI 3 "const_int_operand" "")] + [(match_operand:ALLI 0 "register_operand") + (match_operand:ALLI 1 "aarch64_sync_memory_operand") + (match_operand:ALLI 2 "aarch64_reg_or_zero") + (match_operand:SI 3 "const_int_operand")] "" { - rtx (*gen) (rtx, rtx, rtx, rtx); - /* Use an atomic SWP when available. 
*/ if (TARGET_LSE) - gen = gen_aarch64_atomic_exchange_lse; + { + emit_insn (gen_aarch64_atomic_exchange_lse + (operands[0], operands[1], operands[2], operands[3])); + } + else if (TARGET_OUTLINE_ATOMICS) + { + machine_mode mode = mode; + rtx func = aarch64_atomic_ool_func (mode, operands[3], + &aarch64_ool_swp_names); + rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, + mode, operands[2], mode, + XEXP (operands[1], 0), Pmode); + emit_move_insn (operands[0], rval); + } else - gen = gen_aarch64_atomic_exchange; - - emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); - + { + emit_insn (gen_aarch64_atomic_exchange + (operands[0], operands[1], operands[2], operands[3])); + } DONE; } ) (define_insn_and_split "aarch64_atomic_exchange" [(set (match_operand:ALLI 0 "register_operand" "=&r") ;; output - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory (set (match_dup 1) (unspec_volatile:ALLI - [(match_operand:ALLI 2 "register_operand" "r") ;; input + [(match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ") ;; input (match_operand:SI 3 "const_int_operand" "")] ;; model UNSPECV_ATOMIC_EXCHG)) (clobber (reg:CC CC_REGNUM)) @@ -178,42 +235,102 @@ } ) -(define_insn_and_split "aarch64_atomic_exchange_lse" - [(set (match_operand:ALLI 0 "register_operand" "=&r") +(define_insn "aarch64_atomic_exchange_lse" + [(set (match_operand:ALLI 0 "register_operand" "=r") (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) (set (match_dup 1) (unspec_volatile:ALLI - [(match_operand:ALLI 2 "register_operand" "r") + [(match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ") (match_operand:SI 3 "const_int_operand" "")] UNSPECV_ATOMIC_EXCHG))] "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] { - aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1], - operands[2], operands[3]); - DONE; + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "swp\t%2, %0, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "swpa\t%2, %0, %1"; + else if (is_mm_release (model)) + return "swpl\t%2, %0, %1"; + else + return "swpal\t%2, %0, %1"; } ) (define_expand "atomic_" - [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "") + [(match_operand:ALLI 0 "aarch64_sync_memory_operand") (atomic_op:ALLI - (match_operand:ALLI 1 "" "") + (match_operand:ALLI 1 "") (match_operand:SI 2 "const_int_operand"))] "" { rtx (*gen) (rtx, rtx, rtx); /* Use an atomic load-operate instruction when possible. 
*/ - if (aarch64_atomic_ldop_supported_p ()) - gen = gen_aarch64_atomic__lse; + if (TARGET_LSE) + { + switch () + { + case MINUS: + operands[1] = expand_simple_unop (mode, NEG, operands[1], + NULL, 1); + /* fallthru */ + case PLUS: + gen = gen_aarch64_atomic_add_lse; + break; + case IOR: + gen = gen_aarch64_atomic_ior_lse; + break; + case XOR: + gen = gen_aarch64_atomic_xor_lse; + break; + case AND: + operands[1] = expand_simple_unop (mode, NOT, operands[1], + NULL, 1); + gen = gen_aarch64_atomic_bic_lse; + break; + default: + gcc_unreachable (); + } + operands[1] = force_reg (mode, operands[1]); + } + else if (TARGET_OUTLINE_ATOMICS) + { + const atomic_ool_names *names; + switch () + { + case MINUS: + operands[1] = expand_simple_unop (mode, NEG, operands[1], + NULL, 1); + /* fallthru */ + case PLUS: + names = &aarch64_ool_ldadd_names; + break; + case IOR: + names = &aarch64_ool_ldset_names; + break; + case XOR: + names = &aarch64_ool_ldeor_names; + break; + case AND: + operands[1] = expand_simple_unop (mode, NOT, operands[1], + NULL, 1); + names = &aarch64_ool_ldclr_names; + break; + default: + gcc_unreachable (); + } + machine_mode mode = mode; + rtx func = aarch64_atomic_ool_func (mode, operands[2], names); + emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode, + operands[1], mode, + XEXP (operands[0], 0), Pmode); + DONE; + } else gen = gen_aarch64_atomic_; emit_insn (gen (operands[0], operands[1], operands[2])); - DONE; } ) @@ -239,22 +356,37 @@ } ) -(define_insn_and_split "aarch64_atomic__lse" +;; It is tempting to want to use ST for relaxed and release +;; memory models here. However, that is incompatible with the +;; C++ memory model for the following case: +;; +;; atomic_fetch_add(ptr, 1, memory_order_relaxed); +;; atomic_thread_fence(memory_order_acquire); +;; +;; The problem is that the architecture says that ST (and LD +;; insns where the destination is XZR) are not regarded as a read. +;; However we also implement the acquire memory barrier with DMB LD, +;; and so the ST is not blocked by the barrier. + +(define_insn "aarch64_atomic__lse" [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 0) - (match_operand:ALLI 1 "" "r")) - (match_operand:SI 2 "const_int_operand")] - UNSPECV_ATOMIC_OP)) - (clobber (match_scratch:ALLI 3 "=&r"))] + (unspec_volatile:ALLI + [(match_dup 0) + (match_operand:ALLI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand")] + ATOMIC_LDOP)) + (clobber (match_scratch:ALLI 3 "=r"))] "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] { - aarch64_gen_atomic_ldop (, operands[3], NULL, operands[0], - operands[1], operands[2]); - DONE; + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model)) + return "ld\t%1, %3, %0"; + else if (is_mm_release (model)) + return "ldl\t%1, %3, %0"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "lda\t%1, %3, %0"; + else + return "ldal\t%1, %3, %0"; } ) @@ -280,26 +412,84 @@ } ) -;; Load-operate-store, returning the updated memory data. +;; Load-operate-store, returning the original memory data. 
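For context, a hedged C-level sketch of what the atomic_fetch_<op> expander below services: the __atomic_fetch_<op> built-ins return the value the memory held before the operation. With LSE this is expected to become a single LDADD-style instruction, otherwise an LDXR/STXR loop, or a call into the libgcc out-of-line helpers under -moutline-atomics:

#include <stdint.h>

/* Atomically add N and return the counter's previous value.  */
static inline uint64_t
fetch_then_add (uint64_t *counter, uint64_t n)
{
  return __atomic_fetch_add (counter, n, __ATOMIC_ACQ_REL);
}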
(define_expand "atomic_fetch_" - [(match_operand:ALLI 0 "register_operand" "") - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") + [(match_operand:ALLI 0 "register_operand") + (match_operand:ALLI 1 "aarch64_sync_memory_operand") (atomic_op:ALLI - (match_operand:ALLI 2 "" "") + (match_operand:ALLI 2 "") (match_operand:SI 3 "const_int_operand"))] "" { rtx (*gen) (rtx, rtx, rtx, rtx); /* Use an atomic load-operate instruction when possible. */ - if (aarch64_atomic_ldop_supported_p ()) - gen = gen_aarch64_atomic_fetch__lse; + if (TARGET_LSE) + { + switch () + { + case MINUS: + operands[2] = expand_simple_unop (mode, NEG, operands[2], + NULL, 1); + /* fallthru */ + case PLUS: + gen = gen_aarch64_atomic_fetch_add_lse; + break; + case IOR: + gen = gen_aarch64_atomic_fetch_ior_lse; + break; + case XOR: + gen = gen_aarch64_atomic_fetch_xor_lse; + break; + case AND: + operands[2] = expand_simple_unop (mode, NOT, operands[2], + NULL, 1); + gen = gen_aarch64_atomic_fetch_bic_lse; + break; + default: + gcc_unreachable (); + } + operands[2] = force_reg (mode, operands[2]); + } + else if (TARGET_OUTLINE_ATOMICS) + { + const atomic_ool_names *names; + switch () + { + case MINUS: + operands[2] = expand_simple_unop (mode, NEG, operands[2], + NULL, 1); + /* fallthru */ + case PLUS: + names = &aarch64_ool_ldadd_names; + break; + case IOR: + names = &aarch64_ool_ldset_names; + break; + case XOR: + names = &aarch64_ool_ldeor_names; + break; + case AND: + operands[2] = expand_simple_unop (mode, NOT, operands[2], + NULL, 1); + names = &aarch64_ool_ldclr_names; + break; + default: + gcc_unreachable (); + } + machine_mode mode = mode; + rtx func = aarch64_atomic_ool_func (mode, operands[3], names); + rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode, + operands[2], mode, + XEXP (operands[1], 0), Pmode); + emit_move_insn (operands[0], rval); + DONE; + } else gen = gen_aarch64_atomic_fetch_; emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); - DONE; }) @@ -326,23 +516,26 @@ } ) -(define_insn_and_split "aarch64_atomic_fetch__lse" - [(set (match_operand:ALLI 0 "register_operand" "=&r") - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) +(define_insn "aarch64_atomic_fetch__lse" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) (set (match_dup 1) - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 1) - (match_operand:ALLI 2 "" "r")) - (match_operand:SI 3 "const_int_operand")] - UNSPECV_ATOMIC_LDOP))] + (unspec_volatile:ALLI + [(match_dup 1) + (match_operand:ALLI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand")] + ATOMIC_LDOP))] "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] { - aarch64_gen_atomic_ldop (, operands[0], NULL, operands[1], - operands[2], operands[3]); - DONE; + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "ld\t%2, %0, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "lda\t%2, %0, %1"; + else if (is_mm_release (model)) + return "ldl\t%2, %0, %1"; + else + return "ldal\t%2, %0, %1"; } ) @@ -370,27 +563,33 @@ } ) -;; Load-operate-store, returning the original memory data. +;; Load-operate-store, returning the updated memory data. 
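The atomic_<op>_fetch expander that follows needs the updated value instead, but LSE's load-operate instructions and the out-of-line helpers only return the original one, so the result is recomputed from it. Roughly, at the C level (illustrative sketch, not part of the patch):

#include <stdint.h>

/* Equivalent to __atomic_add_fetch (counter, n, __ATOMIC_ACQ_REL):
   fetch the old value atomically, then redo the addition locally.  */
static inline uint64_t
add_then_fetch (uint64_t *counter, uint64_t n)
{
  uint64_t old = __atomic_fetch_add (counter, n, __ATOMIC_ACQ_REL);
  return old + n;
}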
(define_expand "atomic__fetch" - [(match_operand:ALLI 0 "register_operand" "") + [(match_operand:ALLI 0 "register_operand") (atomic_op:ALLI - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") - (match_operand:ALLI 2 "" "")) + (match_operand:ALLI 1 "aarch64_sync_memory_operand") + (match_operand:ALLI 2 "")) (match_operand:SI 3 "const_int_operand")] "" { - rtx (*gen) (rtx, rtx, rtx, rtx); - rtx value = operands[2]; - - /* Use an atomic load-operate instruction when possible. */ - if (aarch64_atomic_ldop_supported_p ()) - gen = gen_aarch64_atomic__fetch_lse; + /* Use an atomic load-operate instruction when possible. In this case + we will re-compute the result from the original mem value. */ + if (TARGET_LSE || TARGET_OUTLINE_ATOMICS) + { + rtx tmp = gen_reg_rtx (mode); + operands[2] = force_reg (mode, operands[2]); + emit_insn (gen_atomic_fetch_ + (tmp, operands[1], operands[2], operands[3])); + tmp = expand_simple_binop (mode, , tmp, operands[2], + operands[0], 1, OPTAB_WIDEN); + emit_move_insn (operands[0], tmp); + } else - gen = gen_aarch64_atomic__fetch; - - emit_insn (gen (operands[0], operands[1], value, operands[3])); - + { + emit_insn (gen_aarch64_atomic__fetch + (operands[0], operands[1], operands[2], operands[3])); + } DONE; }) @@ -417,29 +616,6 @@ } ) -(define_insn_and_split "aarch64_atomic__fetch_lse" - [(set (match_operand:ALLI 0 "register_operand" "=&r") - (atomic_op:ALLI - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") - (match_operand:ALLI 2 "" "r"))) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) - (match_dup 2) - (match_operand:SI 3 "const_int_operand")] - UNSPECV_ATOMIC_LDOP)) - (clobber (match_scratch:ALLI 4 "=&r"))] - "TARGET_LSE" - "#" - "&& reload_completed" - [(const_int 0)] - { - aarch64_gen_atomic_ldop (, operands[4], operands[0], operands[1], - operands[2], operands[3]); - DONE; - } -) - (define_insn_and_split "atomic_nand_fetch" [(set (match_operand:ALLI 0 "register_operand" "=&r") (not:ALLI @@ -481,9 +657,9 @@ ) (define_insn "atomic_store" - [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "=Q") + [(set (match_operand:ALLI 0 "aarch64_rcpc_memory_operand" "=Q,Ust") (unspec_volatile:ALLI - [(match_operand:ALLI 1 "general_operand" "rZ") + [(match_operand:ALLI 1 "general_operand" "rZ,rZ") (match_operand:SI 2 "const_int_operand")] ;; model UNSPECV_STL))] "" @@ -491,12 +667,15 @@ enum memmodel model = memmodel_from_int (INTVAL (operands[2])); if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) return "str\t%1, %0"; - else + else if (which_alternative == 0) return "stlr\t%1, %0"; + else + return "stlur\t%1, %0"; } + [(set_attr "arch" "*,rcpc8_4")] ) -(define_insn "aarch64_load_exclusive" +(define_insn "@aarch64_load_exclusive" [(set (match_operand:SI 0 "register_operand" "=r") (zero_extend:SI (unspec_volatile:SHORT @@ -513,7 +692,7 @@ } ) -(define_insn "aarch64_load_exclusive" +(define_insn "@aarch64_load_exclusive" [(set (match_operand:GPI 0 "register_operand" "=r") (unspec_volatile:GPI [(match_operand:GPI 1 "aarch64_sync_memory_operand" "Q") @@ -529,8 +708,26 @@ } ) -(define_insn "aarch64_store_exclusive" - [(set (match_operand:SI 0 "register_operand" "=r") +(define_insn "aarch64_load_exclusive_pair" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI + [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q") + (match_operand:SI 3 "const_int_operand")] + UNSPECV_LX)) + (set (match_operand:DI 1 "register_operand" "=r") + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] 
UNSPECV_LX))] + "" + { + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) + return "ldxp\t%0, %1, %2"; + else + return "ldaxp\t%0, %1, %2"; + } +) + +(define_insn "@aarch64_store_exclusive" + [(set (match_operand:SI 0 "register_operand" "=&r") (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) (set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q") (unspec_volatile:ALLI @@ -547,8 +744,27 @@ } ) +(define_insn "aarch64_store_exclusive_pair" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) + (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q") + (unspec_volatile:TI + [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ") + (match_operand:DI 3 "aarch64_reg_or_zero" "rZ") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_SX))] + "" + { + enum memmodel model = memmodel_from_int (INTVAL (operands[4])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) + return "stxp\t%w0, %x2, %x3, %1"; + else + return "stlxp\t%w0, %x2, %x3, %1"; + } +) + (define_expand "mem_thread_fence" - [(match_operand:SI 0 "const_int_operand" "")] + [(match_operand:SI 0 "const_int_operand")] "" { enum memmodel model = memmodel_from_int (INTVAL (operands[0])); @@ -582,100 +798,3 @@ return "dmb\\tish"; } ) - -;; ARMv8.1-A LSE instructions. - -;; Atomic swap with memory. -(define_insn "aarch64_atomic_swp" - [(set (match_operand:ALLI 0 "register_operand" "+&r") - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_operand:ALLI 2 "register_operand" "r") - (match_operand:SI 3 "const_int_operand" "")] - UNSPECV_ATOMIC_SWP))] - "TARGET_LSE && reload_completed" - { - enum memmodel model = memmodel_from_int (INTVAL (operands[3])); - if (is_mm_relaxed (model)) - return "swp\t%2, %0, %1"; - else if (is_mm_acquire (model) || is_mm_consume (model)) - return "swpa\t%2, %0, %1"; - else if (is_mm_release (model)) - return "swpl\t%2, %0, %1"; - else - return "swpal\t%2, %0, %1"; - }) - -;; Atomic compare-and-swap: HI and smaller modes. - -(define_insn "aarch64_atomic_cas" - [(set (match_operand:SI 0 "register_operand" "+&r") ;; out - (zero_extend:SI - (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory. - (set (match_dup 1) - (unspec_volatile:SHORT - [(match_dup 0) - (match_operand:SHORT 2 "aarch64_reg_or_zero" "rZ") ;; value. - (match_operand:SI 3 "const_int_operand" "")] ;; model. - UNSPECV_ATOMIC_CAS))] - "TARGET_LSE && reload_completed" -{ - enum memmodel model = memmodel_from_int (INTVAL (operands[3])); - if (is_mm_relaxed (model)) - return "cas\t%0, %2, %1"; - else if (is_mm_acquire (model) || is_mm_consume (model)) - return "casa\t%0, %2, %1"; - else if (is_mm_release (model)) - return "casl\t%0, %2, %1"; - else - return "casal\t%0, %2, %1"; -}) - -;; Atomic compare-and-swap: SI and larger modes. - -(define_insn "aarch64_atomic_cas" - [(set (match_operand:GPI 0 "register_operand" "+&r") ;; out - (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory. - (set (match_dup 1) - (unspec_volatile:GPI - [(match_dup 0) - (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") ;; value. - (match_operand:SI 3 "const_int_operand" "")] ;; model. 
- UNSPECV_ATOMIC_CAS))] - "TARGET_LSE && reload_completed" -{ - enum memmodel model = memmodel_from_int (INTVAL (operands[3])); - if (is_mm_relaxed (model)) - return "cas\t%0, %2, %1"; - else if (is_mm_acquire (model) || is_mm_consume (model)) - return "casa\t%0, %2, %1"; - else if (is_mm_release (model)) - return "casl\t%0, %2, %1"; - else - return "casal\t%0, %2, %1"; -}) - -;; Atomic load-op: Load data, operate, store result, keep data. - -(define_insn "aarch64_atomic_load" - [(set (match_operand:ALLI 0 "register_operand" "=r") - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) - (match_operand:ALLI 2 "register_operand") - (match_operand:SI 3 "const_int_operand")] - ATOMIC_LDOP))] - "TARGET_LSE && reload_completed" - { - enum memmodel model = memmodel_from_int (INTVAL (operands[3])); - if (is_mm_relaxed (model)) - return "ld\t%2, %0, %1"; - else if (is_mm_acquire (model) || is_mm_consume (model)) - return "lda\t%2, %0, %1"; - else if (is_mm_release (model)) - return "ldl\t%2, %0, %1"; - else - return "ldal\t%2, %0, %1"; - }) diff --git a/gcc/config/aarch64/biarchilp32.h b/gcc/config/aarch64/biarchilp32.h index c070d20794940..8b4bc5555210a 100644 --- a/gcc/config/aarch64/biarchilp32.h +++ b/gcc/config/aarch64/biarchilp32.h @@ -1,7 +1,7 @@ /* Make configure files to produce biarch compiler defaulting to ilp32 ABI. This file must be included very first, while the OS specific file later to overwrite otherwise wrong defaults. - Copyright (C) 2013-2018 Free Software Foundation, Inc. + Copyright (C) 2013-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. diff --git a/gcc/config/aarch64/biarchlp64.h b/gcc/config/aarch64/biarchlp64.h index efdf17b4c5435..daede57480830 100644 --- a/gcc/config/aarch64/biarchlp64.h +++ b/gcc/config/aarch64/biarchlp64.h @@ -1,7 +1,7 @@ /* Make configure files to produce biarch compiler defaulting to ilp64 ABI. This file must be included very first, while the OS specific file later to overwrite otherwise wrong defaults. - Copyright (C) 2013-2018 Free Software Foundation, Inc. + Copyright (C) 2013-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. diff --git a/gcc/config/aarch64/check-sve-md.awk b/gcc/config/aarch64/check-sve-md.awk new file mode 100644 index 0000000000000..b482f048e62b6 --- /dev/null +++ b/gcc/config/aarch64/check-sve-md.awk @@ -0,0 +1,66 @@ +#!/usr/bin/awk -f +# Copyright (C) 2019-2021 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING3. If not see +# . + +# This awk script checks that aarch64-sve.md (passed either on the +# command line or via stdin) has an up-to-date contents section. + +BEGIN { + seen1 = 0 + seen2 = 0 + errors = 0 +} + +# The headings in the comments use a two-level hierarchy: ";; == ..." +# for major sections and ";; ---- ..." for minor sections. Each section +# heading must be unique. 
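To make the check concrete, a sketch of the layout the script accepts, using invented section names rather than the real aarch64-sve.md ones. The file opens with a contents listing such as

;; == Loads
;; ---- Contiguous loads
;; == Stores
;; ---- Contiguous stores

and each of those lines must then appear exactly once more, unchanged and in the same order, at the head of its section. A heading that is reworded, reordered, or present in only one of the two copies makes the script print one of the errors below and exit non-zero.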
+# +# The contents section should list all the section headings, using the +# same text and in the same order. We should therefore see exactly two +# copies of the section list. +/^;; == / || /^;; ---- / { + if ($0 in seen || seen2 > 0) + { + if (seen2 >= seen1) + { + printf "error: line not in contents: %s\n", $0 > "/dev/stderr" + errors += 1 + exit(1) + } + if ($0 != order[seen2]) + { + printf "error: mismatched contents\n saw: %s\nexpected: %s\n", \ + $0, order[seen2] > "/dev/stderr" + errors += 1 + exit(1) + } + seen2 += 1 + } + else + { + seen[$0] = 1 + order[seen1] = $0 + seen1 += 1 + } +} + +END { + if (seen2 < seen1 && errors == 0) + { + printf "error: line only in contents: %s\n", order[seen2] > "/dev/stderr" + exit(1) + } +} diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 32a0fa60a198c..3b49b452119c4 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -1,5 +1,5 @@ ;; Machine description for AArch64 architecture. -;; Copyright (C) 2009-2018 Free Software Foundation, Inc. +;; Copyright (C) 2009-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -24,6 +24,15 @@ (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS" "@internal Registers suitable for an indirect tail call") +(define_register_constraint "Ucr" + "aarch64_harden_sls_blr_p () ? STUB_REGS : GENERAL_REGS" + "@internal Registers to be used for an indirect call. + This is usually the general registers, but when we are hardening against + Straight Line Speculation we disallow x16, x17, and x30 so we can use + indirection stubs. These indirection stubs cannot use the above registers + since they will be reached by a BL that may have to go through a linker + veneer.") + (define_register_constraint "w" "FP_REGS" "Floating point and SIMD vector registers.") @@ -36,6 +45,13 @@ (define_register_constraint "x" "FP_LO_REGS" "Floating point and SIMD vector registers V0 - V15.") +(define_register_constraint "y" "FP_LO8_REGS" + "Floating point and SIMD vector registers V0 - V7.") + +(define_constraint "c" + "@internal The condition code register." + (match_operand 0 "cc_register")) + (define_constraint "I" "A constant that can be used with an ADD operation." (and (match_code "const_int") @@ -46,6 +62,12 @@ (and (match_code "const_int") (match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)"))) +(define_constraint "Uai" + "@internal + A constraint that matches a VG-based constant that can be added by + a single INC or DEC." + (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate")) + (define_constraint "Uav" "@internal A constraint that matches a VG-based constant that can be added by @@ -114,8 +136,8 @@ (match_test "aarch64_float_const_zero_rtx_p (op)"))) (define_constraint "Z" - "Integer constant zero." - (match_test "op == const0_rtx")) + "Integer or floating-point constant zero." + (match_test "op == CONST0_RTX (GET_MODE (op))")) (define_constraint "Ush" "A constraint that matches an absolute symbolic address high part." @@ -172,6 +194,13 @@ A constraint that matches the immediate constant -1." (match_test "op == constm1_rtx")) +(define_constraint "Ulc" + "@internal + A constraint that matches a constant integer whose bits are consecutive ones + from the MSB." 
+ (and (match_code "const_int") + (match_test "aarch64_high_bits_all_ones_p (ival)"))) + (define_constraint "Usv" "@internal A constraint that matches a VG-based constant that can be loaded by @@ -213,18 +242,22 @@ (and (match_code "const_int") (match_test "(unsigned) exact_log2 (ival) <= 4"))) +(define_constraint "Uph" + "@internal + A constraint that matches HImode integers zero extendable to + SImode plus_operand." + (and (match_code "const_int") + (match_test "aarch64_plushi_immediate (op, VOIDmode)"))) + (define_memory_constraint "Q" "A memory address which uses a single base register with no offset." (and (match_code "mem") (match_test "REG_P (XEXP (op, 0))"))) -(define_memory_constraint "Umq" +(define_memory_constraint "Ust" "@internal - A memory address which uses a base register with an offset small enough for - a load/store pair operation in DI mode." - (and (match_code "mem") - (match_test "aarch64_legitimate_address_p (DImode, XEXP (op, 0), false, - ADDR_QUERY_LDP_STP)"))) + A memory address with 9bit unscaled offset." + (match_operand 0 "aarch64_9bit_offset_memory_operand")) (define_memory_constraint "Ump" "@internal @@ -233,14 +266,48 @@ (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), true, ADDR_QUERY_LDP_STP)"))) -;; Used for storing two 64-bit values in an AdvSIMD register using an STP -;; as a 128-bit vec_concat. -(define_memory_constraint "Uml" +;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP +;; as a vector-concat. The address mode uses the same constraints as if it +;; were for a single value. +(define_memory_constraint "Umn" "@internal A memory address suitable for a load/store pair operation." (and (match_code "mem") - (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0), 1, - ADDR_QUERY_LDP_STP)"))) + (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + true, + ADDR_QUERY_LDP_STP_N)"))) + +(define_address_constraint "UPb" + "@internal + An address valid for SVE PRFB instructions." + (match_test "aarch64_sve_prefetch_operand_p (op, VNx16QImode)")) + +(define_address_constraint "UPd" + "@internal + An address valid for SVE PRFD instructions." + (match_test "aarch64_sve_prefetch_operand_p (op, VNx2DImode)")) + +(define_address_constraint "UPh" + "@internal + An address valid for SVE PRFH instructions." + (match_test "aarch64_sve_prefetch_operand_p (op, VNx8HImode)")) + +(define_address_constraint "UPw" + "@internal + An address valid for SVE PRFW instructions." + (match_test "aarch64_sve_prefetch_operand_p (op, VNx4SImode)")) + +(define_memory_constraint "Utf" + "@internal + An address valid for SVE LDFF1 instructions." + (and (match_code "mem") + (match_test "aarch64_sve_ldff1_operand_p (op)"))) + +(define_memory_constraint "Utn" + "@internal + An address valid for SVE LDNF1 instructions." + (and (match_code "mem") + (match_test "aarch64_sve_ldnf1_operand_p (op)"))) (define_memory_constraint "Utr" "@internal @@ -256,14 +323,47 @@ (and (match_code "mem") (match_test "aarch64_simd_mem_operand_p (op)"))) -(define_memory_constraint "Utq" +(define_relaxed_memory_constraint "Utq" "@internal An address valid for loading or storing a 128-bit AdvSIMD register" (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (GET_MODE (op), + XEXP (op, 0), 1)") (match_test "aarch64_legitimate_address_p (V2DImode, XEXP (op, 0), 1)"))) -(define_memory_constraint "Uty" +(define_relaxed_memory_constraint "UtQ" + "@internal + An address valid for SVE LD1RQs." 
+ (and (match_code "mem") + (match_test "aarch64_sve_ld1rq_operand_p (op)"))) + +(define_relaxed_memory_constraint "UOb" + "@internal + An address valid for SVE LD1ROH." + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) + +(define_relaxed_memory_constraint "UOh" + "@internal + An address valid for SVE LD1ROH." + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) + + +(define_relaxed_memory_constraint "UOw" + "@internal + An address valid for SVE LD1ROW." + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) + +(define_relaxed_memory_constraint "UOd" + "@internal + An address valid for SVE LD1ROD." + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) + +(define_relaxed_memory_constraint "Uty" "@internal An address valid for SVE LD1Rs." (and (match_code "mem") @@ -278,7 +378,7 @@ (define_constraint "Ufc" "A floating point constant which can be used with an\ FMOV immediate operation." - (and (match_code "const_double") + (and (match_code "const_double,const_vector") (match_test "aarch64_float_const_representable_p (op)"))) (define_constraint "Uvi" @@ -323,6 +423,13 @@ (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, QImode)"))) +(define_constraint "Dt" + "@internal + A const_double which is the reciprocal of an exact power of two, can be + used in an scvtf with fract bits operation" + (and (match_code "const_double") + (match_test "aarch64_fpconst_pow2_recip (op) > 0"))) + (define_constraint "Dl" "@internal A constraint that matches vector of immediates for left shifts." @@ -367,18 +474,54 @@ An address valid for a prefetch instruction." (match_test "aarch64_address_valid_for_prefetch_p (op, true)")) +(define_constraint "vgb" + "@internal + A constraint that matches an immediate offset valid for SVE LD1B + gather instructions." + (match_operand 0 "aarch64_sve_gather_immediate_b")) + +(define_constraint "vgd" + "@internal + A constraint that matches an immediate offset valid for SVE LD1D + gather instructions." + (match_operand 0 "aarch64_sve_gather_immediate_d")) + +(define_constraint "vgh" + "@internal + A constraint that matches an immediate offset valid for SVE LD1H + gather instructions." + (match_operand 0 "aarch64_sve_gather_immediate_h")) + +(define_constraint "vgw" + "@internal + A constraint that matches an immediate offset valid for SVE LD1W + gather instructions." + (match_operand 0 "aarch64_sve_gather_immediate_w")) + (define_constraint "vsa" "@internal A constraint that matches an immediate operand valid for SVE arithmetic instructions." (match_operand 0 "aarch64_sve_arith_immediate")) +(define_constraint "vsb" + "@internal + A constraint that matches an immediate operand valid for SVE UMAX + and UMIN operations." + (match_operand 0 "aarch64_sve_vsb_immediate")) + (define_constraint "vsc" "@internal A constraint that matches a signed immediate operand valid for SVE CMP instructions." (match_operand 0 "aarch64_sve_cmp_vsc_immediate")) +(define_constraint "vss" + "@internal + A constraint that matches a signed immediate operand valid for SVE + DUP instructions." + (match_test "aarch64_sve_dup_immediate_p (op)")) + (define_constraint "vsd" "@internal A constraint that matches an unsigned immediate operand valid for SVE @@ -389,7 +532,7 @@ "@internal A constraint that matches a vector count operand valid for SVE INC and DEC instructions." 
- (match_operand 0 "aarch64_sve_inc_dec_immediate")) + (match_operand 0 "aarch64_sve_vector_inc_dec_immediate")) (define_constraint "vsn" "@internal @@ -397,6 +540,18 @@ is valid for SVE SUB instructions." (match_operand 0 "aarch64_sve_sub_arith_immediate")) +(define_constraint "vsQ" + "@internal + Like vsa, but additionally check that the immediate is nonnegative + when interpreted as a signed value." + (match_operand 0 "aarch64_sve_qadd_immediate")) + +(define_constraint "vsS" + "@internal + Like vsn, but additionally check that the immediate is negative + when interpreted as a signed value." + (match_operand 0 "aarch64_sve_qsub_immediate")) + (define_constraint "vsl" "@internal A constraint that matches an immediate operand valid for SVE logical @@ -405,9 +560,9 @@ (define_constraint "vsm" "@internal - A constraint that matches an immediate operand valid for SVE MUL - operations." - (match_operand 0 "aarch64_sve_mul_immediate")) + A constraint that matches an immediate operand valid for SVE MUL, + SMAX and SMIN operations." + (match_operand 0 "aarch64_sve_vsm_immediate")) (define_constraint "vsA" "@internal @@ -415,13 +570,20 @@ and FSUB operations." (match_operand 0 "aarch64_sve_float_arith_immediate")) +;; "B" for "bound". +(define_constraint "vsB" + "@internal + A constraint that matches an immediate operand valid for SVE FMAX + and FMIN operations." + (match_operand 0 "aarch64_sve_float_maxmin_immediate")) + (define_constraint "vsM" "@internal - A constraint that matches an imediate operand valid for SVE FMUL + A constraint that matches an immediate operand valid for SVE FMUL operations." (match_operand 0 "aarch64_sve_float_mul_immediate")) (define_constraint "vsN" "@internal A constraint that matches the negative of vsA" - (match_operand 0 "aarch64_sve_float_arith_with_sub_immediate")) + (match_operand 0 "aarch64_sve_float_negated_arith_immediate")) diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c index 50e5108aa59f6..724dfd80f9301 100644 --- a/gcc/config/aarch64/cortex-a57-fma-steering.c +++ b/gcc/config/aarch64/cortex-a57-fma-steering.c @@ -1,5 +1,5 @@ /* FMA steering optimization pass for Cortex-A57. - Copyright (C) 2015-2018 Free Software Foundation, Inc. + Copyright (C) 2015-2021 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -37,6 +37,7 @@ #include "insn-attr.h" #include "context.h" #include "tree-pass.h" +#include "function-abi.h" #include "regrename.h" #include "aarch64-protos.h" @@ -114,6 +115,9 @@ class fma_forest void dispatch (); private: + /* Prohibit copy construction. */ + fma_forest (const fma_forest &); + /* The list of roots that form this forest. */ std::list *m_roots; @@ -148,6 +152,10 @@ class fma_node void rename (fma_forest *); void dump_info (fma_forest *); +private: + /* Prohibit copy construction. */ + fma_node (const fma_node &); + protected: /* Root node that lead to this node. */ fma_root_node *m_root; @@ -203,6 +211,9 @@ class func_fma_steering void execute_fma_steering (); private: + /* Prohibit copy construction. 
*/ + func_fma_steering (const func_fma_steering &); + void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *), void (*) (fma_forest *, fma_node *), bool); void analyze (); @@ -257,7 +268,7 @@ rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) if (DEBUG_INSN_P (tmp->insn)) continue; n_uses++; - IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); + *unavailable |= ~reg_class_contents[tmp->cl]; super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; } @@ -271,7 +282,7 @@ rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) { fprintf (dump_file, "Register %s in insn %d", reg_names[reg], INSN_UID (head->first->insn)); - if (head->need_caller_save_reg) + if (head->call_abis) fprintf (dump_file, " crosses a call"); } diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c index 823b9bd9fce5f..e2935a1156412 100644 --- a/gcc/config/aarch64/driver-aarch64.c +++ b/gcc/config/aarch64/driver-aarch64.c @@ -1,5 +1,5 @@ /* Native CPU detection for aarch64. - Copyright (C) 2015-2018 Free Software Foundation, Inc. + Copyright (C) 2015-2021 Free Software Foundation, Inc. This file is part of GCC. @@ -21,18 +21,16 @@ #include "config.h" #define INCLUDE_STRING +#define INCLUDE_SET #include "system.h" #include "coretypes.h" #include "tm.h" - -/* Defined in common/config/aarch64/aarch64-common.c. */ -std::string aarch64_get_extension_string_for_isa_flags (unsigned long, - unsigned long); +#include "aarch64-protos.h" struct aarch64_arch_extension { const char *ext; - unsigned int flag; + uint64_t flag; const char *feat_string; }; @@ -52,7 +50,7 @@ struct aarch64_core_data unsigned char implementer_id; /* Exactly 8 bits */ unsigned int part_no; /* 12 bits + 12 bits */ unsigned variant; - const unsigned long flags; + const uint64_t flags; }; #define AARCH64_BIG_LITTLE(BIG, LITTLE) \ @@ -75,7 +73,7 @@ struct aarch64_arch_driver_info { const char* id; const char* name; - const unsigned long flags; + const uint64_t flags; }; #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ @@ -119,9 +117,15 @@ valid_bL_core_p (unsigned int *core, unsigned int bL_core) /* Returns the hex integer that is after ':' for the FIELD. Returns -1 is returned if there was problem parsing the integer. */ static unsigned -parse_field (const char *field) +parse_field (const std::string &field) { - const char *rest = strchr (field, ':'); + const char *rest = strchr (field.c_str (), ':'); + + /* The line must be in the format of :, if it's not + then we have a weird format. */ + if (rest == NULL) + return -1; + char *after; unsigned fint = strtol (rest + 1, &after, 16); if (after == rest + 1) @@ -129,6 +133,82 @@ parse_field (const char *field) return fint; } +/* Returns the index of the ':' inside the FIELD which must be found + after the value of KEY. Returns string::npos if line does not contain + a field. */ + +static size_t +find_field (const std::string &field, const std::string &key) +{ + size_t key_pos, sep_pos; + key_pos = field.find (key); + if (key_pos == std::string::npos) + return std::string::npos; + + sep_pos = field.find (":", key_pos + 1); + if (sep_pos == std::string::npos) + return std::string::npos; + + return sep_pos; +} + +/* Splits and returns a string based on whitespace and return it as + part of a set. Empty strings are ignored. 
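+   For example, a hypothetical VAL of "fp asimd aes" leaves the three
+   words "fp", "asimd" and "aes" in RESULT.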
*/ + +static void +split_words (const std::string &val, std::set &result) +{ + size_t cur, prev = 0; + std::string word; + while ((cur = val.find_first_of (" \n", prev)) != std::string::npos) + { + word = val.substr (prev, cur - prev); + /* Skip adding empty words. */ + if (!word.empty ()) + result.insert (word); + prev = cur + 1; + } + + if (prev != cur) + result.insert (val.substr (prev)); +} + +/* Read an entire line from F until '\n' or EOF. */ + +static std::string +readline (FILE *f) +{ + char *buf = NULL; + int size = 0; + int last = 0; + const int buf_size = 128; + + if (feof (f)) + return std::string (); + + do + { + size += buf_size; + buf = (char*) xrealloc (buf, size); + gcc_assert (buf); + /* If fgets fails it returns NULL, but if it reaches EOF + with 0 characters read it also returns EOF. However + the condition on the loop would have broken out of the + loop in that case, and if we are in the first iteration + then the empty string is the correct thing to return. */ + if (!fgets (buf + last, buf_size, f)) + return std::string (); + /* If we're not at the end of the line then override the + \0 added by fgets. */ + last = strnlen (buf, size) - 1; + } + while (!feof (f) && buf[last] != '\n'); + + std::string result (buf); + free (buf); + return result; +} + /* Return true iff ARR contains CORE, in either of the two elements. */ static bool @@ -167,7 +247,6 @@ host_detect_local_cpu (int argc, const char **argv) { const char *res = NULL; static const int num_exts = ARRAY_SIZE (aarch64_extensions); - char buf[128]; FILE *f = NULL; bool arch = false; bool tune = false; @@ -179,8 +258,11 @@ host_detect_local_cpu (int argc, const char **argv) unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS }; unsigned int n_variants = 0; bool processed_exts = false; - unsigned long extension_flags = 0; - unsigned long default_flags = 0; + uint64_t extension_flags = 0; + uint64_t default_flags = 0; + std::string buf; + size_t sep_pos = -1; + char *fcpu_info; gcc_assert (argc); @@ -198,16 +280,20 @@ host_detect_local_cpu (int argc, const char **argv) if (!arch && !tune && !cpu) goto not_found; - f = fopen ("/proc/cpuinfo", "r"); + fcpu_info = getenv ("GCC_CPUINFO"); + if (fcpu_info) + f = fopen (fcpu_info, "r"); + else + f = fopen ("/proc/cpuinfo", "r"); if (f == NULL) goto not_found; /* Look through /proc/cpuinfo to determine the implementer and then the part number that identifies a particular core. 
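      A typical record looks something like the following (the values are
      illustrative; implementer 0x41 denotes Arm Ltd. and part 0xd08 a
      Cortex-A72):

        CPU implementer : 0x41
        CPU variant     : 0x0
        CPU part        : 0xd08
        Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32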
*/ - while (fgets (buf, sizeof (buf), f) != NULL) + while (!(buf = readline (f)).empty ()) { - if (strstr (buf, "implementer") != NULL) + if (find_field (buf, "implementer") != std::string::npos) { unsigned cimp = parse_field (buf); if (cimp == INVALID_IMP) @@ -219,8 +305,7 @@ host_detect_local_cpu (int argc, const char **argv) else if (imp != cimp) goto not_found; } - - if (strstr (buf, "variant") != NULL) + else if (find_field (buf, "variant") != std::string::npos) { unsigned cvariant = parse_field (buf); if (!contains_core_p (variants, cvariant)) @@ -232,8 +317,7 @@ host_detect_local_cpu (int argc, const char **argv) } continue; } - - if (strstr (buf, "part") != NULL) + else if (find_field (buf, "part") != std::string::npos) { unsigned ccore = parse_field (buf); if (!contains_core_p (cores, ccore)) @@ -245,39 +329,36 @@ host_detect_local_cpu (int argc, const char **argv) } continue; } - if (!tune && !processed_exts && strstr (buf, "Features") != NULL) + else if (!tune && !processed_exts + && (sep_pos = find_field (buf, "Features")) != std::string::npos) { + /* First create the list of features in the buffer. */ + std::set features; + /* Drop everything till the :. */ + buf = buf.substr (sep_pos + 1); + split_words (buf, features); + for (i = 0; i < num_exts; i++) { - const char *p = aarch64_extensions[i].feat_string; + const std::string val (aarch64_extensions[i].feat_string); /* If the feature contains no HWCAPS string then ignore it for the auto detection. */ - if (*p == '\0') + if (val.empty ()) continue; bool enabled = true; /* This may be a multi-token feature string. We need to match all parts, which could be in any order. */ - size_t len = strlen (buf); - do - { - const char *end = strchr (p, ' '); - if (end == NULL) - end = strchr (p, '\0'); - if (memmem (buf, len, p, end - p) == NULL) - { - /* Failed to match this token. Turn off the - features we'd otherwise enable. */ - enabled = false; - break; - } - if (*end == '\0') - break; - p = end + 1; - } - while (1); + std::set tokens; + split_words (val, tokens); + std::set::iterator it; + + /* Iterate till the first feature isn't found or all of them + are found. */ + for (it = tokens.begin (); enabled && it != tokens.end (); ++it) + enabled = enabled && features.count (*it); if (enabled) extension_flags |= aarch64_extensions[i].flag; diff --git a/gcc/config/aarch64/falkor-tag-collision-avoidance.c b/gcc/config/aarch64/falkor-tag-collision-avoidance.c new file mode 100644 index 0000000000000..de214e4a0f75b --- /dev/null +++ b/gcc/config/aarch64/falkor-tag-collision-avoidance.c @@ -0,0 +1,890 @@ +/* Tag Collision Avoidance pass for Falkor. + Copyright (C) 2018-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#define INCLUDE_LIST +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "tree-pass.h" +#include "aarch64-protos.h" +#include "hash-map.h" +#include "cfgloop.h" +#include "cfgrtl.h" +#include "rtl-iter.h" +#include "df.h" +#include "memmodel.h" +#include "optabs.h" +#include "regs.h" +#include "recog.h" +#include "function-abi.h" +#include "regrename.h" +#include "print-rtl.h" + +/* The Falkor hardware prefetching system uses the encoding of the registers + and offsets of loads to decide which of the multiple hardware prefetchers to + assign the load to. This has the positive effect of accelerating prefetches + when all related loads with uniform strides are assigned to the same + prefetcher unit. The down side is that because of the way the assignment + works, multiple unrelated loads may end up on the same prefetch unit, thus + causing the unit to bounce between different sets of addresses and never + train correctly. The point of this pass is to avoid such collisions so that + unrelated loads are spread out to different prefetchers. It also makes a + rudimentary attempt to ensure that related loads with the same tags don't + get moved out unnecessarily. + + Perhaps a future enhancement would be to make a more concerted attempt to + get related loads under the same tag. See the memcpy/memset implementation + for falkor in glibc to understand the kind of impact this can have on + falkor. + + The assignment of loads is based on a tag that is computed from the encoding + of the first destination register (the only destination in case of LDR), the + base register and the offset (either the register or the immediate value, as + encoded in the instruction). This is what the 14 bit tag looks like: + + |<- 6 bits ->|<- 4b ->|<- 4b ->| + -------------------------------- + | OFFSET | SRC | DST | + -------------------------------- + + For all cases, the SRC and DST are the 4 LSB of the encoding of the register + in the instruction. Offset computation is more involved and is as follows: + + - For register offset addressing: 4 LSB of the offset register with the MSB + of the 6 bits set to 1. + + - For immediate offset: 4 LSB of the encoded immediate offset. The encoding + depends on the width of the load and is expressed as multiples of the + width. + + - For loads with update: 4 LSB of the offset. The encoding here is the + exact number by which the base is offset and incremented. + + Based on the above it is clear that registers 0 and 16 will result in + collisions, 1 and 17 and so on. This pass detects such collisions within a + def/use chain of the source register in a loop and tries to resolve the + collision by renaming one of the destination registers. */ + +/* Get the destination part of the tag. */ +#define TAG_GET_DEST(__tag) ((__tag) & 0xf) + +/* Get the tag with the destination part updated. */ +#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | (__dest & 0xf)) + +#define MAX_PREFETCH_STRIDE 2048 + +/* The instruction information structure. This is used to cache information + about the INSN that we derive when traversing through all of the insns in + loops. 
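+   As a worked illustration (the particular registers are hypothetical),
+   a load such as "ldr w1, [x2, #8]" -- a 4-byte destination with no
+   writeback -- has DST = 1, SRC = 2 and OFFSET = (8 / 4) >> 2 = 0, so
+   its TAG () is 0x021.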
*/ +class tag_insn_info +{ +public: + rtx_insn *insn; + rtx dest; + rtx base; + rtx offset; + bool writeback; + bool ldp; + + tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p) + : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p) + {} + + /* Compute the tag based on BASE, DEST and OFFSET of the load. */ + unsigned tag () + { + unsigned int_offset = 0; + rtx offset = this->offset; + unsigned dest = REGNO (this->dest); + unsigned base = REGNO (this->base); + machine_mode dest_mode = GET_MODE (this->dest); + + /* Falkor does not support SVE; GET_LOAD_INFO ensures that the + destination mode is constant here. */ + unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant (); + + /* For loads of larger than 16 bytes, the DEST part of the tag is 0. */ + if ((dest_mode_size << this->ldp) > 16) + dest = 0; + + if (offset && REG_P (offset)) + int_offset = (1 << 5) | REGNO (offset); + else if (offset && CONST_INT_P (offset)) + { + int_offset = INTVAL (offset); + int_offset /= dest_mode_size; + if (!this->writeback) + int_offset >>= 2; + } + return ((dest & 0xf) + | ((base & 0xf) << 4) + | ((int_offset & 0x3f) << 8)); + } +}; + +/* Hash map to traverse and process instructions with colliding tags. */ +typedef hash_map > tag_map_t; + +/* Vector of instructions with colliding tags. */ +typedef auto_vec insn_info_list_t; + +/* Pair of instruction information and unavailable register set to pass to + CHECK_COLLIDING_TAGS. */ +typedef std::pair arg_pair_t; + + +/* Callback to free all tag_insn_info objects. */ +bool +free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v, + void *arg ATTRIBUTE_UNUSED) +{ + while (v->length () > 0) + delete v->pop (); + + return true; +} + + +/* Add all aliases of the register to the unavailable register set. REG is the + smallest register number that can then be used to reference its aliases. + UNAVAILABLE is the hard register set to add the ignored register numbers to + and MODE is the mode in which the registers would have been used. */ +static void +ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg) +{ + add_to_hard_reg_set (unavailable, mode, reg); + add_to_hard_reg_set (unavailable, mode, reg + 16); + add_to_hard_reg_set (unavailable, mode, reg + 32); + add_to_hard_reg_set (unavailable, mode, reg + 48); +} + + +/* Callback to check which destination registers are unavailable to us for + renaming because of the base and offset colliding. This is a callback that + gets called for every name value pair (T, V) in the TAG_MAP. The ARG is an + std::pair of the tag_insn_info of the original insn and the hard register + set UNAVAILABLE that is used to record hard register numbers that cannot be + used for the renaming. This always returns true since we want to traverse + through the entire TAG_MAP. */ +bool +check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg) +{ + HARD_REG_SET *unavailable = arg->second; + unsigned orig_tag = arg->first->tag (); + unsigned tag = INTVAL (t); + machine_mode mode = GET_MODE (arg->first->dest); + + /* Can't collide with emptiness. */ + if (v.length () == 0) + return true; + + /* Drop all aliased destination registers that result in the same + tag. It is not necessary to drop all of them but we do anyway + because it is quicker than checking ranges. 
*/ + if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0)) + ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag)); + + return true; +} + + +/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for + renaming. INSN_INFO is the original insn, TAG_MAP is the map of the list of + insns indexed by their tags, HEAD is the def/use chain head of the + destination register of the original insn. The routine returns the super + class of register classes that may be used during the renaming. */ +static enum reg_class +init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head, + HARD_REG_SET *unavailable) +{ + unsigned dest = head->regno; + enum reg_class super_class = NO_REGS; + machine_mode mode = GET_MODE (insn_info->dest); + + CLEAR_HARD_REG_SET (*unavailable); + + for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use) + { + if (DEBUG_INSN_P (tmp->insn)) + continue; + + *unavailable |= ~reg_class_contents[tmp->cl]; + super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; + } + + for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (fixed_regs[i] || global_regs[i]) + add_to_hard_reg_set (unavailable, mode, i); + + arg_pair_t arg = arg_pair_t (insn_info, unavailable); + + /* Exclude all registers that would lead to collisions with other loads. */ + tag_map.traverse (&arg); + + /* Finally, also ignore all aliases of the current reg. */ + ignore_all_aliases (unavailable, mode, dest & 0xf); + + return super_class; +} + + +/* Find a suitable and available register and rename the chain of occurrences + of the register defined in the def/use chain headed by HEAD in which INSN + exists. CUR_TAG, TAGS and TAG_MAP are used to determine which registers are + unavailable due to a potential collision due to the rename. The routine + returns the register number in case of a successful rename or -1 to indicate + failure. */ +static int +rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head) +{ + unsigned dest_regno = head->regno; + + if (head->cannot_rename || head->renamed) + return -1; + + HARD_REG_SET unavailable; + + enum reg_class super_class = init_unavailable (insn_info, tag_map, head, + &unavailable); + + unsigned new_regno = find_rename_reg (head, super_class, &unavailable, + dest_regno, false); + + /* Attempt to rename as long as regrename doesn't just throw the same + register at us. */ + if (new_regno != dest_regno && regrename_do_replace (head, new_regno)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n", + INSN_UID (insn_info->insn), dest_regno, new_regno); + + return new_regno; + } + + return -1; +} + + +/* Return true if REGNO is not safe to rename. */ +static bool +unsafe_rename_p (unsigned regno) +{ + /* Avoid renaming registers used for argument passing and return value. In + future we could be a little less conservative and walk through the basic + blocks to see if there are any call or syscall sites. */ + if (regno <= R8_REGNUM + || (regno >= V0_REGNUM && regno < V8_REGNUM)) + return true; + + /* Don't attempt to rename registers that may have specific meanings. */ + switch (regno) + { + case LR_REGNUM: + case HARD_FRAME_POINTER_REGNUM: + case FRAME_POINTER_REGNUM: + case STACK_POINTER_REGNUM: + return true; + } + + return false; +} + + +/* Go through the def/use chains for the register and find the chain for this + insn to rename. The function returns the hard register number in case of a + successful rename and -1 otherwise. 
*/ +static int +rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map) +{ + struct du_chain *chain = NULL; + du_head_p head = NULL; + int i; + + unsigned dest_regno = REGNO (insn_info->dest); + + if (unsafe_rename_p (dest_regno)) + return -1; + + /* Search the chain where this instruction is (one of) the root. */ + rtx_insn *insn = insn_info->insn; + operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info; + + for (i = 0; i < dest_op_info->n_chains; i++) + { + /* The register tracked by this chain does not match the + destination register of insn. */ + if (dest_op_info->heads[i]->regno != dest_regno) + continue; + + head = dest_op_info->heads[i]; + /* The chain was merged in another, find the new head. */ + if (!head->first) + head = regrename_chain_from_id (head->id); + + for (chain = head->first; chain; chain = chain->next_use) + /* Found the insn in the chain, so try renaming the register in this + chain. */ + if (chain->insn == insn) + return rename_chain (insn_info, tag_map, head); + } + + return -1; +} + + +/* Flag to track if the map has changed. */ +static bool map_changed = false; + +/* The actual reallocation logic. For each vector of collisions V, try to + resolve the collision by attempting to rename the destination register of + all but one of the loads. This is a callback that is invoked for each + name-value pair (T, V) in TAG_MAP. The function returns true whenever it + returns unchanged and false otherwise to halt traversal. */ +bool +avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map) +{ + /* We need at least two loads to cause a tag collision, return unchanged. */ + if (v->length () < 2) + return true; + + tag_insn_info *vec_start = v->pop (); + tag_insn_info *insn_info = vec_start; + + /* Try to rename at least one register to reduce the collision. If we + iterate all the way through, we end up dropping one of the loads from the + list. This is fine because we want at most one element to ensure that a + subsequent rename attempt does not end up worsening the collision. */ + do + { + int new_regno; + + if ((new_regno = rename_dest (insn_info, *tag_map)) != -1) + { + rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno)); + + tag_map->get_or_insert (new_tag).safe_push (insn_info); + df_set_regs_ever_live (new_regno, true); + map_changed = true; + return false; + } + + v->safe_insert (0, insn_info); + insn_info = v->pop (); + } + while (insn_info != vec_start); + + if (dump_file) + fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>", + INSN_UID (insn_info->insn)); + + /* Drop the last element and move on to the next tag. */ + delete insn_info; + return true; +} + + +/* For each set of collisions, attempt to rename the registers or insert a move + to avoid the collision. We repeatedly traverse through TAG_MAP using + AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a + full traversal results in no change in the map. */ +static void +avoid_collisions (tag_map_t &tag_map) +{ + do + { + map_changed = false; + tag_map.traverse (&tag_map); + } + while (map_changed); +} + + + +/* Find the use def chain in which INSN exists and then see if there is a + definition inside the loop and outside it. We use this as a simple + approximation to determine whether the base register is an IV. The basic + idea is to find INSN in the use-def chains for its base register and find + all definitions that reach it. 
Of all these definitions, there should be at + least one definition that is a simple addition of a constant value, either + as a binary operation or a pre or post update. + + The function returns true if the base register is estimated to be an IV. */ +static bool +iv_p (rtx_insn *insn, rtx reg, struct loop *loop) +{ + df_ref ause; + unsigned regno = REGNO (reg); + + /* Ignore loads from the stack. */ + if (regno == SP_REGNUM) + return false; + + for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause)) + { + if (!DF_REF_INSN_INFO (ause) + || !NONDEBUG_INSN_P (DF_REF_INSN (ause))) + continue; + + if (insn != DF_REF_INSN (ause)) + continue; + + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref def_rec; + + FOR_EACH_INSN_INFO_DEF (def_rec, insn_info) + { + rtx_insn *insn = DF_REF_INSN (def_rec); + basic_block bb = BLOCK_FOR_INSN (insn); + + if (dominated_by_p (CDI_DOMINATORS, bb, loop->header) + && bb->loop_father == loop) + { + if (recog_memoized (insn) < 0) + continue; + + rtx pat = PATTERN (insn); + + /* Prefetch or clobber; unlikely to be a constant stride. The + falkor software prefetcher tuning is pretty conservative, so + its presence indicates that the access pattern is probably + strided but most likely with an unknown stride size or a + stride size that is quite large. */ + if (GET_CODE (pat) != SET) + continue; + + rtx x = SET_SRC (pat); + if (GET_CODE (x) == ZERO_EXTRACT + || GET_CODE (x) == ZERO_EXTEND + || GET_CODE (x) == SIGN_EXTEND) + x = XEXP (x, 0); + + /* Loading the value from memory; unlikely to be a constant + stride. */ + if (MEM_P (x)) + continue; + + /* An increment or decrement by a constant MODE_SIZE amount or + the result of a binary expression is likely to be an IV. */ + if (GET_CODE (x) == POST_INC + || GET_CODE (x) == POST_DEC + || GET_CODE (x) == PRE_INC + || GET_CODE (x) == PRE_DEC) + return true; + else if (BINARY_P (x) + && (CONST_INT_P (XEXP (x, 0)) + || CONST_INT_P (XEXP (x, 1)))) + { + rtx stride = (CONST_INT_P (XEXP (x, 0)) + ? XEXP (x, 0) : XEXP (x, 1)); + + /* Don't bother with very long strides because the prefetcher + is unable to train on them anyway. */ + if (INTVAL (stride) < MAX_PREFETCH_STRIDE) + return true; + } + } + } + return false; + } + return false; +} + + +/* Return true if SRC is a strided load in the LOOP, false otherwise. + If it is a strided load, set the BASE and OFFSET. Also, if this is + a pre/post increment load, set PRE_POST to true. */ +static bool +valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post, + rtx *base, rtx *offset, bool load_pair) +{ + subrtx_var_iterator::array_type array; + rtx x = NULL_RTX; + + FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST) + if (MEM_P (*iter)) + { + x = *iter; + break; + } + + if (!x) + return false; + + struct aarch64_address_info addr; + machine_mode mode = GET_MODE (x); + + if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true)) + return false; + + if (addr.type != ADDRESS_REG_IMM + && addr.type != ADDRESS_REG_WB + && addr.type != ADDRESS_REG_REG + && addr.type != ADDRESS_REG_UXTW + && addr.type != ADDRESS_REG_SXTW) + return false; + + unsigned regno = REGNO (addr.base); + if (global_regs[regno] || fixed_regs[regno]) + return false; + + if (addr.type == ADDRESS_REG_WB) + { + unsigned code = GET_CODE (XEXP (x, 0)); + + *pre_post = true; + *base = addr.base; + + if (code == PRE_MODIFY || code == POST_MODIFY) + *offset = addr.offset; + else + { + /*Writeback is only supported for fixed-width modes. 
*/ + unsigned int_offset = GET_MODE_SIZE (mode).to_constant (); + + /* For post-incremented load pairs we would increment the base twice + over, so make that adjustment. */ + if (load_pair && (code == POST_INC || code == POST_DEC)) + int_offset *= 2; + + *offset = GEN_INT (int_offset); + } + return true; + } + else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG) + { + /* Check if the load is strided. */ + if (!iv_p (insn, addr.base, loop)) + return false; + + *base = addr.base; + *offset = addr.offset; + return true; + } + + return false; +} + + +/* Return true if INSN is a strided load in LOOP. If it is a strided load, set + the DEST, BASE and OFFSET. Also, if this is a pre/post increment load, set + PRE_POST to true. + + The routine does checks on the destination of the insn and depends on + STRIDED_LOAD_P to check the source and fill in the BASE and OFFSET. */ +static bool +get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base, + rtx *offset, bool *pre_post, bool *ldp) +{ + if (!INSN_P (insn) || recog_memoized (insn) < 0) + return false; + + rtx pat = PATTERN (insn); + unsigned code = GET_CODE (pat); + bool load_pair = (code == PARALLEL); + + /* For a load pair we need only the first base and destination + registers. We however need to ensure that our pre/post increment + offset is doubled; we do that in STRIDED_LOAD_P. */ + if (load_pair) + { + pat = XVECEXP (pat, 0, 0); + code = GET_CODE (pat); + } + + if (code != SET) + return false; + + rtx dest_rtx = SET_DEST (pat); + + if (!REG_P (dest_rtx)) + return false; + + unsigned regno = REGNO (dest_rtx); + machine_mode mode = GET_MODE (dest_rtx); + machine_mode inner_mode = GET_MODE_INNER (mode); + + /* Falkor does not support SVE vectors. */ + if (!GET_MODE_SIZE (mode).is_constant ()) + return false; + + /* Ignore vector struct or lane loads. */ + if (GET_MODE_SIZE (mode).to_constant () + != GET_MODE_SIZE (inner_mode).to_constant ()) + return false; + + /* The largest width we want to bother with is a load of a pair of + quad-words. */ + if ((GET_MODE_SIZE (mode).to_constant () << load_pair) + > GET_MODE_SIZE (OImode)) + return false; + + /* Ignore loads into the stack pointer because it is unlikely to be a + stream. */ + if (regno == SP_REGNUM) + return false; + + if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset, + load_pair)) + { + *dest = dest_rtx; + *ldp = load_pair; + + return true; + } + + return false; +} + + +/* Return whether INSN and CAND are in the same def/use chain. */ +static bool +in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno) +{ + struct du_chain *chain = NULL; + du_head_p head = NULL; + int i; + + /* Search the chain where this instruction is (one of) the root. */ + operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info; + + for (i = 0; i < op_info->n_chains; i++) + { + /* The register tracked by this chain does not match the + dest register of insn. */ + if (op_info->heads[i]->regno != regno) + continue; + + head = op_info->heads[i]; + /* The chain was merged in another, find the new head. 
*/ + if (!head->first) + head = regrename_chain_from_id (head->id); + + bool found_insn = false, found_cand = false; + + for (chain = head->first; chain; chain = chain->next_use) + { + rtx *loc = &SET_DEST (PATTERN (chain->insn)); + + if (chain->loc != loc) + continue; + + if (chain->insn == insn) + found_insn = true; + + if (chain->insn == cand) + found_cand = true; + + if (found_insn && found_cand) + return true; + } + } + + return false; +} + + +/* Callback function to traverse the tag map and drop loads that have the same + destination and are in the same chain of occurrence. Routine always returns + true to allow traversal through all of TAG_MAP. */ +bool +single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v, + void *arg ATTRIBUTE_UNUSED) +{ + for (int i = v->length () - 1; i>= 1; i--) + { + tag_insn_info *insn_info = (*v)[i]; + + for (int j = v->length () - 2; j >= 0; j--) + { + /* Filter out destinations in the same chain. */ + if (in_same_chain (insn_info->insn, (*v)[j]->insn, + REGNO (insn_info->dest))) + { + v->ordered_remove (j); + i = v->length (); + break; + } + } + } + + return true; +} + + +/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn + list INSN_INFO for tag T. */ +bool +dump_insn_list (const rtx &t, const insn_info_list_t &insn_info, + void *unused ATTRIBUTE_UNUSED) +{ + gcc_assert (dump_file); + fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t)); + + for (unsigned i = 0; i < insn_info.length (); i++) + dump_insn_slim (dump_file, insn_info[i]->insn); + + fprintf (dump_file, "\n"); + + return true; +} + + +/* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware + prefetcher memory tags. */ +static void +record_loads (tag_map_t &tag_map, struct loop *loop) +{ + rtx_insn *insn; + basic_block *body, bb; + + body = get_loop_body (loop); + + for (unsigned i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + FOR_BB_INSNS (bb, insn) + { + rtx base = NULL_RTX; + rtx dest = NULL_RTX; + rtx offset = NULL_RTX; + bool writeback = false; + bool ldp = false; + + if (!INSN_P (insn) || DEBUG_INSN_P (insn)) + continue; + + if (get_load_info (insn, loop, &dest, &base, &offset, &writeback, + &ldp)) + { + tag_insn_info *i = new tag_insn_info (insn, dest, base, offset, + writeback, ldp); + rtx tag = GEN_INT (i->tag ()); + tag_map.get_or_insert (tag).safe_push (i); + } + } + } + + if (dump_file) + { + fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num); + tag_map.traverse (NULL); + } + + /* Try to reduce the dataset before launching into the rename attempt. Drop + destinations in the same collision chain that appear in the same def/use + chain, all as defs. These chains will move together in a rename so + there's no point in keeping both in there. */ + tag_map.traverse (NULL); +} + + +/* Tag collision avoidance pass for Falkor. The pass runs in two phases for + each loop; the first phase collects all loads that we consider as + interesting for renaming into a tag-indexed map of lists. The second phase + renames the destination register of the loads in an attempt to spread out + the loads into different tags. 
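+   For instance, if two unrelated loads in a loop hash to the same tag,
+   renaming the destination of one of them changes the DST field of its
+   tag, so the two streams no longer land on the same prefetcher unit.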
*/ +void +execute_tag_collision_avoidance () +{ + struct loop *loop; + + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_chain_add_problem (DF_UD_CHAIN); + df_compute_regs_ever_live (true); + df_note_add_problem (); + df_analyze (); + df_set_flags (DF_DEFER_INSN_RESCAN); + + regrename_init (true); + regrename_analyze (NULL); + + compute_bb_for_insn (); + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + { + tag_map_t tag_map (512); + + record_loads (tag_map, loop); + avoid_collisions (tag_map); + if (dump_file) + { + fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num); + tag_map.traverse (NULL); + } + tag_map.traverse (NULL); + } + + loop_optimizer_finalize (); + free_dominance_info (CDI_DOMINATORS); + regrename_finish (); +} + + +const pass_data pass_data_tag_collision_avoidance = +{ + RTL_PASS, /* type */ + "tag_collision_avoidance", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + + +class pass_tag_collision_avoidance : public rtl_opt_pass +{ +public: + pass_tag_collision_avoidance (gcc::context *ctxt) + : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return ((aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS) + && optimize >= 2); + } + + virtual unsigned int execute (function *) + { + execute_tag_collision_avoidance (); + return 0; + } + +}; // class pass_tag_collision_avoidance + + +/* Create a new pass instance. */ +rtl_opt_pass * +make_pass_tag_collision_avoidance (gcc::context *ctxt) +{ + return new pass_tag_collision_avoidance (ctxt); +} diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md index 45cbff93b24fc..6118a528c038e 100644 --- a/gcc/config/aarch64/falkor.md +++ b/gcc/config/aarch64/falkor.md @@ -1,5 +1,5 @@ ;; Falkor pipeline description -;; Copyright (C) 2017-2018 Free Software Foundation, Inc. +;; Copyright (C) 2017-2021 Free Software Foundation, Inc. ;; ;; This file is part of GCC. ;; @@ -302,7 +302,7 @@ (define_insn_reservation "falkor_alu_1_xyz" 1 (and (eq_attr "tune" "falkor") - (eq_attr "type" "alus_sreg,alus_imm,alus_shift_imm,csel,adc_reg,alu_imm,alu_sreg,alu_shift_imm,alu_ext,alus_ext,logic_imm,logic_reg,logic_shift_imm,logics_imm,logics_reg,logics_shift_imm,mov_reg")) + (eq_attr "type" "alus_sreg,alus_imm,alus_shift_imm,csel,adc_reg,alu_imm,alu_sreg,alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_ext,alus_ext,logic_imm,logic_reg,logic_shift_imm,logics_imm,logics_reg,logics_shift_imm,mov_reg")) "falkor_xyz") ;; SIMD Miscellaneous Instructions @@ -648,7 +648,7 @@ (define_insn_reservation "falkor_other_0_nothing" 0 (and (eq_attr "tune" "falkor") - (eq_attr "type" "no_insn,trap,block")) + (eq_attr "type" "trap,block")) "nothing") (define_insn_reservation "falkor_other_2_z" 2 diff --git a/gcc/config/aarch64/geniterators.sh b/gcc/config/aarch64/geniterators.sh index 0a02f995ea275..5fd8bec9e7a7f 100644 --- a/gcc/config/aarch64/geniterators.sh +++ b/gcc/config/aarch64/geniterators.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Copyright (C) 2014-2018 Free Software Foundation, Inc. +# Copyright (C) 2014-2021 Free Software Foundation, Inc. # Contributed by ARM Ltd. # # This file is part of GCC. 
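#
# (As an illustration of the output affected by the hunk below: with the
#  extra FLAG parameter, an iterator entry such as
#  "(define_mode_iterator VDQF [V2SF V4SF V2DF])" is emitted as
#
#    #define BUILTIN_VDQF(T, N, MAP, FLAG) \
#      VAR3 (T, N, MAP, FLAG, v2sf, v4sf, v2df)
#
#  in the generated builtin iterators header.)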
@@ -70,8 +70,8 @@ iterdef { sub(/ *\]/, "", s) n = split(s, a) - printf "#define BUILTIN_" a[1] "(T, N, MAP) \\\n" - printf " VAR" (n-1) " (T, N, MAP" + printf "#define BUILTIN_" a[1] "(T, N, MAP, FLAG) \\\n" + printf " VAR" (n-1) " (T, N, MAP, FLAG" for (i = 2; i <= n; i++) printf ", " tolower(a[i]) printf ")\n" diff --git a/gcc/config/aarch64/gentune.sh b/gcc/config/aarch64/gentune.sh index 83645e45573f3..bde0813013fd2 100644 --- a/gcc/config/aarch64/gentune.sh +++ b/gcc/config/aarch64/gentune.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Copyright (C) 2011-2018 Free Software Foundation, Inc. +# Copyright (C) 2011-2021 Free Software Foundation, Inc. # Contributed by ARM Ltd. # # This file is part of GCC. diff --git a/gcc/config/aarch64/host-aarch64-darwin.c b/gcc/config/aarch64/host-aarch64-darwin.c new file mode 100644 index 0000000000000..d70f2df3bf1b3 --- /dev/null +++ b/gcc/config/aarch64/host-aarch64-darwin.c @@ -0,0 +1,33 @@ +/* aarch64/arm64-darwin host-specific hook definitions. + +Copyright The GNU Toolchain Authors. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hosthooks.h" +#include "hosthooks-def.h" +#include "config/host-darwin.h" + +/* Darwin doesn't do anything special for arm64/aarch64 hosts; this file + exists just to include the generic config/host-darwin.h. */ + +const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER; diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 21d66d36f82cf..cac33ae812b38 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1,5 +1,5 @@ ;; Machine description for AArch64 architecture. -;; Copyright (C) 2009-2018 Free Software Foundation, Inc. +;; Copyright (C) 2009-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -22,6 +22,9 @@ ;; Mode Iterators ;; ------------------------------------------------------------------- +;; Condition-code iterators. +(define_mode_iterator CC_ONLY [CC]) +(define_mode_iterator CCFP_CCFPE [CCFP CCFPE]) ;; Iterator for General Purpose Integer registers (32- and 64-bit modes) (define_mode_iterator GPI [SI DI]) @@ -29,12 +32,22 @@ ;; Iterator for HI, SI, DI, some instructions can only work on these modes. (define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI]) +;; "Iterator" for just TI -- features like @pattern only work with iterators. +(define_mode_iterator JUST_TI [TI]) + ;; Iterator for QI and HI modes (define_mode_iterator SHORT [QI HI]) +;; Iterators for single modes, for "@" patterns. 
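+;; (Naming a pattern with a leading "@" makes the generators emit gen_*
+;; and maybe_gen_* wrappers that take the mode as a run-time argument,
+;; and that machinery only works through an iterator, hence these
+;; single-entry iterators.)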
+(define_mode_iterator SI_ONLY [SI]) +(define_mode_iterator DI_ONLY [DI]) + ;; Iterator for all integer modes (up to 64-bit) (define_mode_iterator ALLI [QI HI SI DI]) +;; Iterator for all integer modes (up to 128-bit) +(define_mode_iterator ALLI_TI [QI HI SI DI TI]) + ;; Iterator for all integer modes that can be extended (up to 64-bit) (define_mode_iterator ALLX [QI HI SI]) @@ -47,9 +60,16 @@ ;; Iterator for all scalar floating point modes (HF, SF, DF) (define_mode_iterator GPF_HF [HF SF DF]) +;; Iterator for all 16-bit scalar floating point modes (HF, BF) +(define_mode_iterator HFBF [HF BF]) + ;; Iterator for all scalar floating point modes (HF, SF, DF and TF) (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) +;; Iterator for all scalar floating point modes suitable for moving, including +;; special BF type (HF, SF, DF, TF and BF) +(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF]) + ;; Double vector modes. (define_mode_iterator VDF [V2SF V4HF]) @@ -67,7 +87,19 @@ (define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI]) ;; Double vector modes. -(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF]) +(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF V4BF]) + +;; Double vector modes suitable for moving. Includes BFmode. +(define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF]) + +;; All modes stored in registers d0-d31. +(define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF]) + +;; Copy of the above. +(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF]) + +;; All modes suitable to store/load pair (2 elements) using STP/LDP. +(define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF]) ;; Advanced SIMD, 64-bit container, all integer modes. (define_mode_iterator VD_BHSI [V8QI V4HI V2SI]) @@ -76,13 +108,25 @@ (define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI]) ;; Quad vector modes. -(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF]) +(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF]) + +;; Copy of the above. +(define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) + +;; Quad vector modes suitable for moving. Includes BFmode. +(define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) + +;; VQMOV without 2-element modes. +(define_mode_iterator VQMOV_NO2E [V16QI V8HI V4SI V8HF V8BF V4SF]) + +;; Quad integer vector modes. +(define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI]) ;; VQ without 2 element modes. -(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF]) +(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF]) -;; Quad vector with only 2 element modes. -(define_mode_iterator VQ_2E [V2DI V2DF]) +;; BFmode vector modes. +(define_mode_iterator VBF [V4BF V8BF]) ;; This mode iterator allows :P to be used for patterns that operate on ;; addresses in different modes. In LP64, only DI will match, while in @@ -95,7 +139,8 @@ (define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) ;; Advanced SIMD Float modes suitable for moving, loading and storing. -(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF]) +(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF + V4BF V8BF]) ;; Advanced SIMD Float modes. (define_mode_iterator VDQF [V2SF V4SF V2DF]) @@ -113,6 +158,9 @@ (HF "TARGET_SIMD_F16INST") SF DF]) +;; Scalar and vetor modes for SF, DF. +(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF]) + ;; Advanced SIMD single Float modes. (define_mode_iterator VDQSF [V2SF V4SF]) @@ -133,7 +181,12 @@ ;; All Advanced SIMD modes suitable for moving, loading, and storing. 
(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI - V4HF V8HF V2SF V4SF V2DF]) + V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) + +;; All Advanced SIMD modes suitable for moving, loading, and storing, +;; including special Bfloat vector types. +(define_mode_iterator VALL_F16MOV [V8QI V16QI V4HI V8HI V2SI V4SI V2DI + V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) ;; The VALL_F16 modes except the 128-bit 2-element ones. (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI @@ -144,10 +197,10 @@ ;; All Advanced SIMD modes and DI. (define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI - V4HF V8HF V2SF V4SF V2DF DI]) + V4HF V8HF V4BF V8BF V2SF V4SF V2DF DI]) ;; All Advanced SIMD modes, plus DI and DF. -(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI +(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI V4BF V8BF V2DI V4HF V8HF V2SF V4SF V2DF DI DF]) ;; Advanced SIMD modes for Integer reduction across lanes. @@ -156,6 +209,12 @@ ;; Advanced SIMD modes (except V2DI) for Integer reduction across lanes. (define_mode_iterator VDQV_S [V8QI V16QI V4HI V8HI V4SI]) +;; Advanced SIMD modes for Integer reduction across lanes (zero/sign extended). +(define_mode_iterator VDQV_E [V8QI V16QI V4HI V8HI]) + +;; Advanced SIMD modes for Integer widening reduction across lanes. +(define_mode_iterator VDQV_L [V8QI V16QI V4HI V8HI V4SI V2SI]) + ;; All double integer narrow-able modes. (define_mode_iterator VDN [V4HI V2SI DI]) @@ -170,7 +229,7 @@ (define_mode_iterator VQW [V16QI V8HI V4SI]) ;; Double vector modes for combines. -(define_mode_iterator VDC [V8QI V4HI V4HF V2SI V2SF DI DF]) +(define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF]) ;; Advanced SIMD modes except double int. (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) @@ -236,6 +295,19 @@ ;; Double scalar modes (define_mode_iterator DX [DI DF]) +;; Duplicate of the above +(define_mode_iterator DX2 [DI DF]) + +;; Single scalar modes +(define_mode_iterator SX [SI SF]) + +;; Duplicate of the above +(define_mode_iterator SX2 [SI SF]) + +;; Single and double integer and float modes +(define_mode_iterator DSX [DF DI SF SI]) + + ;; Modes available for Advanced SIMD mul lane operations. (define_mode_iterator VMUL [V4HI V8HI V2SI V4SI (V4HF "TARGET_SIMD_F16INST") @@ -246,50 +318,149 @@ ;; count. (define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF]) -;; All SVE vector modes. -(define_mode_iterator SVE_ALL [VNx16QI VNx8HI VNx4SI VNx2DI - VNx8HF VNx4SF VNx2DF]) +;; Iterators for single modes, for "@" patterns. +(define_mode_iterator VNx16QI_ONLY [VNx16QI]) +(define_mode_iterator VNx8HI_ONLY [VNx8HI]) +(define_mode_iterator VNx8BF_ONLY [VNx8BF]) +(define_mode_iterator VNx4SI_ONLY [VNx4SI]) +(define_mode_iterator VNx4SF_ONLY [VNx4SF]) +(define_mode_iterator VNx2DI_ONLY [VNx2DI]) +(define_mode_iterator VNx2DF_ONLY [VNx2DF]) ;; All SVE vector structure modes. (define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI - VNx16HF VNx8SF VNx4DF + VNx16BF VNx16HF VNx8SF VNx4DF VNx48QI VNx24HI VNx12SI VNx6DI - VNx24HF VNx12SF VNx6DF + VNx24BF VNx24HF VNx12SF VNx6DF VNx64QI VNx32HI VNx16SI VNx8DI - VNx32HF VNx16SF VNx8DF]) + VNx32BF VNx32HF VNx16SF VNx8DF]) + +;; All fully-packed SVE vector modes. +(define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI + VNx8BF VNx8HF VNx4SF VNx2DF]) + +;; All fully-packed SVE integer vector modes. +(define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI]) + +;; All fully-packed SVE floating-point vector modes. 
+(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF]) + +;; Fully-packed SVE integer vector modes that have 8-bit or 16-bit elements. +(define_mode_iterator SVE_FULL_BHI [VNx16QI VNx8HI]) + +;; Fully-packed SVE integer vector modes that have 8-bit, 16-bit or 32-bit +;; elements. +(define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI]) + +;; Fully-packed SVE vector modes that have 16-bit, 32-bit or 64-bit elements. +(define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI + VNx8BF VNx8HF VNx4SF VNx2DF]) + +;; Fully-packed SVE integer vector modes that have 16-bit, 32-bit or 64-bit +;; elements. +(define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI]) -;; All SVE vector modes that have 8-bit or 16-bit elements. -(define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF]) +;; Fully-packed SVE integer vector modes that have 16-bit or 32-bit +;; elements. +(define_mode_iterator SVE_FULL_HSI [VNx8HI VNx4SI]) -;; All SVE vector modes that have 8-bit, 16-bit or 32-bit elements. -(define_mode_iterator SVE_BHS [VNx16QI VNx8HI VNx4SI VNx8HF VNx4SF]) +;; Fully-packed SVE floating-point vector modes that have 16-bit or 32-bit +;; elements. +(define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF]) -;; All SVE integer vector modes that have 8-bit, 16-bit or 32-bit elements. -(define_mode_iterator SVE_BHSI [VNx16QI VNx8HI VNx4SI]) +;; Fully-packed SVE integer vector modes that have 16-bit or 64-bit elements. +(define_mode_iterator SVE_FULL_HDI [VNx8HI VNx2DI]) -;; All SVE integer vector modes that have 16-bit, 32-bit or 64-bit elements. -(define_mode_iterator SVE_HSDI [VNx16QI VNx8HI VNx4SI]) +;; Fully-packed SVE vector modes that have 32-bit or 64-bit elements. +(define_mode_iterator SVE_FULL_SD [VNx4SI VNx2DI VNx4SF VNx2DF]) -;; All SVE floating-point vector modes that have 16-bit or 32-bit elements. -(define_mode_iterator SVE_HSF [VNx8HF VNx4SF]) +;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements. +(define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI]) -;; All SVE vector modes that have 32-bit or 64-bit elements. -(define_mode_iterator SVE_SD [VNx4SI VNx2DI VNx4SF VNx2DF]) +;; Fully-packed SVE floating-point vector modes that have 32-bit or 64-bit +;; elements. +(define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF]) -;; All SVE vector modes that have 32-bit elements. -(define_mode_iterator SVE_S [VNx4SI VNx4SF]) +;; Same, but with the appropriate conditions for FMMLA support. +(define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM") + (VNx2DF "TARGET_SVE_F64MM")]) -;; All SVE vector modes that have 64-bit elements. -(define_mode_iterator SVE_D [VNx2DI VNx2DF]) +;; Fully-packed SVE vector modes that have 32-bit elements. +(define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF]) -;; All SVE integer vector modes that have 32-bit or 64-bit elements. -(define_mode_iterator SVE_SDI [VNx4SI VNx2DI]) +;; Fully-packed SVE vector modes that have 64-bit elements. +(define_mode_iterator SVE_FULL_D [VNx2DI VNx2DF]) + +;; All partial SVE integer modes. +(define_mode_iterator SVE_PARTIAL_I [VNx8QI VNx4QI VNx2QI + VNx4HI VNx2HI + VNx2SI]) + +;; All SVE vector modes. +(define_mode_iterator SVE_ALL [VNx16QI VNx8QI VNx4QI VNx2QI + VNx8HI VNx4HI VNx2HI + VNx8HF VNx4HF VNx2HF + VNx8BF VNx4BF VNx2BF + VNx4SI VNx2SI + VNx4SF VNx2SF + VNx2DI + VNx2DF]) ;; All SVE integer vector modes. 
-(define_mode_iterator SVE_I [VNx16QI VNx8HI VNx4SI VNx2DI]) +(define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI + VNx8HI VNx4HI VNx2HI + VNx4SI VNx2SI + VNx2DI]) + +;; SVE integer vector modes whose elements are 16 bits or wider. +(define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI + VNx4SI VNx2SI + VNx2DI]) + +;; SVE modes with 2 or 4 elements. +(define_mode_iterator SVE_24 [VNx2QI VNx2HI VNx2HF VNx2BF VNx2SI VNx2SF + VNx2DI VNx2DF + VNx4QI VNx4HI VNx4HF VNx4BF VNx4SI VNx4SF]) + +;; SVE integer modes with 2 or 4 elements. +(define_mode_iterator SVE_24I [VNx2QI VNx2HI VNx2SI VNx2DI + VNx4QI VNx4HI VNx4SI]) + +;; SVE modes with 2 elements. +(define_mode_iterator SVE_2 [VNx2QI VNx2HI VNx2HF VNx2BF + VNx2SI VNx2SF VNx2DI VNx2DF]) + +;; SVE integer modes with 2 elements, excluding the widest element. +(define_mode_iterator SVE_2BHSI [VNx2QI VNx2HI VNx2SI]) + +;; SVE integer modes with 2 elements, excluding the narrowest element. +(define_mode_iterator SVE_2HSDI [VNx2HI VNx2SI VNx2DI]) + +;; SVE modes with 4 elements. +(define_mode_iterator SVE_4 [VNx4QI VNx4HI VNx4HF VNx4BF VNx4SI VNx4SF]) + +;; SVE integer modes with 4 elements, excluding the widest element. +(define_mode_iterator SVE_4BHI [VNx4QI VNx4HI]) + +;; SVE integer modes with 4 elements, excluding the narrowest element. +(define_mode_iterator SVE_4HSI [VNx4HI VNx4SI]) + +;; SVE integer modes that can form the input to an SVE2 PMULL[BT] instruction. +(define_mode_iterator SVE2_PMULL_PAIR_I [VNx16QI VNx4SI + (VNx2DI "TARGET_SVE2_AES")]) + +;; Modes involved in extending or truncating SVE data, for 8 elements per +;; 128-bit block. +(define_mode_iterator VNx8_NARROW [VNx8QI]) +(define_mode_iterator VNx8_WIDE [VNx8HI]) + +;; ...same for 4 elements per 128-bit block. +(define_mode_iterator VNx4_NARROW [VNx4QI VNx4HI]) +(define_mode_iterator VNx4_WIDE [VNx4SI]) -;; All SVE floating-point vector modes. -(define_mode_iterator SVE_F [VNx8HF VNx4SF VNx2DF]) +;; ...same for 2 elements per 128-bit block. +(define_mode_iterator VNx2_NARROW [VNx2QI VNx2HI VNx2SI]) +(define_mode_iterator VNx2_WIDE [VNx2DI]) ;; All SVE predicate modes. (define_mode_iterator PRED_ALL [VNx16BI VNx8BI VNx4BI VNx2BI]) @@ -297,6 +468,12 @@ ;; SVE predicate modes that control 8-bit, 16-bit or 32-bit elements. (define_mode_iterator PRED_BHS [VNx16BI VNx8BI VNx4BI]) +;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. +(define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) + +;; Bfloat16 modes to which V4SF can be converted +(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) + ;; ------------------------------------------------------------------ ;; Unspec enumerations for Advance SIMD. These could well go into ;; aarch64.md but for their use in int_iterators here. @@ -315,6 +492,8 @@ UNSPEC_FMINV ; Used in aarch64-simd.md. UNSPEC_FADDV ; Used in aarch64-simd.md. UNSPEC_ADDV ; Used in aarch64-simd.md. + UNSPEC_SADDLV ; Used in aarch64-simd.md. + UNSPEC_UADDLV ; Used in aarch64-simd.md. UNSPEC_SMAXV ; Used in aarch64-simd.md. UNSPEC_SMINV ; Used in aarch64-simd.md. UNSPEC_UMAXV ; Used in aarch64-simd.md. @@ -325,8 +504,6 @@ UNSPEC_URHADD ; Used in aarch64-simd.md. UNSPEC_SHSUB ; Used in aarch64-simd.md. UNSPEC_UHSUB ; Used in aarch64-simd.md. - UNSPEC_SRHSUB ; Used in aarch64-simd.md. - UNSPEC_URHSUB ; Used in aarch64-simd.md. UNSPEC_ADDHN ; Used in aarch64-simd.md. UNSPEC_RADDHN ; Used in aarch64-simd.md. UNSPEC_SUBHN ; Used in aarch64-simd.md. @@ -342,6 +519,7 @@ UNSPEC_USQADD ; Used in aarch64-simd.md. UNSPEC_SUQADD ; Used in aarch64-simd.md. 
UNSPEC_SQXTUN ; Used in aarch64-simd.md. + UNSPEC_SQXTUN2 ; Used in aarch64-simd.md. UNSPEC_SQXTN ; Used in aarch64-simd.md. UNSPEC_UQXTN ; Used in aarch64-simd.md. UNSPEC_SSRA ; Used in aarch64-simd.md. @@ -431,36 +609,253 @@ UNSPEC_FMLSL ; Used in aarch64-simd.md. UNSPEC_FMLAL2 ; Used in aarch64-simd.md. UNSPEC_FMLSL2 ; Used in aarch64-simd.md. + UNSPEC_ADR ; Used in aarch64-sve.md. UNSPEC_SEL ; Used in aarch64-sve.md. + UNSPEC_BRKA ; Used in aarch64-sve.md. + UNSPEC_BRKB ; Used in aarch64-sve.md. + UNSPEC_BRKN ; Used in aarch64-sve.md. + UNSPEC_BRKPA ; Used in aarch64-sve.md. + UNSPEC_BRKPB ; Used in aarch64-sve.md. + UNSPEC_PFIRST ; Used in aarch64-sve.md. + UNSPEC_PNEXT ; Used in aarch64-sve.md. + UNSPEC_CNTP ; Used in aarch64-sve.md. + UNSPEC_SADDV ; Used in aarch64-sve.md. + UNSPEC_UADDV ; Used in aarch64-sve.md. UNSPEC_ANDV ; Used in aarch64-sve.md. UNSPEC_IORV ; Used in aarch64-sve.md. UNSPEC_XORV ; Used in aarch64-sve.md. UNSPEC_ANDF ; Used in aarch64-sve.md. UNSPEC_IORF ; Used in aarch64-sve.md. UNSPEC_XORF ; Used in aarch64-sve.md. + UNSPEC_REVB ; Used in aarch64-sve.md. + UNSPEC_REVH ; Used in aarch64-sve.md. + UNSPEC_REVW ; Used in aarch64-sve.md. + UNSPEC_REVBHW ; Used in aarch64-sve.md. UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md. UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md. - UNSPEC_COND_ADD ; Used in aarch64-sve.md. - UNSPEC_COND_SUB ; Used in aarch64-sve.md. - UNSPEC_COND_SMAX ; Used in aarch64-sve.md. - UNSPEC_COND_UMAX ; Used in aarch64-sve.md. - UNSPEC_COND_SMIN ; Used in aarch64-sve.md. - UNSPEC_COND_UMIN ; Used in aarch64-sve.md. - UNSPEC_COND_AND ; Used in aarch64-sve.md. - UNSPEC_COND_ORR ; Used in aarch64-sve.md. - UNSPEC_COND_EOR ; Used in aarch64-sve.md. - UNSPEC_COND_LT ; Used in aarch64-sve.md. - UNSPEC_COND_LE ; Used in aarch64-sve.md. - UNSPEC_COND_EQ ; Used in aarch64-sve.md. - UNSPEC_COND_NE ; Used in aarch64-sve.md. - UNSPEC_COND_GE ; Used in aarch64-sve.md. - UNSPEC_COND_GT ; Used in aarch64-sve.md. - UNSPEC_COND_LO ; Used in aarch64-sve.md. - UNSPEC_COND_LS ; Used in aarch64-sve.md. - UNSPEC_COND_HS ; Used in aarch64-sve.md. - UNSPEC_COND_HI ; Used in aarch64-sve.md. - UNSPEC_COND_UO ; Used in aarch64-sve.md. + UNSPEC_FMLA ; Used in aarch64-sve.md. + UNSPEC_FMLS ; Used in aarch64-sve.md. + UNSPEC_FEXPA ; Used in aarch64-sve.md. + UNSPEC_FMMLA ; Used in aarch64-sve.md. + UNSPEC_FTMAD ; Used in aarch64-sve.md. + UNSPEC_FTSMUL ; Used in aarch64-sve.md. + UNSPEC_FTSSEL ; Used in aarch64-sve.md. + UNSPEC_SMATMUL ; Used in aarch64-sve.md. + UNSPEC_UMATMUL ; Used in aarch64-sve.md. + UNSPEC_USMATMUL ; Used in aarch64-sve.md. + UNSPEC_TRN1Q ; Used in aarch64-sve.md. + UNSPEC_TRN2Q ; Used in aarch64-sve.md. + UNSPEC_UZP1Q ; Used in aarch64-sve.md. + UNSPEC_UZP2Q ; Used in aarch64-sve.md. + UNSPEC_ZIP1Q ; Used in aarch64-sve.md. + UNSPEC_ZIP2Q ; Used in aarch64-sve.md. + UNSPEC_TRN1_CONV ; Used in aarch64-sve.md. + UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPHI_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPHS_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPLE_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPLO_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPLS_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPLT_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md. + UNSPEC_COND_FABS ; Used in aarch64-sve.md. + UNSPEC_COND_FADD ; Used in aarch64-sve.md. + UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md. 
+ UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md. + UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md. + UNSPEC_COND_FCMGE ; Used in aarch64-sve.md. + UNSPEC_COND_FCMGT ; Used in aarch64-sve.md. + UNSPEC_COND_FCMLA ; Used in aarch64-sve.md. + UNSPEC_COND_FCMLA90 ; Used in aarch64-sve.md. + UNSPEC_COND_FCMLA180 ; Used in aarch64-sve.md. + UNSPEC_COND_FCMLA270 ; Used in aarch64-sve.md. + UNSPEC_COND_FCMLE ; Used in aarch64-sve.md. + UNSPEC_COND_FCMLT ; Used in aarch64-sve.md. + UNSPEC_COND_FCMNE ; Used in aarch64-sve.md. + UNSPEC_COND_FCMUO ; Used in aarch64-sve.md. + UNSPEC_COND_FCVT ; Used in aarch64-sve.md. + UNSPEC_COND_FCVTZS ; Used in aarch64-sve.md. + UNSPEC_COND_FCVTZU ; Used in aarch64-sve.md. + UNSPEC_COND_FDIV ; Used in aarch64-sve.md. + UNSPEC_COND_FMAX ; Used in aarch64-sve.md. + UNSPEC_COND_FMAXNM ; Used in aarch64-sve.md. + UNSPEC_COND_FMIN ; Used in aarch64-sve.md. + UNSPEC_COND_FMINNM ; Used in aarch64-sve.md. + UNSPEC_COND_FMLA ; Used in aarch64-sve.md. + UNSPEC_COND_FMLS ; Used in aarch64-sve.md. + UNSPEC_COND_FMUL ; Used in aarch64-sve.md. + UNSPEC_COND_FMULX ; Used in aarch64-sve.md. + UNSPEC_COND_FNEG ; Used in aarch64-sve.md. + UNSPEC_COND_FNMLA ; Used in aarch64-sve.md. + UNSPEC_COND_FNMLS ; Used in aarch64-sve.md. + UNSPEC_COND_FRECPX ; Used in aarch64-sve.md. + UNSPEC_COND_FRINTA ; Used in aarch64-sve.md. + UNSPEC_COND_FRINTI ; Used in aarch64-sve.md. + UNSPEC_COND_FRINTM ; Used in aarch64-sve.md. + UNSPEC_COND_FRINTN ; Used in aarch64-sve.md. + UNSPEC_COND_FRINTP ; Used in aarch64-sve.md. + UNSPEC_COND_FRINTX ; Used in aarch64-sve.md. + UNSPEC_COND_FRINTZ ; Used in aarch64-sve.md. + UNSPEC_COND_FSCALE ; Used in aarch64-sve.md. + UNSPEC_COND_FSQRT ; Used in aarch64-sve.md. + UNSPEC_COND_FSUB ; Used in aarch64-sve.md. + UNSPEC_COND_SCVTF ; Used in aarch64-sve.md. + UNSPEC_COND_UCVTF ; Used in aarch64-sve.md. + UNSPEC_LASTA ; Used in aarch64-sve.md. UNSPEC_LASTB ; Used in aarch64-sve.md. + UNSPEC_ASHIFT_WIDE ; Used in aarch64-sve.md. + UNSPEC_ASHIFTRT_WIDE ; Used in aarch64-sve.md. + UNSPEC_LSHIFTRT_WIDE ; Used in aarch64-sve.md. + UNSPEC_LDFF1 ; Used in aarch64-sve.md. + UNSPEC_LDNF1 ; Used in aarch64-sve.md. + UNSPEC_FCADD90 ; Used in aarch64-simd.md. + UNSPEC_FCADD270 ; Used in aarch64-simd.md. + UNSPEC_FCMLA ; Used in aarch64-simd.md. + UNSPEC_FCMLA90 ; Used in aarch64-simd.md. + UNSPEC_FCMLA180 ; Used in aarch64-simd.md. + UNSPEC_FCMLA270 ; Used in aarch64-simd.md. + UNSPEC_FCMUL ; Used in aarch64-simd.md. + UNSPEC_FCMUL_CONJ ; Used in aarch64-simd.md. + UNSPEC_FCMLA_CONJ ; Used in aarch64-simd.md. + UNSPEC_FCMLA180_CONJ ; Used in aarch64-simd.md. + UNSPEC_ASRD ; Used in aarch64-sve.md. + UNSPEC_ADCLB ; Used in aarch64-sve2.md. + UNSPEC_ADCLT ; Used in aarch64-sve2.md. + UNSPEC_ADDHNB ; Used in aarch64-sve2.md. + UNSPEC_ADDHNT ; Used in aarch64-sve2.md. + UNSPEC_BDEP ; Used in aarch64-sve2.md. + UNSPEC_BEXT ; Used in aarch64-sve2.md. + UNSPEC_BGRP ; Used in aarch64-sve2.md. + UNSPEC_CADD270 ; Used in aarch64-sve2.md. + UNSPEC_CADD90 ; Used in aarch64-sve2.md. + UNSPEC_CDOT ; Used in aarch64-sve2.md. + UNSPEC_CDOT180 ; Used in aarch64-sve2.md. + UNSPEC_CDOT270 ; Used in aarch64-sve2.md. + UNSPEC_CDOT90 ; Used in aarch64-sve2.md. + UNSPEC_CMLA ; Used in aarch64-sve2.md. + UNSPEC_CMLA180 ; Used in aarch64-sve2.md. + UNSPEC_CMLA270 ; Used in aarch64-sve2.md. + UNSPEC_CMLA90 ; Used in aarch64-sve2.md. + UNSPEC_CMLA_CONJ ; Used in aarch64-sve2.md. + UNSPEC_CMLA180_CONJ ; Used in aarch64-sve2.md. + UNSPEC_CMUL ; Used in aarch64-sve2.md. 
+ UNSPEC_CMUL_CONJ ; Used in aarch64-sve2.md. + UNSPEC_COND_FCVTLT ; Used in aarch64-sve2.md. + UNSPEC_COND_FCVTNT ; Used in aarch64-sve2.md. + UNSPEC_COND_FCVTX ; Used in aarch64-sve2.md. + UNSPEC_COND_FCVTXNT ; Used in aarch64-sve2.md. + UNSPEC_COND_FLOGB ; Used in aarch64-sve2.md. + UNSPEC_EORBT ; Used in aarch64-sve2.md. + UNSPEC_EORTB ; Used in aarch64-sve2.md. + UNSPEC_FADDP ; Used in aarch64-sve2.md. + UNSPEC_FMAXNMP ; Used in aarch64-sve2.md. + UNSPEC_FMAXP ; Used in aarch64-sve2.md. + UNSPEC_FMINNMP ; Used in aarch64-sve2.md. + UNSPEC_FMINP ; Used in aarch64-sve2.md. + UNSPEC_FMLALB ; Used in aarch64-sve2.md. + UNSPEC_FMLALT ; Used in aarch64-sve2.md. + UNSPEC_FMLSLB ; Used in aarch64-sve2.md. + UNSPEC_FMLSLT ; Used in aarch64-sve2.md. + UNSPEC_HISTCNT ; Used in aarch64-sve2.md. + UNSPEC_HISTSEG ; Used in aarch64-sve2.md. + UNSPEC_MATCH ; Used in aarch64-sve2.md. + UNSPEC_NMATCH ; Used in aarch64-sve2.md. + UNSPEC_PMULLB ; Used in aarch64-sve2.md. + UNSPEC_PMULLB_PAIR ; Used in aarch64-sve2.md. + UNSPEC_PMULLT ; Used in aarch64-sve2.md. + UNSPEC_PMULLT_PAIR ; Used in aarch64-sve2.md. + UNSPEC_RADDHNB ; Used in aarch64-sve2.md. + UNSPEC_RADDHNT ; Used in aarch64-sve2.md. + UNSPEC_RSHRNB ; Used in aarch64-sve2.md. + UNSPEC_RSHRNT ; Used in aarch64-sve2.md. + UNSPEC_RSUBHNB ; Used in aarch64-sve2.md. + UNSPEC_RSUBHNT ; Used in aarch64-sve2.md. + UNSPEC_SABDLB ; Used in aarch64-sve2.md. + UNSPEC_SABDLT ; Used in aarch64-sve2.md. + UNSPEC_SADDLB ; Used in aarch64-sve2.md. + UNSPEC_SADDLBT ; Used in aarch64-sve2.md. + UNSPEC_SADDLT ; Used in aarch64-sve2.md. + UNSPEC_SADDWB ; Used in aarch64-sve2.md. + UNSPEC_SADDWT ; Used in aarch64-sve2.md. + UNSPEC_SBCLB ; Used in aarch64-sve2.md. + UNSPEC_SBCLT ; Used in aarch64-sve2.md. + UNSPEC_SHRNB ; Used in aarch64-sve2.md. + UNSPEC_SHRNT ; Used in aarch64-sve2.md. + UNSPEC_SLI ; Used in aarch64-sve2.md. + UNSPEC_SMAXP ; Used in aarch64-sve2.md. + UNSPEC_SMINP ; Used in aarch64-sve2.md. + UNSPEC_SMULHRS ; Used in aarch64-sve2.md. + UNSPEC_SMULHS ; Used in aarch64-sve2.md. + UNSPEC_SMULLB ; Used in aarch64-sve2.md. + UNSPEC_SMULLT ; Used in aarch64-sve2.md. + UNSPEC_SQCADD270 ; Used in aarch64-sve2.md. + UNSPEC_SQCADD90 ; Used in aarch64-sve2.md. + UNSPEC_SQDMULLB ; Used in aarch64-sve2.md. + UNSPEC_SQDMULLBT ; Used in aarch64-sve2.md. + UNSPEC_SQDMULLT ; Used in aarch64-sve2.md. + UNSPEC_SQRDCMLAH ; Used in aarch64-sve2.md. + UNSPEC_SQRDCMLAH180 ; Used in aarch64-sve2.md. + UNSPEC_SQRDCMLAH270 ; Used in aarch64-sve2.md. + UNSPEC_SQRDCMLAH90 ; Used in aarch64-sve2.md. + UNSPEC_SQRSHRNB ; Used in aarch64-sve2.md. + UNSPEC_SQRSHRNT ; Used in aarch64-sve2.md. + UNSPEC_SQRSHRUNB ; Used in aarch64-sve2.md. + UNSPEC_SQRSHRUNT ; Used in aarch64-sve2.md. + UNSPEC_SQSHRNB ; Used in aarch64-sve2.md. + UNSPEC_SQSHRNT ; Used in aarch64-sve2.md. + UNSPEC_SQSHRUNB ; Used in aarch64-sve2.md. + UNSPEC_SQSHRUNT ; Used in aarch64-sve2.md. + UNSPEC_SQXTNB ; Used in aarch64-sve2.md. + UNSPEC_SQXTNT ; Used in aarch64-sve2.md. + UNSPEC_SQXTUNB ; Used in aarch64-sve2.md. + UNSPEC_SQXTUNT ; Used in aarch64-sve2.md. + UNSPEC_SRI ; Used in aarch64-sve2.md. + UNSPEC_SSHLLB ; Used in aarch64-sve2.md. + UNSPEC_SSHLLT ; Used in aarch64-sve2.md. + UNSPEC_SSUBLB ; Used in aarch64-sve2.md. + UNSPEC_SSUBLBT ; Used in aarch64-sve2.md. + UNSPEC_SSUBLT ; Used in aarch64-sve2.md. + UNSPEC_SSUBLTB ; Used in aarch64-sve2.md. + UNSPEC_SSUBWB ; Used in aarch64-sve2.md. + UNSPEC_SSUBWT ; Used in aarch64-sve2.md. + UNSPEC_SUBHNB ; Used in aarch64-sve2.md. 
+ UNSPEC_SUBHNT ; Used in aarch64-sve2.md. + UNSPEC_TBL2 ; Used in aarch64-sve2.md. + UNSPEC_UABDLB ; Used in aarch64-sve2.md. + UNSPEC_UABDLT ; Used in aarch64-sve2.md. + UNSPEC_UADDLB ; Used in aarch64-sve2.md. + UNSPEC_UADDLT ; Used in aarch64-sve2.md. + UNSPEC_UADDWB ; Used in aarch64-sve2.md. + UNSPEC_UADDWT ; Used in aarch64-sve2.md. + UNSPEC_UMAXP ; Used in aarch64-sve2.md. + UNSPEC_UMINP ; Used in aarch64-sve2.md. + UNSPEC_UMULHRS ; Used in aarch64-sve2.md. + UNSPEC_UMULHS ; Used in aarch64-sve2.md. + UNSPEC_UMULLB ; Used in aarch64-sve2.md. + UNSPEC_UMULLT ; Used in aarch64-sve2.md. + UNSPEC_UQRSHRNB ; Used in aarch64-sve2.md. + UNSPEC_UQRSHRNT ; Used in aarch64-sve2.md. + UNSPEC_UQSHRNB ; Used in aarch64-sve2.md. + UNSPEC_UQSHRNT ; Used in aarch64-sve2.md. + UNSPEC_UQXTNB ; Used in aarch64-sve2.md. + UNSPEC_UQXTNT ; Used in aarch64-sve2.md. + UNSPEC_USHLLB ; Used in aarch64-sve2.md. + UNSPEC_USHLLT ; Used in aarch64-sve2.md. + UNSPEC_USUBLB ; Used in aarch64-sve2.md. + UNSPEC_USUBLT ; Used in aarch64-sve2.md. + UNSPEC_USUBWB ; Used in aarch64-sve2.md. + UNSPEC_USUBWT ; Used in aarch64-sve2.md. + UNSPEC_USDOT ; Used in aarch64-simd.md. + UNSPEC_SUDOT ; Used in aarch64-simd.md. + UNSPEC_BFDOT ; Used in aarch64-simd.md. + UNSPEC_BFMLALB ; Used in aarch64-sve.md. + UNSPEC_BFMLALT ; Used in aarch64-sve.md. + UNSPEC_BFMMLA ; Used in aarch64-sve.md. + UNSPEC_BFCVTN ; Used in aarch64-simd.md. + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. + UNSPEC_BFCVT ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ @@ -479,7 +874,6 @@ UNSPECV_ATOMIC_CAS ; Represent an atomic CAS. UNSPECV_ATOMIC_SWP ; Represent an atomic SWP. UNSPECV_ATOMIC_OP ; Represent an atomic operation. - UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor @@ -490,6 +884,9 @@ ;; Mode attributes ;; ------------------------------------------------------------------- +;; "e" for signaling operations, "" for quiet operations. +(define_mode_attr e [(CCFP "") (CCFPE "e")]) + ;; In GPI templates, a string like "%0" will expand to "%w0" in the ;; 32-bit version and "%x0" in the 64-bit version. (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) @@ -557,6 +954,7 @@ (V2SI "2") (V4SI "4") (V2DI "2") (V4HF "4") (V8HF "8") + (V4BF "4") (V8BF "8") (V2SF "2") (V4SF "4") (V1DF "1") (V2DF "2") (DI "1") (DF "1")]) @@ -581,6 +979,24 @@ (define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63") (HF "#15") (SF "#31") (DF "#63")]) +;; The number of bits in a vector element, or controlled by a predicate +;; element. +(define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16") + (VNx4BI "32") (VNx2BI "64") + (VNx16QI "8") (VNx8HI "16") + (VNx4SI "32") (VNx2DI "64") + (VNx8HF "16") (VNx4SF "32") (VNx2DF "64")]) + +;; The number of bits in a vector container. 
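
;; Worked example for the attributes above (illustration only, no new
;; pattern is being added here): the "e" attribute lets one compare
;; pattern iterate over CCFP and CCFPE and print "fcmp<e>", giving the
;; quiet "fcmp" for CCFP and the signaling "fcmpe" for CCFPE.
;; Similarly <elem_bits> expands to the element width of the mode, for
;; example "32" for VNx4BI, whose predicate bits each control one
;; 32-bit element.
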
+(define_mode_attr container_bits [(VNx16QI "8") + (VNx8HI "16") (VNx8QI "16") (VNx8HF "16") + (VNx8BF "16") + (VNx4SI "32") (VNx4HI "32") (VNx4QI "32") + (VNx4SF "32") (VNx4HF "32") (VNx4BF "32") + (VNx2DI "64") (VNx2SI "64") (VNx2HI "64") + (VNx2QI "64") (VNx2DF "64") (VNx2SF "64") + (VNx2HF "64") (VNx2BF "64")]) + ;; Attribute to describe constants acceptable in logical operations (define_mode_attr lconst [(SI "K") (DI "L")]) @@ -595,12 +1011,20 @@ (define_mode_attr Vtype [(V8QI "8b") (V16QI "16b") (V4HI "4h") (V8HI "8h") + (V4BF "4h") (V8BF "8h") (V2SI "2s") (V4SI "4s") (DI "1d") (DF "1d") (V2DI "2d") (V2SF "2s") (V4SF "4s") (V2DF "2d") (V4HF "4h") (V8HF "8h")]) +;; Map mode to type used in widening multiplies. +(define_mode_attr Vcondtype [(V4HI "4h") (V8HI "4h") (V2SI "2s") (V4SI "2s")]) + +;; Map lane mode to name +(define_mode_attr Qlane [(V4HI "_v4hi") (V8HI "q_v4hi") + (V2SI "_v2si") (V4SI "q_v2si")]) + (define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") (V4SI "32") (V2DI "64")]) @@ -608,7 +1032,8 @@ (V4HI ".4h") (V8HI ".8h") (V2SI ".2s") (V4SI ".4s") (V2DI ".2d") (V4HF ".4h") - (V8HF ".8h") (V2SF ".2s") + (V8HF ".8h") (V4BF ".4h") + (V8BF ".8h") (V2SF ".2s") (V4SF ".4s") (V2DF ".2d") (DI "") (SI "") (HI "") (QI "") @@ -622,31 +1047,71 @@ (HI "")]) ;; Mode-to-individual element type mapping. -(define_mode_attr Vetype [(V8QI "b") (V16QI "b") (VNx16QI "b") (VNx16BI "b") - (V4HI "h") (V8HI "h") (VNx8HI "h") (VNx8BI "h") - (V2SI "s") (V4SI "s") (VNx4SI "s") (VNx4BI "s") - (V2DI "d") (VNx2DI "d") (VNx2BI "d") - (V4HF "h") (V8HF "h") (VNx8HF "h") - (V2SF "s") (V4SF "s") (VNx4SF "s") - (V2DF "d") (VNx2DF "d") - (HF "h") - (SF "s") (DF "d") - (QI "b") (HI "h") - (SI "s") (DI "d")]) +(define_mode_attr Vetype [(V8QI "b") (V16QI "b") + (V4HI "h") (V8HI "h") + (V2SI "s") (V4SI "s") + (V2DI "d") + (V4HF "h") (V8HF "h") + (V2SF "s") (V4SF "s") + (V2DF "d") + (VNx16BI "b") (VNx8BI "h") (VNx4BI "s") (VNx2BI "d") + (VNx16QI "b") (VNx8QI "b") (VNx4QI "b") (VNx2QI "b") + (VNx8HI "h") (VNx4HI "h") (VNx2HI "h") + (VNx8HF "h") (VNx4HF "h") (VNx2HF "h") + (VNx8BF "h") (VNx4BF "h") (VNx2BF "h") + (VNx4SI "s") (VNx2SI "s") + (VNx4SF "s") (VNx2SF "s") + (VNx2DI "d") + (VNx2DF "d") + (BF "h") (V4BF "h") (V8BF "h") + (HF "h") + (SF "s") (DF "d") + (QI "b") (HI "h") + (SI "s") (DI "d")]) + +;; Like Vetype, but map to types that are a quarter of the element size. +(define_mode_attr Vetype_fourth [(VNx4SI "b") (VNx2DI "h")]) ;; Equivalent of "size" for a vector element. -(define_mode_attr Vesize [(VNx16QI "b") - (VNx8HI "h") (VNx8HF "h") - (VNx4SI "w") (VNx4SF "w") - (VNx2DI "d") (VNx2DF "d") +(define_mode_attr Vesize [(VNx16QI "b") (VNx8QI "b") (VNx4QI "b") (VNx2QI "b") + (VNx8HI "h") (VNx4HI "h") (VNx2HI "h") + (VNx8HF "h") (VNx4HF "h") (VNx2HF "h") + (VNx8BF "h") (VNx4BF "h") (VNx2BF "h") + (VNx4SI "w") (VNx2SI "w") + (VNx4SF "w") (VNx2SF "w") + (VNx2DI "d") + (VNx2DF "d") (VNx32QI "b") (VNx48QI "b") (VNx64QI "b") (VNx16HI "h") (VNx24HI "h") (VNx32HI "h") (VNx16HF "h") (VNx24HF "h") (VNx32HF "h") + (VNx16BF "h") (VNx24BF "h") (VNx32BF "h") (VNx8SI "w") (VNx12SI "w") (VNx16SI "w") (VNx8SF "w") (VNx12SF "w") (VNx16SF "w") (VNx4DI "d") (VNx6DI "d") (VNx8DI "d") (VNx4DF "d") (VNx6DF "d") (VNx8DF "d")]) +;; The Z register suffix for an SVE mode's element container, i.e. the +;; Vetype of full SVE modes that have the same number of elements. 
+(define_mode_attr Vctype [(VNx16QI "b") (VNx8QI "h") (VNx4QI "s") (VNx2QI "d") + (VNx8HI "h") (VNx4HI "s") (VNx2HI "d") + (VNx8HF "h") (VNx4HF "s") (VNx2HF "d") + (VNx8BF "h") (VNx4BF "s") (VNx2BF "d") + (VNx4SI "s") (VNx2SI "d") + (VNx4SF "s") (VNx2SF "d") + (VNx2DI "d") + (VNx2DF "d")]) + +;; The instruction mnemonic suffix for an SVE mode's element container, +;; i.e. the Vewtype of full SVE modes that have the same number of elements. +(define_mode_attr Vcwtype [(VNx16QI "b") (VNx8QI "h") (VNx4QI "w") (VNx2QI "d") + (VNx8HI "h") (VNx4HI "w") (VNx2HI "d") + (VNx8HF "h") (VNx4HF "w") (VNx2HF "d") + (VNx8BF "h") (VNx4BF "w") (VNx2BF "d") + (VNx4SI "w") (VNx2SI "d") + (VNx4SF "w") (VNx2SF "d") + (VNx2DI "d") + (VNx2DF "d")]) + ;; Vetype is used everywhere in scheduling type and assembly output, ;; sometimes they are not the same, for example HF modes on some ;; instructions. stype is defined to represent scheduling type @@ -665,45 +1130,64 @@ (V8HF "16b") (V2SF "8b") (V4SF "16b") (V2DF "16b") (DI "8b") (DF "8b") - (SI "8b") (SF "8b")]) + (SI "8b") (SF "8b") + (V4BF "8b") (V8BF "16b")]) ;; Define element mode for each vector mode. -(define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (VNx16QI "QI") - (V4HI "HI") (V8HI "HI") (VNx8HI "HI") - (V2SI "SI") (V4SI "SI") (VNx4SI "SI") - (DI "DI") (V2DI "DI") (VNx2DI "DI") - (V4HF "HF") (V8HF "HF") (VNx8HF "HF") - (V2SF "SF") (V4SF "SF") (VNx4SF "SF") - (DF "DF") (V2DF "DF") (VNx2DF "DF") - (SI "SI") (HI "HI") - (QI "QI")]) +(define_mode_attr VEL [(V8QI "QI") (V16QI "QI") + (V4HI "HI") (V8HI "HI") + (V2SI "SI") (V4SI "SI") + (DI "DI") (V2DI "DI") + (V4HF "HF") (V8HF "HF") + (V2SF "SF") (V4SF "SF") + (DF "DF") (V2DF "DF") + (SI "SI") (HI "HI") + (QI "QI") + (V4BF "BF") (V8BF "BF") + (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI") + (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI") + (VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF") + (VNx8BF "BF") (VNx4BF "BF") (VNx2BF "BF") + (VNx4SI "SI") (VNx2SI "SI") + (VNx4SF "SF") (VNx2SF "SF") + (VNx2DI "DI") + (VNx2DF "DF")]) ;; Define element mode for each vector mode (lower case). -(define_mode_attr Vel [(V8QI "qi") (V16QI "qi") (VNx16QI "qi") - (V4HI "hi") (V8HI "hi") (VNx8HI "hi") - (V2SI "si") (V4SI "si") (VNx4SI "si") - (DI "di") (V2DI "di") (VNx2DI "di") - (V4HF "hf") (V8HF "hf") (VNx8HF "hf") - (V2SF "sf") (V4SF "sf") (VNx4SF "sf") - (V2DF "df") (DF "df") (VNx2DF "df") - (SI "si") (HI "hi") - (QI "qi")]) +(define_mode_attr Vel [(V8QI "qi") (V16QI "qi") + (V4HI "hi") (V8HI "hi") + (V2SI "si") (V4SI "si") + (DI "di") (V2DI "di") + (V4HF "hf") (V8HF "hf") + (V2SF "sf") (V4SF "sf") + (V2DF "df") (DF "df") + (SI "si") (HI "hi") + (QI "qi") + (V4BF "bf") (V8BF "bf") + (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi") + (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi") + (VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf") + (VNx8BF "bf") (VNx4BF "bf") (VNx2BF "bf") + (VNx4SI "si") (VNx2SI "si") + (VNx4SF "sf") (VNx2SF "sf") + (VNx2DI "di") + (VNx2DF "df")]) ;; Element mode with floating-point values replaced by like-sized integers. (define_mode_attr VEL_INT [(VNx16QI "QI") - (VNx8HI "HI") (VNx8HF "HI") + (VNx8HI "HI") (VNx8HF "HI") (VNx8BF "HI") (VNx4SI "SI") (VNx4SF "SI") (VNx2DI "DI") (VNx2DF "DI")]) ;; Gives the mode of the 128-bit lowpart of an SVE vector. (define_mode_attr V128 [(VNx16QI "V16QI") - (VNx8HI "V8HI") (VNx8HF "V8HF") + (VNx8HI "V8HI") (VNx8HF "V8HF") (VNx8BF "V8BF") (VNx4SI "V4SI") (VNx4SF "V4SF") (VNx2DI "V2DI") (VNx2DF "V2DF")]) ;; ...and again in lower case. 
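
;; Worked example for the container attributes above (illustration
;; only): the partial mode VNx4QI keeps one 8-bit element in each
;; 32-bit container, so its <Vctype> is "s" and its <Vcwtype> is "w".
;; A predicated load of it is therefore printed with the 32-bit
;; container suffix, e.g. "ld1b z0.s, p0/z, [x0]", even though only
;; one byte per container is accessed.
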
(define_mode_attr v128 [(VNx16QI "v16qi") - (VNx8HI "v8hi") (VNx8HF "v8hf") + (VNx8HI "v8hi") (VNx8HF "v8hf") (VNx8BF "v8bf") (VNx4SI "v4si") (VNx4SF "v4sf") (VNx2DI "v2di") (VNx2DF "v2df")]) @@ -731,18 +1215,20 @@ (V2SI "SI") (V4SI "V2SI") (V2DI "DI") (V2SF "SF") (V4SF "V2SF") (V4HF "V2HF") - (V8HF "V4HF") (V2DF "DF")]) + (V8HF "V4HF") (V2DF "DF") + (V8BF "V4BF")]) ;; Half modes of all vector modes, in lower-case. (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi") (V4HI "v2hi") (V8HI "v4hi") + (V8HF "v4hf") (V8BF "v4bf") (V2SI "si") (V4SI "v2si") (V2DI "di") (V2SF "sf") (V4SF "v2sf") (V2DF "df")]) ;; Double modes of vector modes. (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI") - (V4HF "V8HF") + (V4HF "V8HF") (V4BF "V8BF") (V2SI "V4SI") (V2SF "V4SF") (SI "V2SI") (DI "V2DI") (DF "V2DF")]) @@ -752,7 +1238,7 @@ ;; Double modes of vector modes (lower case). (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") - (V4HF "v8hf") + (V4HF "v8hf") (V4BF "v8bf") (V2SI "v4si") (V2SF "v4sf") (SI "v2si") (DI "v2di") (DF "v2df")]) @@ -771,11 +1257,18 @@ (V2DI "V2SI") (DI "SI") (SI "HI") (HI "QI")]) +(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi") + (V2DI "v2si")]) ;; Narrowed quad-modes for VQN (Used for XTN2). (define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")]) +;; Narrowed modes of vector modes. +(define_mode_attr VNARROW [(VNx8HI "VNx16QI") + (VNx4SI "VNx8HI") (VNx4SF "VNx8HF") + (VNx2DI "VNx4SI") (VNx2DF "VNx4SF")]) + ;; Register suffix narrowed modes for VQN. (define_mode_attr Vntype [(V8HI "8b") (V4SI "4h") (V2DI "2s")]) @@ -810,18 +1303,43 @@ ;; Widened mode register suffixes for VD_BHSI/VQW/VQ_HSF. (define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s") - (V2SI "2d") (V16QI "8h") + (V2SI "2d") (V16QI "8h") (V8HI "4s") (V4SI "2d") (V8HF "4s") (V4SF "2d")]) -;; SVE vector after widening +;; Widened scalar register suffixes. +(define_mode_attr Vwstype [(V8QI "h") (V4HI "s") + (V2SI "") (V16QI "h") + (V8HI "s") (V4SI "d")]) +;; Add a .1d for V2SI. +(define_mode_attr Vwsuf [(V8QI "") (V4HI "") + (V2SI ".1d") (V16QI "") + (V8HI "") (V4SI "")]) + +;; Scalar mode of widened vector reduction. +(define_mode_attr VWIDE_S [(V8QI "HI") (V4HI "SI") + (V2SI "DI") (V16QI "HI") + (V8HI "SI") (V4SI "DI")]) + +;; Widened mode with half the element register suffixes for VD_BHSI/VQW/VQ_HSF. +(define_mode_attr Vwhalf [(V8QI "4h") (V4HI "2s") + (V2SI "1d") (V16QI "8h") + (V8HI "4s") (V4SI "2d")]) + +;; SVE vector after narrowing. +(define_mode_attr Ventype [(VNx8HI "b") + (VNx4SI "h") (VNx4SF "h") + (VNx2DI "s") (VNx2DF "s")]) + +;; SVE vector after widening. (define_mode_attr Vewtype [(VNx16QI "h") (VNx8HI "s") (VNx8HF "s") - (VNx4SI "d") (VNx4SF "d")]) + (VNx4SI "d") (VNx4SF "d") + (VNx2DI "q")]) ;; Widened mode register suffixes for VDW/VQW. (define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s") - (V2SI ".2d") (V16QI ".8h") + (V2SI ".2d") (V16QI ".8h") (V8HI ".4s") (V4SI ".2d") (V4HF ".4s") (V2SF ".2d") (SI "") (HI "")]) @@ -832,23 +1350,36 @@ (V4SF "2s")]) ;; Define corresponding core/FP element mode for each vector mode. -(define_mode_attr vw [(V8QI "w") (V16QI "w") (VNx16QI "w") - (V4HI "w") (V8HI "w") (VNx8HI "w") - (V2SI "w") (V4SI "w") (VNx4SI "w") - (DI "x") (V2DI "x") (VNx2DI "x") - (VNx8HF "h") - (V2SF "s") (V4SF "s") (VNx4SF "s") - (V2DF "d") (VNx2DF "d")]) +(define_mode_attr vw [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "s") (V4SF "s") + (V2DF "d")]) ;; Corresponding core element mode for each vector mode. 
This is a ;; variation on mapping FP modes to GP regs. -(define_mode_attr vwcore [(V8QI "w") (V16QI "w") (VNx16QI "w") - (V4HI "w") (V8HI "w") (VNx8HI "w") - (V2SI "w") (V4SI "w") (VNx4SI "w") - (DI "x") (V2DI "x") (VNx2DI "x") - (V4HF "w") (V8HF "w") (VNx8HF "w") - (V2SF "w") (V4SF "w") (VNx4SF "w") - (V2DF "x") (VNx2DF "x")]) +(define_mode_attr vwcore [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V4HF "w") (V8HF "w") + (V2SF "w") (V4SF "w") + (V2DF "x") + (VNx16QI "w") (VNx8QI "w") (VNx4QI "w") (VNx2QI "w") + (VNx8HI "w") (VNx4HI "w") (VNx2HI "w") + (VNx8HF "w") (VNx4HF "w") (VNx2HF "w") + (VNx8BF "w") (VNx4BF "w") (VNx2BF "w") + (VNx4SI "w") (VNx2SI "w") + (VNx4SF "w") (VNx2SF "w") + (VNx2DI "x") + (VNx2DF "x")]) + +;; Like vwcore, but for the container mode rather than the element mode. +(define_mode_attr vccore [(VNx16QI "w") (VNx8QI "w") (VNx4QI "w") (VNx2QI "x") + (VNx8HI "w") (VNx4HI "w") (VNx2HI "x") + (VNx4SI "w") (VNx2SI "x") + (VNx2DI "x")]) ;; Double vector types for ALLX. (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")]) @@ -859,11 +1390,14 @@ (V2SI "V2SI") (V4SI "V4SI") (DI "DI") (V2DI "V2DI") (V4HF "V4HI") (V8HF "V8HI") + (V4BF "V4HI") (V8BF "V8HI") (V2SF "V2SI") (V4SF "V4SI") (DF "DI") (V2DF "V2DI") - (SF "SI") (HF "HI") + (SF "SI") (SI "SI") + (HF "HI") (VNx16QI "VNx16QI") (VNx8HI "VNx8HI") (VNx8HF "VNx8HI") + (VNx8BF "VNx8HI") (VNx4SI "VNx4SI") (VNx4SF "VNx4SI") (VNx2DI "VNx2DI") (VNx2DF "VNx2DI") ]) @@ -874,21 +1408,56 @@ (V2SI "v2si") (V4SI "v4si") (DI "di") (V2DI "v2di") (V4HF "v4hi") (V8HF "v8hi") + (V4BF "v4hi") (V8BF "v8hi") (V2SF "v2si") (V4SF "v4si") (DF "di") (V2DF "v2di") (SF "si") (VNx16QI "vnx16qi") (VNx8HI "vnx8hi") (VNx8HF "vnx8hi") + (VNx8BF "vnx8hi") (VNx4SI "vnx4si") (VNx4SF "vnx4si") (VNx2DI "vnx2di") (VNx2DF "vnx2di") ]) ;; Floating-point equivalent of selected modes. -(define_mode_attr V_FP_EQUIV [(VNx4SI "VNx4SF") (VNx4SF "VNx4SF") +(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF") + (VNx8BF "VNx8HF") + (VNx4SI "VNx4SF") (VNx4SF "VNx4SF") (VNx2DI "VNx2DF") (VNx2DF "VNx2DF")]) -(define_mode_attr v_fp_equiv [(VNx4SI "vnx4sf") (VNx4SF "vnx4sf") +(define_mode_attr v_fp_equiv [(VNx8HI "vnx8hf") (VNx8HF "vnx8hf") + (VNx8BF "vnx8hf") + (VNx4SI "vnx4sf") (VNx4SF "vnx4sf") (VNx2DI "vnx2df") (VNx2DF "vnx2df")]) +;; Maps full and partial vector modes of any element type to a full-vector +;; integer mode with the same number of units. +(define_mode_attr V_INT_CONTAINER [(VNx16QI "VNx16QI") (VNx8QI "VNx8HI") + (VNx4QI "VNx4SI") (VNx2QI "VNx2DI") + (VNx8HI "VNx8HI") (VNx4HI "VNx4SI") + (VNx2HI "VNx2DI") + (VNx4SI "VNx4SI") (VNx2SI "VNx2DI") + (VNx2DI "VNx2DI") + (VNx8HF "VNx8HI") (VNx4HF "VNx4SI") + (VNx2HF "VNx2DI") + (VNx8BF "VNx8HI") (VNx4BF "VNx4SI") + (VNx2BF "VNx2DI") + (VNx4SF "VNx4SI") (VNx2SF "VNx2DI") + (VNx2DF "VNx2DI")]) + +;; Lower-case version of V_INT_CONTAINER. +(define_mode_attr v_int_container [(VNx16QI "vnx16qi") (VNx8QI "vnx8hi") + (VNx4QI "vnx4si") (VNx2QI "vnx2di") + (VNx8HI "vnx8hi") (VNx4HI "vnx4si") + (VNx2HI "vnx2di") + (VNx4SI "vnx4si") (VNx2SI "vnx2di") + (VNx2DI "vnx2di") + (VNx8HF "vnx8hi") (VNx4HF "vnx4si") + (VNx2HF "vnx2di") + (VNx8BF "vnx8hi") (VNx4BF "vnx4si") + (VNx2BF "vnx2di") + (VNx4SF "vnx4si") (VNx2SF "vnx2di") + (VNx2DF "vnx2di")]) + ;; Mode for vector conditional operations where the comparison has ;; different type from the lhs. 
(define_mode_attr V_cmp_mixed [(V2SI "V2SF") (V4SI "V4SF") @@ -907,6 +1476,9 @@ (QI "qi") (HI "hi") (SI "si")]) +;; Like ve_mode but for the half-width modes. +(define_mode_attr vn_mode [(V8HI "qi") (V4SI "hi") (V2DI "si")]) + ;; Vm for lane instructions is restricted to FP_LO_REGS. (define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x") (V2SI "w") (V4SI "w") (SI "w")]) @@ -940,6 +1512,7 @@ (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") (V4HI "V8HI") (V8HI "V4HI") + (V8BF "V4BF") (V4BF "V8BF") (V2SI "V4SI") (V4SI "V2SI") (DI "V2DI") (V2DI "DI") (V2SF "V4SF") (V4SF "V2SF") @@ -952,6 +1525,7 @@ (DI "to_128") (V2DI "to_64") (V4HF "to_128") (V8HF "to_64") (V2SF "to_128") (V4SF "to_64") + (V4BF "to_128") (V8BF "to_64") (DF "to_128") (V2DF "to_64")]) ;; For certain vector-by-element multiplication instructions we must @@ -985,9 +1559,11 @@ ;; Defined to '_q' for 128-bit types. (define_mode_attr q [(V8QI "") (V16QI "_q") (V4HI "") (V8HI "_q") + (V4BF "") (V8BF "_q") (V2SI "") (V4SI "_q") (DI "") (V2DI "_q") (V4HF "") (V8HF "_q") + (V4BF "") (V8BF "_q") (V2SF "") (V4SF "_q") (V2DF "_q") (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")]) @@ -999,13 +1575,18 @@ (V2SF "p") (V4SF "v") (V4HF "v") (V8HF "v")]) -(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) -(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) +(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi") + (VNx4SI "vnx16qi") (VNx2DI "vnx8hi")]) +(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI") + (VNx4SI "VNx16QI") (VNx2DI "VNx8HI")]) ;; Register suffix for DOTPROD input types from the return type. (define_mode_attr Vdottype [(V2SI "8b") (V4SI "16b")]) +;; Register suffix for BFDOT input types from the return type. +(define_mode_attr Vbfdottype [(V2SF "4h") (V4SF "8h")]) + ;; Sum of lengths of instructions needed to move vector registers of a mode. (define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")]) @@ -1016,100 +1597,177 @@ ;; Width of 2nd and 3rd arguments to fp16 vector multiply add/sub (define_mode_attr VFMLA_W [(V2SF "V4HF") (V4SF "V8HF")]) +;; Width of 2nd and 3rd arguments to bf16 vector multiply add/sub +(define_mode_attr VBFMLA_W [(V2SF "V4BF") (V4SF "V8BF")]) + (define_mode_attr VFMLA_SEL_W [(V2SF "V2HF") (V4SF "V4HF")]) (define_mode_attr f16quad [(V2SF "") (V4SF "q")]) +(define_mode_attr isquadop [(V8QI "") (V16QI "q") (V4BF "") (V8BF "q")]) + (define_code_attr f16mac [(plus "a") (minus "s")]) +;; Map smax to smin and umax to umin. +(define_code_attr max_opp [(smax "smin") (umax "umin")]) + +;; Same as above, but louder. +(define_code_attr MAX_OPP [(smax "SMIN") (umax "UMIN")]) + ;; The number of subvectors in an SVE_STRUCT. (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2") (VNx8SI "2") (VNx4DI "2") + (VNx16BF "2") (VNx16HF "2") (VNx8SF "2") (VNx4DF "2") (VNx48QI "3") (VNx24HI "3") (VNx12SI "3") (VNx6DI "3") + (VNx24BF "3") (VNx24HF "3") (VNx12SF "3") (VNx6DF "3") (VNx64QI "4") (VNx32HI "4") (VNx16SI "4") (VNx8DI "4") + (VNx32BF "4") (VNx32HF "4") (VNx16SF "4") (VNx8DF "4")]) ;; The number of instruction bytes needed for an SVE_STRUCT move. This is ;; equal to vector_count * 4. 
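
;; Worked example for the <VSI2QI>/<vsi2qi> pairs above (illustration
;; only): each lane of a dot-product accumulator sums four products of
;; quarter-width elements, so the accumulator and input modes pair up
;; as V2SI/V8QI, V4SI/V16QI and, for SVE, VNx4SI/VNx16QI; the new
;; VNx2DI/VNx8HI entry covers the 64-bit dot products, which take four
;; 16-bit elements per lane.  The attribute supplies the input-mode
;; half of the dot-product pattern names.
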
(define_mode_attr insn_length [(VNx32QI "8") (VNx16HI "8") (VNx8SI "8") (VNx4DI "8") + (VNx16BF "8") (VNx16HF "8") (VNx8SF "8") (VNx4DF "8") (VNx48QI "12") (VNx24HI "12") (VNx12SI "12") (VNx6DI "12") + (VNx24BF "12") (VNx24HF "12") (VNx12SF "12") (VNx6DF "12") (VNx64QI "16") (VNx32HI "16") (VNx16SI "16") (VNx8DI "16") + (VNx32BF "16") (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")]) ;; The type of a subvector in an SVE_STRUCT. (define_mode_attr VSINGLE [(VNx32QI "VNx16QI") (VNx16HI "VNx8HI") (VNx16HF "VNx8HF") + (VNx16BF "VNx8BF") (VNx8SI "VNx4SI") (VNx8SF "VNx4SF") (VNx4DI "VNx2DI") (VNx4DF "VNx2DF") (VNx48QI "VNx16QI") (VNx24HI "VNx8HI") (VNx24HF "VNx8HF") + (VNx24BF "VNx8BF") (VNx12SI "VNx4SI") (VNx12SF "VNx4SF") (VNx6DI "VNx2DI") (VNx6DF "VNx2DF") (VNx64QI "VNx16QI") (VNx32HI "VNx8HI") (VNx32HF "VNx8HF") + (VNx32BF "VNx8BF") (VNx16SI "VNx4SI") (VNx16SF "VNx4SF") (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")]) ;; ...and again in lower case. (define_mode_attr vsingle [(VNx32QI "vnx16qi") (VNx16HI "vnx8hi") (VNx16HF "vnx8hf") + (VNx16BF "vnx8bf") (VNx8SI "vnx4si") (VNx8SF "vnx4sf") (VNx4DI "vnx2di") (VNx4DF "vnx2df") (VNx48QI "vnx16qi") (VNx24HI "vnx8hi") (VNx24HF "vnx8hf") + (VNx24BF "vnx8bf") (VNx12SI "vnx4si") (VNx12SF "vnx4sf") (VNx6DI "vnx2di") (VNx6DF "vnx2df") (VNx64QI "vnx16qi") (VNx32HI "vnx8hi") (VNx32HF "vnx8hf") + (VNx32BF "vnx8bf") (VNx16SI "vnx4si") (VNx16SF "vnx4sf") (VNx8DI "vnx2di") (VNx8DF "vnx2df")]) ;; The predicate mode associated with an SVE data mode. For structure modes ;; this is equivalent to the of the subvector mode. -(define_mode_attr VPRED [(VNx16QI "VNx16BI") - (VNx8HI "VNx8BI") (VNx8HF "VNx8BI") - (VNx4SI "VNx4BI") (VNx4SF "VNx4BI") - (VNx2DI "VNx2BI") (VNx2DF "VNx2BI") +(define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI "VNx8BI") + (VNx4QI "VNx4BI") (VNx2QI "VNx2BI") + (VNx8HI "VNx8BI") (VNx4HI "VNx4BI") (VNx2HI "VNx2BI") + (VNx8HF "VNx8BI") (VNx4HF "VNx4BI") (VNx2HF "VNx2BI") + (VNx8BF "VNx8BI") (VNx4BF "VNx4BI") (VNx2BF "VNx2BI") + (VNx4SI "VNx4BI") (VNx2SI "VNx2BI") + (VNx4SF "VNx4BI") (VNx2SF "VNx2BI") + (VNx2DI "VNx2BI") + (VNx2DF "VNx2BI") (VNx32QI "VNx16BI") (VNx16HI "VNx8BI") (VNx16HF "VNx8BI") + (VNx16BF "VNx8BI") (VNx8SI "VNx4BI") (VNx8SF "VNx4BI") (VNx4DI "VNx2BI") (VNx4DF "VNx2BI") (VNx48QI "VNx16BI") (VNx24HI "VNx8BI") (VNx24HF "VNx8BI") + (VNx24BF "VNx8BI") (VNx12SI "VNx4BI") (VNx12SF "VNx4BI") (VNx6DI "VNx2BI") (VNx6DF "VNx2BI") (VNx64QI "VNx16BI") (VNx32HI "VNx8BI") (VNx32HF "VNx8BI") + (VNx32BF "VNx8BI") (VNx16SI "VNx4BI") (VNx16SF "VNx4BI") (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")]) ;; ...and again in lower case. 
-(define_mode_attr vpred [(VNx16QI "vnx16bi") - (VNx8HI "vnx8bi") (VNx8HF "vnx8bi") - (VNx4SI "vnx4bi") (VNx4SF "vnx4bi") - (VNx2DI "vnx2bi") (VNx2DF "vnx2bi") +(define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi") + (VNx4QI "vnx4bi") (VNx2QI "vnx2bi") + (VNx8HI "vnx8bi") (VNx4HI "vnx4bi") (VNx2HI "vnx2bi") + (VNx8HF "vnx8bi") (VNx4HF "vnx4bi") (VNx2HF "vnx2bi") + (VNx8BF "vnx8bi") (VNx4BF "vnx4bi") (VNx2BF "vnx2bi") + (VNx4SI "vnx4bi") (VNx2SI "vnx2bi") + (VNx4SF "vnx4bi") (VNx2SF "vnx2bi") + (VNx2DI "vnx2bi") + (VNx2DF "vnx2bi") (VNx32QI "vnx16bi") (VNx16HI "vnx8bi") (VNx16HF "vnx8bi") + (VNx16BF "vnx8bi") (VNx8SI "vnx4bi") (VNx8SF "vnx4bi") (VNx4DI "vnx2bi") (VNx4DF "vnx2bi") (VNx48QI "vnx16bi") (VNx24HI "vnx8bi") (VNx24HF "vnx8bi") + (VNx24BF "vnx8bi") (VNx12SI "vnx4bi") (VNx12SF "vnx4bi") (VNx6DI "vnx2bi") (VNx6DF "vnx2bi") (VNx64QI "vnx16bi") (VNx32HI "vnx8bi") (VNx32HF "vnx4bi") + (VNx32BF "vnx8bi") (VNx16SI "vnx4bi") (VNx16SF "vnx4bi") (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")]) +(define_mode_attr VDOUBLE [(VNx16QI "VNx32QI") + (VNx8HI "VNx16HI") (VNx8HF "VNx16HF") + (VNx8BF "VNx16BF") + (VNx4SI "VNx8SI") (VNx4SF "VNx8SF") + (VNx2DI "VNx4DI") (VNx2DF "VNx4DF")]) + +;; On AArch64 the By element instruction doesn't have a 2S variant. +;; However because the instruction always selects a pair of values +;; The normal 3SAME instruction can be used here instead. +(define_mode_attr FCMLA_maybe_lane [(V2SF "") (V4SF "[%4]") + (V4HF "[%4]") (V8HF "[%4]") + ]) + +;; The number of bytes controlled by a predicate +(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2") + (VNx4BI "4") (VNx2BI "8")]) + +;; Two-nybble mask for partial vector modes: nunits, byte size. +(define_mode_attr self_mask [(VNx8QI "0x81") + (VNx4QI "0x41") + (VNx2QI "0x21") + (VNx4HI "0x42") + (VNx2HI "0x22") + (VNx2SI "0x24")]) + +;; For SVE_HSDI vector modes, the mask of narrower modes, encoded as above. +(define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx4HI "0x41") + (VNx2HI "0x21") + (VNx4SI "0x43") (VNx2SI "0x23") + (VNx2DI "0x27")]) + +;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index. +(define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x") + (VNx8HF "y") (VNx4SF "y") (VNx2DF "x")]) + +;; The constraint to use for an SVE FCMLA lane index. +(define_mode_attr sve_lane_pair_con [(VNx8HF "y") (VNx4SF "x")]) + ;; ------------------------------------------------------------------- ;; Code Iterators ;; ------------------------------------------------------------------- @@ -1120,6 +1778,8 @@ ;; This code iterator allows the shifts supported in arithmetic instructions (define_code_iterator ASHIFT [ashift ashiftrt lshiftrt]) +(define_code_iterator SHIFTRT [ashiftrt lshiftrt]) + ;; Code iterator for logical operations (define_code_iterator LOGICAL [and ior xor]) @@ -1134,6 +1794,7 @@ ;; Code iterator for sign/zero extension (define_code_iterator ANY_EXTEND [sign_extend zero_extend]) +(define_code_iterator ANY_EXTEND2 [sign_extend zero_extend]) ;; All division operations (signed/unsigned) (define_code_iterator ANY_DIV [div udiv]) @@ -1163,7 +1824,10 @@ (define_code_iterator FMAXMIN [smax smin]) -;; Code iterator for variants of vector max and min. +;; Signed and unsigned max operations. +(define_code_iterator USMAX [smax umax]) + +;; Code iterator for plus and minus. (define_code_iterator ADDSUB [plus minus]) ;; Code iterator for variants of vector saturating binary ops. @@ -1175,6 +1839,21 @@ ;; Code iterator for signed variants of vector saturating binary ops. 
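
;; Worked example for the self_mask/narrower_mask attributes above
;; (illustration only): the high nybble holds the number of units and
;; the low nybble the element size in bytes, so VNx8QI (8 units of
;; 1 byte) is 0x81 and VNx2SI (2 units of 4 bytes) is 0x24.
;; narrower_mask for a full mode is the bitwise OR of the self_masks
;; of the modes with the same unit count but narrower elements; for
;; VNx4SI that is 0x41 | 0x42 = 0x43.  Pattern conditions can then use
;; tests along the lines of (~<narrower_mask> & <self_mask>) == 0 to
;; check that one mode really is a narrower layout of the other.
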
(define_code_iterator SBINQOPS [ss_plus ss_minus]) +;; Code iterator for unsigned variants of vector saturating binary ops. +(define_code_iterator UBINQOPS [us_plus us_minus]) + +;; Modular and saturating addition. +(define_code_iterator ANY_PLUS [plus ss_plus us_plus]) + +;; Saturating addition. +(define_code_iterator SAT_PLUS [ss_plus us_plus]) + +;; Modular and saturating subtraction. +(define_code_iterator ANY_MINUS [minus ss_minus us_minus]) + +;; Saturating subtraction. +(define_code_iterator SAT_MINUS [ss_minus us_minus]) + ;; Comparison operators for CM. (define_code_iterator COMPARISONS [lt le eq ge gt]) @@ -1184,11 +1863,34 @@ ;; Unsigned comparison operators. (define_code_iterator FAC_COMPARISONS [lt le ge gt]) +;; Signed and unsigned saturating truncations. +(define_code_iterator SAT_TRUNC [ss_truncate us_truncate]) + ;; SVE integer unary operations. -(define_code_iterator SVE_INT_UNARY [neg not popcount]) +(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount + (ss_abs "TARGET_SVE2") + (ss_neg "TARGET_SVE2")]) + +;; SVE integer binary operations. +(define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin + ashift ashiftrt lshiftrt + and ior xor + (ss_plus "TARGET_SVE2") + (us_plus "TARGET_SVE2") + (ss_minus "TARGET_SVE2") + (us_minus "TARGET_SVE2")]) + +;; SVE integer binary division operations. +(define_code_iterator SVE_INT_BINARY_SD [div udiv]) -;; SVE floating-point unary operations. -(define_code_iterator SVE_FP_UNARY [neg abs sqrt]) +;; SVE integer binary operations that have an immediate form. +(define_code_iterator SVE_INT_BINARY_IMM [mult smax smin umax umin]) + +;; SVE floating-point operations with an unpredicated all-register form. +(define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult]) + +;; SVE integer comparisons. +(define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) ;; ------------------------------------------------------------------- ;; Code Attributes @@ -1206,6 +1908,8 @@ (unsigned_fix "fixuns") (float "float") (unsigned_float "floatuns") + (clrsb "clrsb") + (clz "clz") (popcount "popcount") (and "and") (ior "ior") @@ -1214,10 +1918,13 @@ (neg "neg") (plus "add") (minus "sub") - (ss_plus "qadd") - (us_plus "qadd") - (ss_minus "qsub") - (us_minus "qsub") + (mult "mul") + (div "div") + (udiv "udiv") + (ss_plus "ssadd") + (us_plus "usadd") + (ss_minus "sssub") + (us_minus "ussub") (ss_neg "qneg") (ss_abs "qabs") (smin "smin") @@ -1234,8 +1941,12 @@ (leu "leu") (geu "geu") (gtu "gtu") - (abs "abs") - (sqrt "sqrt")]) + (abs "abs")]) + +(define_code_attr addsub [(ss_plus "add") + (us_plus "add") + (ss_minus "sub") + (us_minus "sub")]) ;; For comparison operators we use the FCM* and CM* instructions. ;; As there are no CMLE or CMLT instructions which act on 3 vector @@ -1253,6 +1964,18 @@ (ltu "LTU") (leu "LEU") (ne "NE") (geu "GEU") (gtu "GTU")]) +;; The AArch64 condition associated with an rtl comparison code. +(define_code_attr cmp_op [(lt "lt") + (le "le") + (eq "eq") + (ne "ne") + (ge "ge") + (gt "gt") + (ltu "lo") + (leu "ls") + (geu "hs") + (gtu "hi")]) + (define_code_attr fix_trunc_optab [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")]) @@ -1268,6 +1991,9 @@ (define_code_attr shift [(ashift "lsl") (ashiftrt "asr") (lshiftrt "lsr") (rotatert "ror")]) +;; Op prefix for shift right and accumulate. 
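
;; Note on the conditional entries above (no new definitions here): an
;; iterator element written as (ss_plus "TARGET_SVE2") still generates
;; an instantiation for that code, but the string is ANDed into that
;; instantiation's insn condition, so the saturating members of
;; SVE_INT_BINARY and SVE_INT_UNARY are only enabled for SVE2 while
;; the remaining members stay available on base SVE.
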
+(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")]) + ;; Map shift operators onto underlying bit-field instructions (define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx") (lshiftrt "ubfx") (rotatert "extr")]) @@ -1278,6 +2004,9 @@ ;; Operation names for negate and bitwise complement. (define_code_attr neg_not_op [(neg "neg") (not "not")]) +;; csinv, csneg insn suffixes. +(define_code_attr neg_not_cs [(neg "neg") (not "inv")]) + ;; Similar, but when the second operand is inverted. (define_code_attr nlogical [(and "bic") (ior "orn") (xor "eon")]) @@ -1290,7 +2019,17 @@ (fix "s") (unsigned_fix "u") (div "s") (udiv "u") (smax "s") (umax "u") - (smin "s") (umin "u")]) + (smin "s") (umin "u") + (ss_truncate "s") (us_truncate "u")]) + +;; "s" for signed ops, empty for unsigned ones. +(define_code_attr s [(sign_extend "s") (zero_extend "")]) + +;; Map signed/unsigned ops to the corresponding extension. +(define_code_attr paired_extend [(ss_plus "sign_extend") + (us_plus "zero_extend") + (ss_minus "sign_extend") + (us_minus "zero_extend")]) ;; Whether a shift is left or right. (define_code_attr lr [(ashift "l") (ashiftrt "r") (lshiftrt "r")]) @@ -1342,43 +2081,152 @@ ;; The integer SVE instruction that implements an rtx code. (define_code_attr sve_int_op [(plus "add") + (minus "sub") + (mult "mul") + (div "sdiv") + (udiv "udiv") + (abs "abs") (neg "neg") (smin "smin") (smax "smax") (umin "umin") (umax "umax") + (ashift "lsl") + (ashiftrt "asr") + (lshiftrt "lsr") (and "and") (ior "orr") (xor "eor") (not "not") - (popcount "cnt")]) + (clrsb "cls") + (clz "clz") + (popcount "cnt") + (ss_plus "sqadd") + (us_plus "uqadd") + (ss_minus "sqsub") + (us_minus "uqsub") + (ss_neg "sqneg") + (ss_abs "sqabs")]) + +(define_code_attr sve_int_op_rev [(plus "add") + (minus "subr") + (mult "mul") + (div "sdivr") + (udiv "udivr") + (smin "smin") + (smax "smax") + (umin "umin") + (umax "umax") + (ashift "lslr") + (ashiftrt "asrr") + (lshiftrt "lsrr") + (and "and") + (ior "orr") + (xor "eor") + (ss_plus "sqadd") + (us_plus "uqadd") + (ss_minus "sqsubr") + (us_minus "uqsubr")]) ;; The floating-point SVE instruction that implements an rtx code. (define_code_attr sve_fp_op [(plus "fadd") - (neg "fneg") - (abs "fabs") - (sqrt "fsqrt")]) + (minus "fsub") + (mult "fmul")]) + +;; The SVE immediate constraint to use for an rtl code. +(define_code_attr sve_imm_con [(mult "vsm") + (smax "vsm") + (smin "vsm") + (umax "vsb") + (umin "vsb") + (eq "vsc") + (ne "vsc") + (lt "vsc") + (ge "vsc") + (le "vsc") + (gt "vsc") + (ltu "vsd") + (leu "vsd") + (geu "vsd") + (gtu "vsd")]) + +;; The prefix letter to use when printing an immediate operand. +(define_code_attr sve_imm_prefix [(mult "") + (smax "") + (smin "") + (umax "D") + (umin "D")]) + +;; The predicate to use for the second input operand in a cond_ +;; pattern. 
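
;; Worked example for sve_int_op_rev above (illustration only):
;; predicated SVE arithmetic overwrites its first source register, so
;; when the destination is instead tied to the second source the
;; reversed mnemonic is printed.  For (minus),
;; "sub z0.s, p0/m, z0.s, z1.s" computes z0 - z1, whereas the reversed
;; form "subr z0.s, p0/m, z0.s, z1.s" computes z1 - z0.
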
+(define_code_attr sve_pred_int_rhs2_operand + [(plus "register_operand") + (minus "register_operand") + (mult "register_operand") + (smax "register_operand") + (umax "register_operand") + (smin "register_operand") + (umin "register_operand") + (ashift "aarch64_sve_lshift_operand") + (ashiftrt "aarch64_sve_rshift_operand") + (lshiftrt "aarch64_sve_rshift_operand") + (and "aarch64_sve_pred_and_operand") + (ior "register_operand") + (xor "register_operand") + (ss_plus "register_operand") + (us_plus "register_operand") + (ss_minus "register_operand") + (us_minus "register_operand")]) + +(define_code_attr inc_dec [(minus "dec") (ss_minus "sqdec") (us_minus "uqdec") + (plus "inc") (ss_plus "sqinc") (us_plus "uqinc")]) ;; ------------------------------------------------------------------- ;; Int Iterators. ;; ------------------------------------------------------------------- + +;; The unspec codes for the SABAL, UABAL AdvancedSIMD instructions. +(define_int_iterator ABAL [UNSPEC_SABAL UNSPEC_UABAL]) + +;; The unspec codes for the SABDL, UABDL AdvancedSIMD instructions. +(define_int_iterator ABDL [UNSPEC_SABDL UNSPEC_UABDL]) + +;; The unspec codes for the SABAL2, UABAL2 AdvancedSIMD instructions. +(define_int_iterator ABAL2 [UNSPEC_SABAL2 UNSPEC_UABAL2]) + +;; The unspec codes for the SABDL2, UABDL2 AdvancedSIMD instructions. +(define_int_iterator ABDL2 [UNSPEC_SABDL2 UNSPEC_UABDL2]) + +;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions. +(define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP]) + (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV UNSPEC_SMAXV UNSPEC_SMINV]) (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV UNSPEC_FMAXNMV UNSPEC_FMINNMV]) -(define_int_iterator BITWISEV [UNSPEC_ANDV UNSPEC_IORV UNSPEC_XORV]) +(define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV]) + +(define_int_iterator USADDLV [UNSPEC_SADDLV UNSPEC_UADDLV]) (define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF]) (define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD UNSPEC_SRHADD UNSPEC_URHADD - UNSPEC_SHSUB UNSPEC_UHSUB - UNSPEC_SRHSUB UNSPEC_URHSUB]) + UNSPEC_SHSUB UNSPEC_UHSUB]) + +(define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD]) + +(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) + +(define_int_iterator BSL_DUP [1 2]) (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) +(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT]) +(define_int_iterator DOTPROD_US_ONLY [UNSPEC_USDOT]) + (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN UNSPEC_SUBHN UNSPEC_RSUBHN]) @@ -1388,12 +2236,17 @@ (define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN UNSPEC_FMAXNM UNSPEC_FMINNM]) -(define_int_iterator PAUTH_LR_SP [UNSPEC_PACISP UNSPEC_AUTISP]) +(define_int_iterator PAUTH_LR_SP [UNSPEC_PACIASP UNSPEC_AUTIASP + UNSPEC_PACIBSP UNSPEC_AUTIBSP]) -(define_int_iterator PAUTH_17_16 [UNSPEC_PACI1716 UNSPEC_AUTI1716]) +(define_int_iterator PAUTH_17_16 [UNSPEC_PACIA1716 UNSPEC_AUTIA1716 + UNSPEC_PACIB1716 UNSPEC_AUTIB1716]) (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) +(define_int_iterator MULHRS [UNSPEC_SMULHS UNSPEC_UMULHS + UNSPEC_SMULHRS UNSPEC_UMULHRS]) + (define_int_iterator USSUQADD [UNSPEC_SUQADD UNSPEC_USQADD]) (define_int_iterator SUQMOVN [UNSPEC_SQXTN UNSPEC_UQXTN]) @@ -1427,6 +2280,10 @@ UNSPEC_TRN1 UNSPEC_TRN2 UNSPEC_UZP1 UNSPEC_UZP2]) +(define_int_iterator PERMUTEQ [UNSPEC_ZIP1Q UNSPEC_ZIP2Q + UNSPEC_TRN1Q UNSPEC_TRN2Q + UNSPEC_UZP1Q UNSPEC_UZP2Q]) + (define_int_iterator OPTAB_PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 
UNSPEC_UZP1 UNSPEC_UZP2]) @@ -1442,8 +2299,6 @@ (define_int_iterator FCVT_F2FIXED [UNSPEC_FCVTZS UNSPEC_FCVTZU]) (define_int_iterator FCVT_FIXED2F [UNSPEC_SCVTF UNSPEC_UCVTF]) -(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) - (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W UNSPEC_CRC32X UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW UNSPEC_CRC32CX]) @@ -1475,24 +2330,459 @@ (define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART]) -(define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB - UNSPEC_COND_SMAX UNSPEC_COND_UMAX - UNSPEC_COND_SMIN UNSPEC_COND_UMIN - UNSPEC_COND_AND - UNSPEC_COND_ORR - UNSPEC_COND_EOR]) +(define_int_iterator CLAST [UNSPEC_CLASTA UNSPEC_CLASTB]) + +(define_int_iterator LAST [UNSPEC_LASTA UNSPEC_LASTB]) + +(define_int_iterator SVE_INT_UNARY [UNSPEC_RBIT UNSPEC_REVB + UNSPEC_REVH UNSPEC_REVW]) + +(define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE]) + +(define_int_iterator SVE_FP_UNARY_INT [UNSPEC_FEXPA]) + +(define_int_iterator SVE_INT_SHIFT_IMM [UNSPEC_ASRD + (UNSPEC_SQSHLU "TARGET_SVE2") + (UNSPEC_SRSHR "TARGET_SVE2") + (UNSPEC_URSHR "TARGET_SVE2")]) + +(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS]) + +(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL]) + +(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT + UNSPEC_BFMLALB + UNSPEC_BFMLALT + UNSPEC_BFMMLA]) + +(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT + UNSPEC_BFMLALB + UNSPEC_BFMLALT]) + +(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV + UNSPEC_IORV + UNSPEC_SMAXV + UNSPEC_SMINV + UNSPEC_UMAXV + UNSPEC_UMINV + UNSPEC_XORV]) + +(define_int_iterator SVE_FP_REDUCTION [UNSPEC_FADDV + UNSPEC_FMAXV + UNSPEC_FMAXNMV + UNSPEC_FMINV + UNSPEC_FMINNMV]) + +(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS + UNSPEC_COND_FNEG + UNSPEC_COND_FRECPX + UNSPEC_COND_FRINTA + UNSPEC_COND_FRINTI + UNSPEC_COND_FRINTM + UNSPEC_COND_FRINTN + UNSPEC_COND_FRINTP + UNSPEC_COND_FRINTX + UNSPEC_COND_FRINTZ + UNSPEC_COND_FSQRT]) + +;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated +;; 2 expander. +(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS + UNSPEC_COND_FNEG + UNSPEC_COND_FRECPX + UNSPEC_COND_FRINTA + UNSPEC_COND_FRINTI + UNSPEC_COND_FRINTM + UNSPEC_COND_FRINTN + UNSPEC_COND_FRINTP + UNSPEC_COND_FRINTX + UNSPEC_COND_FRINTZ]) + +(define_int_iterator SVE_COND_FCVT [UNSPEC_COND_FCVT]) +(define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU]) +(define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) + +(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD + UNSPEC_COND_FDIV + UNSPEC_COND_FMAX + UNSPEC_COND_FMAXNM + UNSPEC_COND_FMIN + UNSPEC_COND_FMINNM + UNSPEC_COND_FMUL + UNSPEC_COND_FMULX + UNSPEC_COND_FSUB]) + +;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated +;; 3 expander. 
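
;; Background note (summary of intent, not a new definition): each
;; UNSPEC_COND_F* code models an FP operation under a governing
;; predicate, and together with its "optab" attribute entry it is what
;; the cond_<optab><mode> expanders and matching IFN_COND_* internal
;; functions are generated from, letting the vectorizer emit predicated
;; FP arithmetic directly.  The *_OPTAB variants exclude the codes
;; whose unpredicated form already has its own standard <optab><mode>2
;; or <optab><mode>3 named pattern.
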
+(define_int_iterator SVE_COND_FP_BINARY_OPTAB [UNSPEC_COND_FADD + UNSPEC_COND_FMAX + UNSPEC_COND_FMAXNM + UNSPEC_COND_FMIN + UNSPEC_COND_FMINNM + UNSPEC_COND_FMUL + UNSPEC_COND_FMULX + UNSPEC_COND_FSUB]) + +(define_int_iterator SVE_COND_FP_BINARY_INT [UNSPEC_COND_FSCALE]) + +(define_int_iterator SVE_COND_FP_ADD [UNSPEC_COND_FADD]) +(define_int_iterator SVE_COND_FP_SUB [UNSPEC_COND_FSUB]) +(define_int_iterator SVE_COND_FP_MUL [UNSPEC_COND_FMUL]) + +(define_int_iterator SVE_COND_FP_BINARY_I1 [UNSPEC_COND_FMAX + UNSPEC_COND_FMAXNM + UNSPEC_COND_FMIN + UNSPEC_COND_FMINNM + UNSPEC_COND_FMUL]) + +(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV + UNSPEC_COND_FMULX]) + +(define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 + UNSPEC_COND_FCADD270]) + +(define_int_iterator SVE_COND_FP_MAXMIN [UNSPEC_COND_FMAX + UNSPEC_COND_FMAXNM + UNSPEC_COND_FMIN + UNSPEC_COND_FMINNM]) + +;; Floating-point max/min operations that correspond to optabs, +;; as opposed to those that are internal to the port. +(define_int_iterator SVE_COND_FP_MAXMIN_PUBLIC [UNSPEC_COND_FMAXNM + UNSPEC_COND_FMINNM]) + +(define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA + UNSPEC_COND_FMLS + UNSPEC_COND_FNMLA + UNSPEC_COND_FNMLS]) + +(define_int_iterator SVE_COND_FCMLA [UNSPEC_COND_FCMLA + UNSPEC_COND_FCMLA90 + UNSPEC_COND_FCMLA180 + UNSPEC_COND_FCMLA270]) + +(define_int_iterator SVE_COND_INT_CMP_WIDE [UNSPEC_COND_CMPEQ_WIDE + UNSPEC_COND_CMPGE_WIDE + UNSPEC_COND_CMPGT_WIDE + UNSPEC_COND_CMPHI_WIDE + UNSPEC_COND_CMPHS_WIDE + UNSPEC_COND_CMPLE_WIDE + UNSPEC_COND_CMPLO_WIDE + UNSPEC_COND_CMPLS_WIDE + UNSPEC_COND_CMPLT_WIDE + UNSPEC_COND_CMPNE_WIDE]) + +;; SVE FP comparisons that accept #0.0. +(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ + UNSPEC_COND_FCMGE + UNSPEC_COND_FCMGT + UNSPEC_COND_FCMLE + UNSPEC_COND_FCMLT + UNSPEC_COND_FCMNE]) + +(define_int_iterator SVE_COND_FP_ABS_CMP [UNSPEC_COND_FCMGE + UNSPEC_COND_FCMGT + UNSPEC_COND_FCMLE + UNSPEC_COND_FCMLT]) + +(define_int_iterator SVE_FP_TERNARY_LANE [UNSPEC_FMLA UNSPEC_FMLS]) + +(define_int_iterator SVE_CFP_TERNARY_LANE [UNSPEC_FCMLA UNSPEC_FCMLA90 + UNSPEC_FCMLA180 UNSPEC_FCMLA270]) + +(define_int_iterator SVE_WHILE [UNSPEC_WHILELE UNSPEC_WHILELO + UNSPEC_WHILELS UNSPEC_WHILELT + (UNSPEC_WHILEGE "TARGET_SVE2") + (UNSPEC_WHILEGT "TARGET_SVE2") + (UNSPEC_WHILEHI "TARGET_SVE2") + (UNSPEC_WHILEHS "TARGET_SVE2") + (UNSPEC_WHILERW "TARGET_SVE2") + (UNSPEC_WHILEWR "TARGET_SVE2")]) + +(define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR]) + +(define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE + UNSPEC_ASHIFTRT_WIDE + UNSPEC_LSHIFTRT_WIDE]) + +(define_int_iterator SVE_LDFF1_LDNF1 [UNSPEC_LDFF1 UNSPEC_LDNF1]) + +(define_int_iterator SVE2_U32_UNARY [UNSPEC_URECPE UNSPEC_RSQRTE]) + +(define_int_iterator SVE2_INT_UNARY_NARROWB [UNSPEC_SQXTNB + UNSPEC_SQXTUNB + UNSPEC_UQXTNB]) + +(define_int_iterator SVE2_INT_UNARY_NARROWT [UNSPEC_SQXTNT + UNSPEC_SQXTUNT + UNSPEC_UQXTNT]) + +(define_int_iterator SVE2_INT_BINARY [UNSPEC_SQDMULH + UNSPEC_SQRDMULH]) + +(define_int_iterator SVE2_INT_BINARY_LANE [UNSPEC_SQDMULH + UNSPEC_SQRDMULH]) + +(define_int_iterator SVE2_INT_BINARY_LONG [UNSPEC_SABDLB + UNSPEC_SABDLT + UNSPEC_SADDLB + UNSPEC_SADDLBT + UNSPEC_SADDLT + UNSPEC_SMULLB + UNSPEC_SMULLT + UNSPEC_SQDMULLB + UNSPEC_SQDMULLT + UNSPEC_SSUBLB + UNSPEC_SSUBLBT + UNSPEC_SSUBLT + UNSPEC_SSUBLTB + UNSPEC_UABDLB + UNSPEC_UABDLT + UNSPEC_UADDLB + UNSPEC_UADDLT + UNSPEC_UMULLB + UNSPEC_UMULLT + UNSPEC_USUBLB + UNSPEC_USUBLT]) + +(define_int_iterator 
SVE2_INT_BINARY_LONG_LANE [UNSPEC_SMULLB + UNSPEC_SMULLT + UNSPEC_SQDMULLB + UNSPEC_SQDMULLT + UNSPEC_UMULLB + UNSPEC_UMULLT]) + +(define_int_iterator SVE2_INT_BINARY_NARROWB [UNSPEC_ADDHNB + UNSPEC_RADDHNB + UNSPEC_RSUBHNB + UNSPEC_SUBHNB]) + +(define_int_iterator SVE2_INT_BINARY_NARROWT [UNSPEC_ADDHNT + UNSPEC_RADDHNT + UNSPEC_RSUBHNT + UNSPEC_SUBHNT]) + +(define_int_iterator SVE2_INT_BINARY_PAIR [UNSPEC_ADDP + UNSPEC_SMAXP + UNSPEC_SMINP + UNSPEC_UMAXP + UNSPEC_UMINP]) + +(define_int_iterator SVE2_FP_BINARY_PAIR [UNSPEC_FADDP + UNSPEC_FMAXP + UNSPEC_FMAXNMP + UNSPEC_FMINP + UNSPEC_FMINNMP]) + +(define_int_iterator SVE2_INT_BINARY_PAIR_LONG [UNSPEC_SADALP UNSPEC_UADALP]) + +(define_int_iterator SVE2_INT_BINARY_WIDE [UNSPEC_SADDWB + UNSPEC_SADDWT + UNSPEC_SSUBWB + UNSPEC_SSUBWT + UNSPEC_UADDWB + UNSPEC_UADDWT + UNSPEC_USUBWB + UNSPEC_USUBWT]) + +(define_int_iterator SVE2_INT_SHIFT_IMM_LONG [UNSPEC_SSHLLB + UNSPEC_SSHLLT + UNSPEC_USHLLB + UNSPEC_USHLLT]) + +(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWB [UNSPEC_RSHRNB + UNSPEC_SHRNB + UNSPEC_SQRSHRNB + UNSPEC_SQRSHRUNB + UNSPEC_SQSHRNB + UNSPEC_SQSHRUNB + UNSPEC_UQRSHRNB + UNSPEC_UQSHRNB]) + +(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWT [UNSPEC_RSHRNT + UNSPEC_SHRNT + UNSPEC_SQRSHRNT + UNSPEC_SQRSHRUNT + UNSPEC_SQSHRNT + UNSPEC_SQSHRUNT + UNSPEC_UQRSHRNT + UNSPEC_UQSHRNT]) + +(define_int_iterator SVE2_INT_SHIFT_INSERT [UNSPEC_SLI UNSPEC_SRI]) + +(define_int_iterator SVE2_INT_CADD [UNSPEC_CADD90 + UNSPEC_CADD270 + UNSPEC_SQCADD90 + UNSPEC_SQCADD270]) + +(define_int_iterator SVE2_INT_BITPERM [UNSPEC_BDEP UNSPEC_BEXT UNSPEC_BGRP]) + +(define_int_iterator SVE2_INT_TERNARY [UNSPEC_ADCLB + UNSPEC_ADCLT + UNSPEC_EORBT + UNSPEC_EORTB + UNSPEC_SBCLB + UNSPEC_SBCLT + UNSPEC_SQRDMLAH + UNSPEC_SQRDMLSH]) + +(define_int_iterator SVE2_INT_TERNARY_LANE [UNSPEC_SQRDMLAH + UNSPEC_SQRDMLSH]) + +(define_int_iterator SVE2_FP_TERNARY_LONG [UNSPEC_FMLALB + UNSPEC_FMLALT + UNSPEC_FMLSLB + UNSPEC_FMLSLT]) + +(define_int_iterator SVE2_FP_TERNARY_LONG_LANE [UNSPEC_FMLALB + UNSPEC_FMLALT + UNSPEC_FMLSLB + UNSPEC_FMLSLT]) + +(define_int_iterator SVE2_INT_CMLA [UNSPEC_CMLA + UNSPEC_CMLA90 + UNSPEC_CMLA180 + UNSPEC_CMLA270 + UNSPEC_SQRDCMLAH + UNSPEC_SQRDCMLAH90 + UNSPEC_SQRDCMLAH180 + UNSPEC_SQRDCMLAH270]) + +;; Unlike the normal CMLA instructions these represent the actual operation +;; to be performed. They will always need to be expanded into multiple +;; sequences consisting of CMLA. +(define_int_iterator SVE2_INT_CMLA_OP [UNSPEC_CMLA + UNSPEC_CMLA_CONJ + UNSPEC_CMLA180 + UNSPEC_CMLA180_CONJ]) + +;; Unlike the normal CMLA instructions these represent the actual operation +;; to be performed. They will always need to be expanded into multiple +;; sequences consisting of CMLA. 
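
;; Worked example for these *_OP iterators (illustration only): a full
;; complex multiply or multiply-accumulate cannot be expressed as a
;; single CMLA or FCMLA, so these codes stand for the whole operation
;; and are expanded into a pair of rotated instructions.  For
;; acc += a * b, one rotation accumulates the two partial products
;; that involve the real component of one input and the other rotation
;; accumulates the two that involve its imaginary component, including
;; the sign flip needed in the real lane; the _CONJ forms pick the
;; rotations that conjugate one of the inputs instead.
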
+(define_int_iterator SVE2_INT_CMUL_OP [UNSPEC_CMUL + UNSPEC_CMUL_CONJ]) + +;; Same as SVE2_INT_CADD but exclude the saturating instructions +(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90 + UNSPEC_CADD270]) + +(define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT + UNSPEC_CDOT90 + UNSPEC_CDOT180 + UNSPEC_CDOT270]) + +(define_int_iterator SVE2_INT_ADD_BINARY_LONG [UNSPEC_SABDLB + UNSPEC_SABDLT + UNSPEC_SMULLB + UNSPEC_SMULLT + UNSPEC_UABDLB + UNSPEC_UABDLT + UNSPEC_UMULLB + UNSPEC_UMULLT]) + +(define_int_iterator SVE2_INT_QADD_BINARY_LONG [UNSPEC_SQDMULLB + UNSPEC_SQDMULLBT + UNSPEC_SQDMULLT]) + +(define_int_iterator SVE2_INT_SUB_BINARY_LONG [UNSPEC_SMULLB + UNSPEC_SMULLT + UNSPEC_UMULLB + UNSPEC_UMULLT]) + +(define_int_iterator SVE2_INT_QSUB_BINARY_LONG [UNSPEC_SQDMULLB + UNSPEC_SQDMULLBT + UNSPEC_SQDMULLT]) + +(define_int_iterator SVE2_INT_ADD_BINARY_LONG_LANE [UNSPEC_SMULLB + UNSPEC_SMULLT + UNSPEC_UMULLB + UNSPEC_UMULLT]) + +(define_int_iterator SVE2_INT_QADD_BINARY_LONG_LANE [UNSPEC_SQDMULLB + UNSPEC_SQDMULLT]) + +(define_int_iterator SVE2_INT_SUB_BINARY_LONG_LANE [UNSPEC_SMULLB + UNSPEC_SMULLT + UNSPEC_UMULLB + UNSPEC_UMULLT]) + +(define_int_iterator SVE2_INT_QSUB_BINARY_LONG_LANE [UNSPEC_SQDMULLB + UNSPEC_SQDMULLT]) + +(define_int_iterator SVE2_COND_INT_UNARY_FP [UNSPEC_COND_FLOGB]) + +(define_int_iterator SVE2_COND_FP_UNARY_LONG [UNSPEC_COND_FCVTLT]) + +(define_int_iterator SVE2_COND_FP_UNARY_NARROWB [UNSPEC_COND_FCVTX]) + +(define_int_iterator SVE2_COND_INT_BINARY [UNSPEC_SHADD + UNSPEC_SHSUB + UNSPEC_SQRSHL + UNSPEC_SRHADD + UNSPEC_SRSHL + UNSPEC_SUQADD + UNSPEC_UHADD + UNSPEC_UHSUB + UNSPEC_UQRSHL + UNSPEC_URHADD + UNSPEC_URSHL + UNSPEC_USQADD]) + +(define_int_iterator SVE2_COND_INT_BINARY_NOREV [UNSPEC_SUQADD + UNSPEC_USQADD]) + +(define_int_iterator SVE2_COND_INT_BINARY_REV [UNSPEC_SHADD + UNSPEC_SHSUB + UNSPEC_SQRSHL + UNSPEC_SRHADD + UNSPEC_SRSHL + UNSPEC_UHADD + UNSPEC_UHSUB + UNSPEC_UQRSHL + UNSPEC_URHADD + UNSPEC_URSHL]) + +(define_int_iterator SVE2_COND_INT_SHIFT [UNSPEC_SQSHL + UNSPEC_UQSHL]) + +(define_int_iterator SVE2_MATCH [UNSPEC_MATCH UNSPEC_NMATCH]) + +(define_int_iterator SVE2_PMULL [UNSPEC_PMULLB UNSPEC_PMULLT]) + +(define_int_iterator SVE2_PMULL_PAIR [UNSPEC_PMULLB_PAIR UNSPEC_PMULLT_PAIR]) + +(define_int_iterator FCADD [UNSPEC_FCADD90 + UNSPEC_FCADD270]) + +(define_int_iterator FCMLA [UNSPEC_FCMLA + UNSPEC_FCMLA90 + UNSPEC_FCMLA180 + UNSPEC_FCMLA270]) + +(define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X + UNSPEC_FRINT64Z UNSPEC_FRINT64X]) -(define_int_iterator SVE_COND_FP_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB]) +(define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB]) -(define_int_iterator SVE_COND_INT_CMP [UNSPEC_COND_LT UNSPEC_COND_LE - UNSPEC_COND_EQ UNSPEC_COND_NE - UNSPEC_COND_GE UNSPEC_COND_GT - UNSPEC_COND_LO UNSPEC_COND_LS - UNSPEC_COND_HS UNSPEC_COND_HI]) +(define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN UNSPEC_BRKPA UNSPEC_BRKPB]) -(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE - UNSPEC_COND_EQ UNSPEC_COND_NE - UNSPEC_COND_GE UNSPEC_COND_GT]) +(define_int_iterator SVE_PITER [UNSPEC_PFIRST UNSPEC_PNEXT]) + +(define_int_iterator MATMUL [UNSPEC_SMATMUL UNSPEC_UMATMUL + UNSPEC_USMATMUL]) + +(define_int_iterator FMMLA [UNSPEC_FMMLA]) + +(define_int_iterator BF_MLA [UNSPEC_BFMLALB + UNSPEC_BFMLALT]) + +(define_int_iterator FCMLA_OP [UNSPEC_FCMLA + UNSPEC_FCMLA180 + UNSPEC_FCMLA_CONJ + UNSPEC_FCMLA180_CONJ]) + +(define_int_iterator FCMUL_OP [UNSPEC_FCMUL + UNSPEC_FCMUL_CONJ]) ;; Iterators for atomic operations. 
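The *_OP iterators just above (SVE2_INT_CMLA_OP, SVE2_INT_CMUL_OP, FCMLA_OP, FCMUL_OP) stand for whole complex operations rather than single instructions: each is expanded into two CMLA/FCMLA steps that differ only in their rotation immediate, the pairs being selected later in this diff by the rotsplit1/rotsplit2 attributes. The sketch below is a minimal scalar model of the usual Arm FCMLA rotation semantics, showing why the #0/#90 pairing accumulates the full complex product while swapping the second step to #270 accumulates the product with one input conjugated. It is plain illustrative C, not part of the patch, and which source operand ends up conjugated is an assumption of the model rather than something taken from the patch.

  /* Scalar model of one complex lane of FCMLA: the rotation picks which
     half of the first source is used and with which sign.  */
  #include <stdio.h>

  struct cplx { double re, im; };

  /* acc += rot (a) * b for a single rotation step.  */
  static struct cplx
  fcmla (struct cplx acc, struct cplx a, struct cplx b, int rot)
  {
    switch (rot)
      {
      case 0:   acc.re += a.re * b.re; acc.im += a.re * b.im; break;
      case 90:  acc.re -= a.im * b.im; acc.im += a.im * b.re; break;
      case 180: acc.re -= a.re * b.re; acc.im -= a.re * b.im; break;
      case 270: acc.re += a.im * b.im; acc.im -= a.im * b.re; break;
      }
    return acc;
  }

  int
  main (void)
  {
    struct cplx a = { 1.0, 2.0 }, b = { 3.0, 4.0 };
    struct cplx zero = { 0.0, 0.0 };

    /* #0 followed by #90 accumulates the full product a * b.  */
    struct cplx mul = fcmla (fcmla (zero, a, b, 0), a, b, 90);
    printf ("a * b       = %g%+gi\n", mul.re, mul.im);    /* -5+10i */

    /* #0 followed by #270 accumulates the product with one input
       conjugated (conj (a) * b under this operand convention).  */
    struct cplx mulc = fcmla (fcmla (zero, a, b, 0), a, b, 270);
    printf ("conj(a) * b = %g%+gi\n", mulc.re, mulc.im);  /* 11-2i */
    return 0;
  }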
@@ -1504,6 +2794,10 @@ [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr") (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) +(define_int_attr atomic_ldoptab + [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic") + (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) + ;; ------------------------------------------------------------------- ;; Int Iterators Attributes. ;; ------------------------------------------------------------------- @@ -1514,18 +2808,106 @@ (define_int_attr optab [(UNSPEC_ANDF "and") (UNSPEC_IORF "ior") (UNSPEC_XORF "xor") + (UNSPEC_SADDV "sadd") + (UNSPEC_UADDV "uadd") (UNSPEC_ANDV "and") (UNSPEC_IORV "ior") (UNSPEC_XORV "xor") - (UNSPEC_COND_ADD "add") - (UNSPEC_COND_SUB "sub") - (UNSPEC_COND_SMAX "smax") - (UNSPEC_COND_UMAX "umax") - (UNSPEC_COND_SMIN "smin") - (UNSPEC_COND_UMIN "umin") - (UNSPEC_COND_AND "and") - (UNSPEC_COND_ORR "ior") - (UNSPEC_COND_EOR "xor")]) + (UNSPEC_FRECPE "frecpe") + (UNSPEC_FRECPS "frecps") + (UNSPEC_RSQRTE "frsqrte") + (UNSPEC_RSQRTS "frsqrts") + (UNSPEC_RBIT "rbit") + (UNSPEC_REVB "revb") + (UNSPEC_REVH "revh") + (UNSPEC_REVW "revw") + (UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") + (UNSPEC_SMAXV "smax") + (UNSPEC_SMINV "smin") + (UNSPEC_CADD90 "cadd90") + (UNSPEC_CADD270 "cadd270") + (UNSPEC_CDOT "cdot") + (UNSPEC_CDOT90 "cdot90") + (UNSPEC_CDOT180 "cdot180") + (UNSPEC_CDOT270 "cdot270") + (UNSPEC_CMLA "cmla") + (UNSPEC_CMLA90 "cmla90") + (UNSPEC_CMLA180 "cmla180") + (UNSPEC_CMLA270 "cmla270") + (UNSPEC_FADDV "plus") + (UNSPEC_FMAXNMV "smax") + (UNSPEC_FMAXV "smax_nan") + (UNSPEC_FMINNMV "smin") + (UNSPEC_FMINV "smin_nan") + (UNSPEC_SMUL_HIGHPART "smulh") + (UNSPEC_UMUL_HIGHPART "umulh") + (UNSPEC_FMLA "fma") + (UNSPEC_FMLS "fnma") + (UNSPEC_FCMLA "fcmla") + (UNSPEC_FCMLA90 "fcmla90") + (UNSPEC_FCMLA180 "fcmla180") + (UNSPEC_FCMLA270 "fcmla270") + (UNSPEC_FEXPA "fexpa") + (UNSPEC_FTSMUL "ftsmul") + (UNSPEC_FTSSEL "ftssel") + (UNSPEC_PMULLB "pmullb") + (UNSPEC_PMULLB_PAIR "pmullb_pair") + (UNSPEC_PMULLT "pmullt") + (UNSPEC_PMULLT_PAIR "pmullt_pair") + (UNSPEC_SMATMUL "smatmul") + (UNSPEC_SQCADD90 "sqcadd90") + (UNSPEC_SQCADD270 "sqcadd270") + (UNSPEC_SQRDCMLAH "sqrdcmlah") + (UNSPEC_SQRDCMLAH90 "sqrdcmlah90") + (UNSPEC_SQRDCMLAH180 "sqrdcmlah180") + (UNSPEC_SQRDCMLAH270 "sqrdcmlah270") + (UNSPEC_TRN1Q "trn1q") + (UNSPEC_TRN2Q "trn2q") + (UNSPEC_UMATMUL "umatmul") + (UNSPEC_USMATMUL "usmatmul") + (UNSPEC_UZP1Q "uzp1q") + (UNSPEC_UZP2Q "uzp2q") + (UNSPEC_WHILERW "vec_check_raw_alias") + (UNSPEC_WHILEWR "vec_check_war_alias") + (UNSPEC_ZIP1Q "zip1q") + (UNSPEC_ZIP2Q "zip2q") + (UNSPEC_COND_FABS "abs") + (UNSPEC_COND_FADD "add") + (UNSPEC_COND_FCADD90 "cadd90") + (UNSPEC_COND_FCADD270 "cadd270") + (UNSPEC_COND_FCMLA "fcmla") + (UNSPEC_COND_FCMLA90 "fcmla90") + (UNSPEC_COND_FCMLA180 "fcmla180") + (UNSPEC_COND_FCMLA270 "fcmla270") + (UNSPEC_COND_FCVT "fcvt") + (UNSPEC_COND_FCVTZS "fix_trunc") + (UNSPEC_COND_FCVTZU "fixuns_trunc") + (UNSPEC_COND_FDIV "div") + (UNSPEC_COND_FMAX "smax_nan") + (UNSPEC_COND_FMAXNM "smax") + (UNSPEC_COND_FMIN "smin_nan") + (UNSPEC_COND_FMINNM "smin") + (UNSPEC_COND_FMLA "fma") + (UNSPEC_COND_FMLS "fnma") + (UNSPEC_COND_FMUL "mul") + (UNSPEC_COND_FMULX "mulx") + (UNSPEC_COND_FNEG "neg") + (UNSPEC_COND_FNMLA "fnms") + (UNSPEC_COND_FNMLS "fms") + (UNSPEC_COND_FRECPX "frecpx") + (UNSPEC_COND_FRINTA "round") + (UNSPEC_COND_FRINTI "nearbyint") + (UNSPEC_COND_FRINTM "floor") + (UNSPEC_COND_FRINTN "frintn") + (UNSPEC_COND_FRINTP "ceil") + (UNSPEC_COND_FRINTX "rint") + 
(UNSPEC_COND_FRINTZ "btrunc") + (UNSPEC_COND_FSCALE "fscale") + (UNSPEC_COND_FSQRT "sqrt") + (UNSPEC_COND_FSUB "sub") + (UNSPEC_COND_SCVTF "float") + (UNSPEC_COND_UCVTF "floatuns")]) (define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") (UNSPEC_UMINV "umin") @@ -1538,7 +2920,11 @@ (UNSPEC_FMINNMV "smin") (UNSPEC_FMINV "smin_nan") (UNSPEC_FMAXNM "fmax") - (UNSPEC_FMINNM "fmin")]) + (UNSPEC_FMINNM "fmin") + (UNSPEC_COND_FMAX "fmax_nan") + (UNSPEC_COND_FMAXNM "fmax") + (UNSPEC_COND_FMIN "fmin_nan") + (UNSPEC_COND_FMINNM "fmin")]) (define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") (UNSPEC_UMINV "umin") @@ -1553,28 +2939,51 @@ (UNSPEC_FMAXNM "fmaxnm") (UNSPEC_FMINNM "fminnm")]) -(define_int_attr bit_reduc_op [(UNSPEC_ANDV "andv") - (UNSPEC_IORV "orv") - (UNSPEC_XORV "eorv")]) +(define_code_attr binqops_op [(ss_plus "sqadd") + (us_plus "uqadd") + (ss_minus "sqsub") + (us_minus "uqsub")]) + +(define_code_attr binqops_op_rev [(ss_plus "sqsub") + (ss_minus "sqadd")]) ;; The SVE logical instruction that implements an unspec. (define_int_attr logicalf_op [(UNSPEC_ANDF "and") (UNSPEC_IORF "orr") (UNSPEC_XORF "eor")]) +(define_int_attr last_op [(UNSPEC_CLASTA "after_last") + (UNSPEC_CLASTB "last") + (UNSPEC_LASTA "after_last") + (UNSPEC_LASTB "last")]) + ;; "s" for signed operations and "u" for unsigned ones. -(define_int_attr su [(UNSPEC_UNPACKSHI "s") +(define_int_attr su [(UNSPEC_SADDV "s") + (UNSPEC_UADDV "u") + (UNSPEC_SADDLV "s") + (UNSPEC_UADDLV "u") + (UNSPEC_UNPACKSHI "s") (UNSPEC_UNPACKUHI "u") (UNSPEC_UNPACKSLO "s") (UNSPEC_UNPACKULO "u") (UNSPEC_SMUL_HIGHPART "s") - (UNSPEC_UMUL_HIGHPART "u")]) + (UNSPEC_UMUL_HIGHPART "u") + (UNSPEC_COND_FCVTZS "s") + (UNSPEC_COND_FCVTZU "u") + (UNSPEC_COND_SCVTF "s") + (UNSPEC_COND_UCVTF "u") + (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u") + (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")]) (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") (UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u") - (UNSPEC_SRHSUB "sr") (UNSPEC_URHSUB "ur") (UNSPEC_ADDHN "") (UNSPEC_RADDHN "r") + (UNSPEC_SABAL "s") (UNSPEC_UABAL "u") + (UNSPEC_SABAL2 "s") (UNSPEC_UABAL2 "u") + (UNSPEC_SABDL "s") (UNSPEC_UABDL "u") + (UNSPEC_SABDL2 "s") (UNSPEC_UABDL2 "u") + (UNSPEC_SADALP "s") (UNSPEC_UADALP "u") (UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r") (UNSPEC_ADDHN2 "") (UNSPEC_RADDHN2 "r") (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r") @@ -1595,6 +3004,9 @@ (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr") (UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s") (UNSPEC_SDOT "s") (UNSPEC_UDOT "u") + (UNSPEC_USDOT "us") (UNSPEC_SUDOT "su") + (UNSPEC_SMATMUL "s") (UNSPEC_UMATMUL "u") + (UNSPEC_USMATMUL "us") ]) (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r") @@ -1603,15 +3015,31 @@ (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r") (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") (UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r") + (UNSPEC_SMULHS "") (UNSPEC_UMULHS "") + (UNSPEC_SMULHRS "r") (UNSPEC_UMULHRS "r") ]) (define_int_attr lr [(UNSPEC_SSLI "l") (UNSPEC_USLI "l") - (UNSPEC_SSRI "r") (UNSPEC_USRI "r")]) + (UNSPEC_SSRI "r") (UNSPEC_USRI "r") + (UNSPEC_SQSHL "l") (UNSPEC_UQSHL "l") + (UNSPEC_SQSHLU "l") + (UNSPEC_SRSHR "r") (UNSPEC_URSHR "r") + (UNSPEC_ASRD "r") + (UNSPEC_SLI "l") (UNSPEC_SRI "r")]) (define_int_attr u [(UNSPEC_SQSHLU "u") (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") (UNSPEC_SQSHRUN "u") (UNSPEC_SQRSHRUN "u") - (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "") - (UNSPEC_SQRSHRN "") (UNSPEC_UQRSHRN "")]) + (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "") + (UNSPEC_SQRSHRN "") (UNSPEC_UQRSHRN "") + (UNSPEC_SHADD "") (UNSPEC_UHADD "u") + 
(UNSPEC_SRHADD "") (UNSPEC_URHADD "u")]) + +(define_int_attr fn [(UNSPEC_LDFF1 "f") (UNSPEC_LDNF1 "n")]) + +(define_int_attr ab [(UNSPEC_CLASTA "a") (UNSPEC_CLASTB "b") + (UNSPEC_LASTA "a") (UNSPEC_LASTB "b")]) + +(define_int_attr bt [(UNSPEC_BFMLALB "b") (UNSPEC_BFMLALT "t")]) (define_int_attr addsub [(UNSPEC_SHADD "add") (UNSPEC_UHADD "add") @@ -1619,8 +3047,6 @@ (UNSPEC_URHADD "add") (UNSPEC_SHSUB "sub") (UNSPEC_UHSUB "sub") - (UNSPEC_SRHSUB "sub") - (UNSPEC_URHSUB "sub") (UNSPEC_ADDHN "add") (UNSPEC_SUBHN "sub") (UNSPEC_RADDHN "add") @@ -1630,6 +3056,18 @@ (UNSPEC_RADDHN2 "add") (UNSPEC_RSUBHN2 "sub")]) +;; BSL variants: first commutative operand. +(define_int_attr bsl_1st [(1 "w") (2 "0")]) + +;; BSL variants: second commutative operand. +(define_int_attr bsl_2nd [(1 "0") (2 "w")]) + +;; BSL variants: duplicated input operand. +(define_int_attr bsl_dup [(1 "1") (2 "2")]) + +;; BSL variants: operand which requires preserving via movprfx. +(define_int_attr bsl_mov [(1 "2") (2 "1")]) + (define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "") (UNSPEC_SSRI "offset_") (UNSPEC_USRI "offset_")]) @@ -1659,29 +3097,47 @@ (UNSPEC_FCVTZU "fcvtzu")]) ;; Pointer authentication mnemonic prefix. -(define_int_attr pauth_mnem_prefix [(UNSPEC_PACISP "paci") - (UNSPEC_AUTISP "auti") - (UNSPEC_PACI1716 "paci") - (UNSPEC_AUTI1716 "auti")]) - -;; Pointer authentication HINT number for NOP space instructions using A Key. -(define_int_attr pauth_hint_num_a [(UNSPEC_PACISP "25") - (UNSPEC_AUTISP "29") - (UNSPEC_PACI1716 "8") - (UNSPEC_AUTI1716 "12")]) - -(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") - (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") - (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) +(define_int_attr pauth_mnem_prefix [(UNSPEC_PACIASP "pacia") + (UNSPEC_PACIBSP "pacib") + (UNSPEC_PACIA1716 "pacia") + (UNSPEC_PACIB1716 "pacib") + (UNSPEC_AUTIASP "autia") + (UNSPEC_AUTIBSP "autib") + (UNSPEC_AUTIA1716 "autia") + (UNSPEC_AUTIB1716 "autib")]) + +(define_int_attr pauth_key [(UNSPEC_PACIASP "AARCH64_KEY_A") + (UNSPEC_PACIBSP "AARCH64_KEY_B") + (UNSPEC_PACIA1716 "AARCH64_KEY_A") + (UNSPEC_PACIB1716 "AARCH64_KEY_B") + (UNSPEC_AUTIASP "AARCH64_KEY_A") + (UNSPEC_AUTIBSP "AARCH64_KEY_B") + (UNSPEC_AUTIA1716 "AARCH64_KEY_A") + (UNSPEC_AUTIB1716 "AARCH64_KEY_B")]) + +;; Pointer authentication HINT number for NOP space instructions using A and +;; B key. +(define_int_attr pauth_hint_num [(UNSPEC_PACIASP "25") + (UNSPEC_PACIBSP "27") + (UNSPEC_AUTIASP "29") + (UNSPEC_AUTIBSP "31") + (UNSPEC_PACIA1716 "8") + (UNSPEC_PACIB1716 "10") + (UNSPEC_AUTIA1716 "12") + (UNSPEC_AUTIB1716 "14")]) + +(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip1") (UNSPEC_ZIP2 "zip2") + (UNSPEC_ZIP1Q "zip1") (UNSPEC_ZIP2Q "zip2") + (UNSPEC_TRN1 "trn1") (UNSPEC_TRN2 "trn2") + (UNSPEC_TRN1Q "trn1") (UNSPEC_TRN2Q "trn2") + (UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2") + (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")]) ; op code for REV instructions (size within which elements are reversed). 
(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") (UNSPEC_REV16 "16")]) -(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") - (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") - (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2") - (UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") +(define_int_attr perm_hilo [(UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo")]) ;; Return true if the associated optab refers to the high-numbered lanes, @@ -1693,8 +3149,6 @@ (UNSPEC_UNPACKSLO "BYTES_BIG_ENDIAN") (UNSPEC_UNPACKULO "BYTES_BIG_ENDIAN")]) -(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) - (define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32X "crc32x") (UNSPEC_CRC32CB "crc32cb") (UNSPEC_CRC32CH "crc32ch") @@ -1725,39 +3179,475 @@ (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s") (UNSPEC_FMLAL2 "a") (UNSPEC_FMLSL2 "s")]) +(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x") + (UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")]) + ;; The condition associated with an UNSPEC_COND_. -(define_int_attr cmp_op [(UNSPEC_COND_LT "lt") - (UNSPEC_COND_LE "le") - (UNSPEC_COND_EQ "eq") - (UNSPEC_COND_NE "ne") - (UNSPEC_COND_GE "ge") - (UNSPEC_COND_GT "gt") - (UNSPEC_COND_LO "lo") - (UNSPEC_COND_LS "ls") - (UNSPEC_COND_HS "hs") - (UNSPEC_COND_HI "hi")]) - -;; The constraint to use for an UNSPEC_COND_. -(define_int_attr imm_con [(UNSPEC_COND_EQ "vsc") - (UNSPEC_COND_NE "vsc") - (UNSPEC_COND_LT "vsc") - (UNSPEC_COND_GE "vsc") - (UNSPEC_COND_LE "vsc") - (UNSPEC_COND_GT "vsc") - (UNSPEC_COND_LO "vsd") - (UNSPEC_COND_LS "vsd") - (UNSPEC_COND_HS "vsd") - (UNSPEC_COND_HI "vsd")]) - -(define_int_attr sve_int_op [(UNSPEC_COND_ADD "add") - (UNSPEC_COND_SUB "sub") - (UNSPEC_COND_SMAX "smax") - (UNSPEC_COND_UMAX "umax") - (UNSPEC_COND_SMIN "smin") - (UNSPEC_COND_UMIN "umin") - (UNSPEC_COND_AND "and") - (UNSPEC_COND_ORR "orr") - (UNSPEC_COND_EOR "eor")]) - -(define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd") - (UNSPEC_COND_SUB "fsub")]) +(define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq") + (UNSPEC_COND_CMPGE_WIDE "ge") + (UNSPEC_COND_CMPGT_WIDE "gt") + (UNSPEC_COND_CMPHI_WIDE "hi") + (UNSPEC_COND_CMPHS_WIDE "hs") + (UNSPEC_COND_CMPLE_WIDE "le") + (UNSPEC_COND_CMPLO_WIDE "lo") + (UNSPEC_COND_CMPLS_WIDE "ls") + (UNSPEC_COND_CMPLT_WIDE "lt") + (UNSPEC_COND_CMPNE_WIDE "ne") + (UNSPEC_COND_FCMEQ "eq") + (UNSPEC_COND_FCMGE "ge") + (UNSPEC_COND_FCMGT "gt") + (UNSPEC_COND_FCMLE "le") + (UNSPEC_COND_FCMLT "lt") + (UNSPEC_COND_FCMNE "ne") + (UNSPEC_WHILEGE "ge") + (UNSPEC_WHILEGT "gt") + (UNSPEC_WHILEHI "hi") + (UNSPEC_WHILEHS "hs") + (UNSPEC_WHILELE "le") + (UNSPEC_WHILELO "lo") + (UNSPEC_WHILELS "ls") + (UNSPEC_WHILELT "lt") + (UNSPEC_WHILERW "rw") + (UNSPEC_WHILEWR "wr")]) + +(define_int_attr while_optab_cmp [(UNSPEC_WHILEGE "ge") + (UNSPEC_WHILEGT "gt") + (UNSPEC_WHILEHI "ugt") + (UNSPEC_WHILEHS "uge") + (UNSPEC_WHILELE "le") + (UNSPEC_WHILELO "ult") + (UNSPEC_WHILELS "ule") + (UNSPEC_WHILELT "lt") + (UNSPEC_WHILERW "rw") + (UNSPEC_WHILEWR "wr")]) + +(define_int_attr raw_war [(UNSPEC_WHILERW "raw") + (UNSPEC_WHILEWR "war")]) + +(define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b") + (UNSPEC_BRKN "n") + (UNSPEC_BRKPA "pa") (UNSPEC_BRKPB "pb")]) + +(define_int_attr sve_pred_op [(UNSPEC_PFIRST "pfirst") (UNSPEC_PNEXT "pnext")]) + +(define_int_attr sve_int_op [(UNSPEC_ADCLB "adclb") + (UNSPEC_ADCLT "adclt") + (UNSPEC_ADDHNB "addhnb") + 
(UNSPEC_ADDHNT "addhnt") + (UNSPEC_ADDP "addp") + (UNSPEC_ANDV "andv") + (UNSPEC_ASHIFTRT_WIDE "asr") + (UNSPEC_ASHIFT_WIDE "lsl") + (UNSPEC_ASRD "asrd") + (UNSPEC_BDEP "bdep") + (UNSPEC_BEXT "bext") + (UNSPEC_BGRP "bgrp") + (UNSPEC_CADD90 "cadd") + (UNSPEC_CADD270 "cadd") + (UNSPEC_CDOT "cdot") + (UNSPEC_CDOT90 "cdot") + (UNSPEC_CDOT180 "cdot") + (UNSPEC_CDOT270 "cdot") + (UNSPEC_CMLA "cmla") + (UNSPEC_CMLA90 "cmla") + (UNSPEC_CMLA180 "cmla") + (UNSPEC_CMLA270 "cmla") + (UNSPEC_EORBT "eorbt") + (UNSPEC_EORTB "eortb") + (UNSPEC_IORV "orv") + (UNSPEC_LSHIFTRT_WIDE "lsr") + (UNSPEC_MATCH "match") + (UNSPEC_NMATCH "nmatch") + (UNSPEC_PMULLB "pmullb") + (UNSPEC_PMULLB_PAIR "pmullb") + (UNSPEC_PMULLT "pmullt") + (UNSPEC_PMULLT_PAIR "pmullt") + (UNSPEC_RADDHNB "raddhnb") + (UNSPEC_RADDHNT "raddhnt") + (UNSPEC_RBIT "rbit") + (UNSPEC_REVB "revb") + (UNSPEC_REVH "revh") + (UNSPEC_REVW "revw") + (UNSPEC_RSHRNB "rshrnb") + (UNSPEC_RSHRNT "rshrnt") + (UNSPEC_RSQRTE "ursqrte") + (UNSPEC_RSUBHNB "rsubhnb") + (UNSPEC_RSUBHNT "rsubhnt") + (UNSPEC_SABDLB "sabdlb") + (UNSPEC_SABDLT "sabdlt") + (UNSPEC_SADALP "sadalp") + (UNSPEC_SADDLB "saddlb") + (UNSPEC_SADDLBT "saddlbt") + (UNSPEC_SADDLT "saddlt") + (UNSPEC_SADDWB "saddwb") + (UNSPEC_SADDWT "saddwt") + (UNSPEC_SBCLB "sbclb") + (UNSPEC_SBCLT "sbclt") + (UNSPEC_SHADD "shadd") + (UNSPEC_SHRNB "shrnb") + (UNSPEC_SHRNT "shrnt") + (UNSPEC_SHSUB "shsub") + (UNSPEC_SLI "sli") + (UNSPEC_SMAXP "smaxp") + (UNSPEC_SMAXV "smaxv") + (UNSPEC_SMINP "sminp") + (UNSPEC_SMINV "sminv") + (UNSPEC_SMUL_HIGHPART "smulh") + (UNSPEC_SMULLB "smullb") + (UNSPEC_SMULLT "smullt") + (UNSPEC_SQCADD90 "sqcadd") + (UNSPEC_SQCADD270 "sqcadd") + (UNSPEC_SQDMULH "sqdmulh") + (UNSPEC_SQDMULLB "sqdmullb") + (UNSPEC_SQDMULLBT "sqdmullbt") + (UNSPEC_SQDMULLT "sqdmullt") + (UNSPEC_SQRDCMLAH "sqrdcmlah") + (UNSPEC_SQRDCMLAH90 "sqrdcmlah") + (UNSPEC_SQRDCMLAH180 "sqrdcmlah") + (UNSPEC_SQRDCMLAH270 "sqrdcmlah") + (UNSPEC_SQRDMLAH "sqrdmlah") + (UNSPEC_SQRDMLSH "sqrdmlsh") + (UNSPEC_SQRDMULH "sqrdmulh") + (UNSPEC_SQRSHL "sqrshl") + (UNSPEC_SQRSHRNB "sqrshrnb") + (UNSPEC_SQRSHRNT "sqrshrnt") + (UNSPEC_SQRSHRUNB "sqrshrunb") + (UNSPEC_SQRSHRUNT "sqrshrunt") + (UNSPEC_SQSHL "sqshl") + (UNSPEC_SQSHLU "sqshlu") + (UNSPEC_SQSHRNB "sqshrnb") + (UNSPEC_SQSHRNT "sqshrnt") + (UNSPEC_SQSHRUNB "sqshrunb") + (UNSPEC_SQSHRUNT "sqshrunt") + (UNSPEC_SQXTNB "sqxtnb") + (UNSPEC_SQXTNT "sqxtnt") + (UNSPEC_SQXTUNB "sqxtunb") + (UNSPEC_SQXTUNT "sqxtunt") + (UNSPEC_SRHADD "srhadd") + (UNSPEC_SRI "sri") + (UNSPEC_SRSHL "srshl") + (UNSPEC_SRSHR "srshr") + (UNSPEC_SSHLLB "sshllb") + (UNSPEC_SSHLLT "sshllt") + (UNSPEC_SSUBLB "ssublb") + (UNSPEC_SSUBLBT "ssublbt") + (UNSPEC_SSUBLT "ssublt") + (UNSPEC_SSUBLTB "ssubltb") + (UNSPEC_SSUBWB "ssubwb") + (UNSPEC_SSUBWT "ssubwt") + (UNSPEC_SUBHNB "subhnb") + (UNSPEC_SUBHNT "subhnt") + (UNSPEC_SUQADD "suqadd") + (UNSPEC_UABDLB "uabdlb") + (UNSPEC_UABDLT "uabdlt") + (UNSPEC_UADALP "uadalp") + (UNSPEC_UADDLB "uaddlb") + (UNSPEC_UADDLT "uaddlt") + (UNSPEC_UADDWB "uaddwb") + (UNSPEC_UADDWT "uaddwt") + (UNSPEC_UHADD "uhadd") + (UNSPEC_UHSUB "uhsub") + (UNSPEC_UMAXP "umaxp") + (UNSPEC_UMAXV "umaxv") + (UNSPEC_UMINP "uminp") + (UNSPEC_UMINV "uminv") + (UNSPEC_UMUL_HIGHPART "umulh") + (UNSPEC_UMULLB "umullb") + (UNSPEC_UMULLT "umullt") + (UNSPEC_UQRSHL "uqrshl") + (UNSPEC_UQRSHRNB "uqrshrnb") + (UNSPEC_UQRSHRNT "uqrshrnt") + (UNSPEC_UQSHL "uqshl") + (UNSPEC_UQSHRNB "uqshrnb") + (UNSPEC_UQSHRNT "uqshrnt") + (UNSPEC_UQXTNB "uqxtnb") + (UNSPEC_UQXTNT "uqxtnt") + (UNSPEC_URECPE "urecpe") + 
(UNSPEC_URHADD "urhadd") + (UNSPEC_URSHL "urshl") + (UNSPEC_URSHR "urshr") + (UNSPEC_USHLLB "ushllb") + (UNSPEC_USHLLT "ushllt") + (UNSPEC_USQADD "usqadd") + (UNSPEC_USUBLB "usublb") + (UNSPEC_USUBLT "usublt") + (UNSPEC_USUBWB "usubwb") + (UNSPEC_USUBWT "usubwt") + (UNSPEC_XORV "eorv")]) + +(define_int_attr sve_int_op_rev [(UNSPEC_SHADD "shadd") + (UNSPEC_SHSUB "shsubr") + (UNSPEC_SQRSHL "sqrshlr") + (UNSPEC_SRHADD "srhadd") + (UNSPEC_SRSHL "srshlr") + (UNSPEC_UHADD "uhadd") + (UNSPEC_UHSUB "uhsubr") + (UNSPEC_UQRSHL "uqrshlr") + (UNSPEC_URHADD "urhadd") + (UNSPEC_URSHL "urshlr")]) + +(define_int_attr sve_int_add_op [(UNSPEC_SABDLB "sabalb") + (UNSPEC_SABDLT "sabalt") + (UNSPEC_SMULLB "smlalb") + (UNSPEC_SMULLT "smlalt") + (UNSPEC_UABDLB "uabalb") + (UNSPEC_UABDLT "uabalt") + (UNSPEC_UMULLB "umlalb") + (UNSPEC_UMULLT "umlalt")]) + +(define_int_attr sve_int_qadd_op [(UNSPEC_SQDMULLB "sqdmlalb") + (UNSPEC_SQDMULLBT "sqdmlalbt") + (UNSPEC_SQDMULLT "sqdmlalt")]) + +(define_int_attr sve_int_sub_op [(UNSPEC_SMULLB "smlslb") + (UNSPEC_SMULLT "smlslt") + (UNSPEC_UMULLB "umlslb") + (UNSPEC_UMULLT "umlslt")]) + +(define_int_attr sve_int_qsub_op [(UNSPEC_SQDMULLB "sqdmlslb") + (UNSPEC_SQDMULLBT "sqdmlslbt") + (UNSPEC_SQDMULLT "sqdmlslt")]) + +(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot") + (UNSPEC_BFMLALB "bfmlalb") + (UNSPEC_BFMLALT "bfmlalt") + (UNSPEC_BFMMLA "bfmmla") + (UNSPEC_FRECPE "frecpe") + (UNSPEC_FRECPS "frecps") + (UNSPEC_RSQRTE "frsqrte") + (UNSPEC_RSQRTS "frsqrts") + (UNSPEC_FADDP "faddp") + (UNSPEC_FADDV "faddv") + (UNSPEC_FEXPA "fexpa") + (UNSPEC_FMAXNMP "fmaxnmp") + (UNSPEC_FMAXNMV "fmaxnmv") + (UNSPEC_FMAXP "fmaxp") + (UNSPEC_FMAXV "fmaxv") + (UNSPEC_FMINNMP "fminnmp") + (UNSPEC_FMINNMV "fminnmv") + (UNSPEC_FMINP "fminp") + (UNSPEC_FMINV "fminv") + (UNSPEC_FMLA "fmla") + (UNSPEC_FMLALB "fmlalb") + (UNSPEC_FMLALT "fmlalt") + (UNSPEC_FMLS "fmls") + (UNSPEC_FMLSLB "fmlslb") + (UNSPEC_FMLSLT "fmlslt") + (UNSPEC_FMMLA "fmmla") + (UNSPEC_FTSMUL "ftsmul") + (UNSPEC_FTSSEL "ftssel") + (UNSPEC_COND_FABS "fabs") + (UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FCVTLT "fcvtlt") + (UNSPEC_COND_FCVTX "fcvtx") + (UNSPEC_COND_FDIV "fdiv") + (UNSPEC_COND_FLOGB "flogb") + (UNSPEC_COND_FMAX "fmax") + (UNSPEC_COND_FMAXNM "fmaxnm") + (UNSPEC_COND_FMIN "fmin") + (UNSPEC_COND_FMINNM "fminnm") + (UNSPEC_COND_FMUL "fmul") + (UNSPEC_COND_FMULX "fmulx") + (UNSPEC_COND_FNEG "fneg") + (UNSPEC_COND_FRECPX "frecpx") + (UNSPEC_COND_FRINTA "frinta") + (UNSPEC_COND_FRINTI "frinti") + (UNSPEC_COND_FRINTM "frintm") + (UNSPEC_COND_FRINTN "frintn") + (UNSPEC_COND_FRINTP "frintp") + (UNSPEC_COND_FRINTX "frintx") + (UNSPEC_COND_FRINTZ "frintz") + (UNSPEC_COND_FSCALE "fscale") + (UNSPEC_COND_FSQRT "fsqrt") + (UNSPEC_COND_FSUB "fsub")]) + +(define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FDIV "fdivr") + (UNSPEC_COND_FMAX "fmax") + (UNSPEC_COND_FMAXNM "fmaxnm") + (UNSPEC_COND_FMIN "fmin") + (UNSPEC_COND_FMINNM "fminnm") + (UNSPEC_COND_FMUL "fmul") + (UNSPEC_COND_FMULX "fmulx") + (UNSPEC_COND_FSUB "fsubr")]) + +(define_int_attr rot [(UNSPEC_CADD90 "90") + (UNSPEC_CADD270 "270") + (UNSPEC_CDOT "0") + (UNSPEC_CDOT90 "90") + (UNSPEC_CDOT180 "180") + (UNSPEC_CDOT270 "270") + (UNSPEC_CMLA "0") + (UNSPEC_CMLA90 "90") + (UNSPEC_CMLA180 "180") + (UNSPEC_CMLA270 "270") + (UNSPEC_FCADD90 "90") + (UNSPEC_FCADD270 "270") + (UNSPEC_FCMLA "0") + (UNSPEC_FCMLA90 "90") + (UNSPEC_FCMLA180 "180") + (UNSPEC_FCMLA270 "270") + (UNSPEC_SQCADD90 "90") + (UNSPEC_SQCADD270 "270") + (UNSPEC_SQRDCMLAH "0") + (UNSPEC_SQRDCMLAH90 
"90") + (UNSPEC_SQRDCMLAH180 "180") + (UNSPEC_SQRDCMLAH270 "270") + (UNSPEC_COND_FCADD90 "90") + (UNSPEC_COND_FCADD270 "270") + (UNSPEC_COND_FCMLA "0") + (UNSPEC_COND_FCMLA90 "90") + (UNSPEC_COND_FCMLA180 "180") + (UNSPEC_COND_FCMLA270 "270") + (UNSPEC_FCMUL "0") + (UNSPEC_FCMUL_CONJ "180")]) + +;; A conjucate is a negation of the imaginary component +;; The number in the unspecs are the rotation component of the instruction, e.g +;; FCMLA180 means use the instruction with #180. +;; The iterator is used to produce the right name mangling for the function. +(define_int_attr conj_op [(UNSPEC_FCMLA180 "") + (UNSPEC_FCMLA180_CONJ "_conj") + (UNSPEC_FCMLA "") + (UNSPEC_FCMLA_CONJ "_conj") + (UNSPEC_FCMUL "") + (UNSPEC_FCMUL_CONJ "_conj") + (UNSPEC_CMLA "") + (UNSPEC_CMLA180 "") + (UNSPEC_CMLA180_CONJ "_conj") + (UNSPEC_CMLA_CONJ "_conj") + (UNSPEC_CMUL "") + (UNSPEC_CMUL_CONJ "_conj")]) + +;; The complex operations when performed on a real complex number require two +;; instructions to perform the operation. e.g. complex multiplication requires +;; two FCMUL with a particular rotation value. +;; +;; These values can be looked up in rotsplit1 and rotsplit2. as an example +;; FCMUL needs the first instruction to use #0 and the second #90. +(define_int_attr rotsplit1 [(UNSPEC_FCMLA "0") + (UNSPEC_FCMLA_CONJ "0") + (UNSPEC_FCMUL "0") + (UNSPEC_FCMUL_CONJ "0") + (UNSPEC_FCMLA180 "180") + (UNSPEC_FCMLA180_CONJ "180")]) + +(define_int_attr rotsplit2 [(UNSPEC_FCMLA "90") + (UNSPEC_FCMLA_CONJ "270") + (UNSPEC_FCMUL "90") + (UNSPEC_FCMUL_CONJ "270") + (UNSPEC_FCMLA180 "270") + (UNSPEC_FCMLA180_CONJ "90")]) + +;; SVE has slightly different namings from NEON so we have to split these +;; iterators. +(define_int_attr sve_rot1 [(UNSPEC_FCMLA "") + (UNSPEC_FCMLA_CONJ "") + (UNSPEC_FCMUL "") + (UNSPEC_FCMUL_CONJ "") + (UNSPEC_FCMLA180 "180") + (UNSPEC_FCMLA180_CONJ "180") + (UNSPEC_CMLA "") + (UNSPEC_CMLA_CONJ "") + (UNSPEC_CMUL "") + (UNSPEC_CMUL_CONJ "") + (UNSPEC_CMLA180 "180") + (UNSPEC_CMLA180_CONJ "180")]) + +(define_int_attr sve_rot2 [(UNSPEC_FCMLA "90") + (UNSPEC_FCMLA_CONJ "270") + (UNSPEC_FCMUL "90") + (UNSPEC_FCMUL_CONJ "270") + (UNSPEC_FCMLA180 "270") + (UNSPEC_FCMLA180_CONJ "90") + (UNSPEC_CMLA "90") + (UNSPEC_CMLA_CONJ "270") + (UNSPEC_CMUL "90") + (UNSPEC_CMUL_CONJ "270") + (UNSPEC_CMLA180 "270") + (UNSPEC_CMLA180_CONJ "90")]) + + +(define_int_attr fcmac1 [(UNSPEC_FCMLA "a") (UNSPEC_FCMLA_CONJ "a") + (UNSPEC_FCMLA180 "s") (UNSPEC_FCMLA180_CONJ "s") + (UNSPEC_CMLA "a") (UNSPEC_CMLA_CONJ "a") + (UNSPEC_CMLA180 "s") (UNSPEC_CMLA180_CONJ "s")]) + +(define_int_attr sve_fmla_op [(UNSPEC_COND_FMLA "fmla") + (UNSPEC_COND_FMLS "fmls") + (UNSPEC_COND_FNMLA "fnmla") + (UNSPEC_COND_FNMLS "fnmls")]) + +(define_int_attr sve_fmad_op [(UNSPEC_COND_FMLA "fmad") + (UNSPEC_COND_FMLS "fmsb") + (UNSPEC_COND_FNMLA "fnmad") + (UNSPEC_COND_FNMLS "fnmsb")]) + +;; The register constraint to use for the final operand in a binary BRK. +(define_int_attr brk_reg_con [(UNSPEC_BRKN "0") + (UNSPEC_BRKPA "Upa") (UNSPEC_BRKPB "Upa")]) + +;; The register number to print for the above. +(define_int_attr brk_reg_opno [(UNSPEC_BRKN "0") + (UNSPEC_BRKPA "3") (UNSPEC_BRKPB "3")]) + +;; The predicate to use for the first input operand in a floating-point +;; 3 pattern. 
+(define_int_attr sve_pred_fp_rhs1_operand + [(UNSPEC_COND_FADD "register_operand") + (UNSPEC_COND_FDIV "register_operand") + (UNSPEC_COND_FMAX "register_operand") + (UNSPEC_COND_FMAXNM "register_operand") + (UNSPEC_COND_FMIN "register_operand") + (UNSPEC_COND_FMINNM "register_operand") + (UNSPEC_COND_FMUL "register_operand") + (UNSPEC_COND_FMULX "register_operand") + (UNSPEC_COND_FSUB "aarch64_sve_float_arith_operand")]) + +;; The predicate to use for the second input operand in a floating-point +;; 3 pattern. +(define_int_attr sve_pred_fp_rhs2_operand + [(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand") + (UNSPEC_COND_FDIV "register_operand") + (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand") + (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand") + (UNSPEC_COND_FMIN "aarch64_sve_float_maxmin_operand") + (UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_operand") + (UNSPEC_COND_FMUL "aarch64_sve_float_mul_operand") + (UNSPEC_COND_FMULX "register_operand") + (UNSPEC_COND_FSUB "register_operand")]) + +;; Likewise for immediates only. +(define_int_attr sve_pred_fp_rhs2_immediate + [(UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_immediate") + (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_immediate") + (UNSPEC_COND_FMIN "aarch64_sve_float_maxmin_immediate") + (UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_immediate") + (UNSPEC_COND_FMUL "aarch64_sve_float_mul_immediate")]) + +;; The maximum number of element bits that an instruction can handle. +(define_int_attr max_elem_bits [(UNSPEC_UADDV "64") (UNSPEC_SADDV "32") + (UNSPEC_PFIRST "8") (UNSPEC_PNEXT "64")]) + +;; The minimum number of element bits that an instruction can handle. +(define_int_attr min_elem_bits [(UNSPEC_RBIT "8") + (UNSPEC_REVB "16") + (UNSPEC_REVH "32") + (UNSPEC_REVW "64")]) + +(define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW") + (UNSPEC_WHILEWR "UNSPEC_WHILEWR")]) + +;; Iterators and attributes for fpcr fpsr getter setters + +(define_int_iterator GET_FPSCR + [UNSPECV_GET_FPSR UNSPECV_GET_FPCR]) + +(define_int_iterator SET_FPSCR + [UNSPECV_SET_FPSR UNSPECV_SET_FPCR]) + +(define_int_attr fpscr_name + [(UNSPECV_GET_FPSR "fpsr") + (UNSPECV_SET_FPSR "fpsr") + (UNSPECV_GET_FPCR "fpcr") + (UNSPECV_SET_FPCR "fpcr")]) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 5d41d4350402b..49f02ae038135 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -1,5 +1,5 @@ ;; Machine description for AArch64 architecture. -;; Copyright (C) 2009-2018 Free Software Foundation, Inc. +;; Copyright (C) 2009-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -18,6 +18,8 @@ ;; along with GCC; see the file COPYING3. If not see ;; . +(include "../arm/common.md") + (define_special_predicate "cc_register" (and (match_code "reg") (and (match_test "REGNO (op) == CC_REGNUM") @@ -30,14 +32,27 @@ (ior (match_code "symbol_ref") (match_operand 0 "register_operand"))) +(define_predicate "aarch64_general_reg" + (and (match_operand 0 "register_operand") + (match_test "REGNO_REG_CLASS (REGNO (op)) == STUB_REGS + || REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS"))) + ;; Return true if OP a (const_int 0) operand. 
(define_predicate "const0_operand" (and (match_code "const_int") (match_test "op == CONST0_RTX (mode)"))) -(define_special_predicate "subreg_lowpart_operator" - (and (match_code "subreg") - (match_test "subreg_lowpart_p (op)"))) +(define_predicate "const_1_to_3_operand" + (match_code "const_int,const_vector") +{ + op = unwrap_const_vec_duplicate (op); + return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3); +}) + +(define_predicate "subreg_lowpart_operator" + (ior (match_code "truncate") + (and (match_code "subreg") + (match_test "subreg_lowpart_p (op)")))) (define_predicate "aarch64_ccmp_immediate" (and (match_code "const_int") @@ -49,19 +64,22 @@ (define_predicate "aarch64_simd_register" (and (match_code "reg") - (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") - (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) + (match_test "FP_REGNUM_P (REGNO (op))"))) (define_predicate "aarch64_reg_or_zero" - (and (match_code "reg,subreg,const_int") + (and (match_code "reg,subreg,const_int,const_double") (ior (match_operand 0 "register_operand") - (match_test "op == const0_rtx")))) + (match_test "op == CONST0_RTX (GET_MODE (op))")))) (define_predicate "aarch64_reg_or_fp_zero" (ior (match_operand 0 "register_operand") (and (match_code "const_double") (match_test "aarch64_float_const_zero_rtx_p (op)")))) +(define_predicate "aarch64_reg_zero_or_fp_zero" + (ior (match_operand 0 "aarch64_reg_or_fp_zero") + (match_operand 0 "aarch64_reg_or_zero"))) + (define_predicate "aarch64_reg_zero_or_m1_or_1" (and (match_code "reg,subreg,const_int") (ior (match_operand 0 "register_operand") @@ -90,6 +108,10 @@ (and (match_code "const_double") (match_test "aarch64_fpconst_pow_of_2 (op) > 0"))) +(define_predicate "aarch64_fp_pow2_recip" + (and (match_code "const_double") + (match_test "aarch64_fpconst_pow2_recip (op) > 0"))) + (define_predicate "aarch64_fp_vec_pow2" (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0")) @@ -99,17 +121,29 @@ (define_predicate "aarch64_sub_immediate" (and (match_code "const_int") - (match_test "aarch64_uimm12_shift (-INTVAL (op))"))) + (match_test "aarch64_uimm12_shift (-UINTVAL (op))"))) (define_predicate "aarch64_plus_immediate" (and (match_code "const_int") (ior (match_test "aarch64_uimm12_shift (INTVAL (op))") - (match_test "aarch64_uimm12_shift (-INTVAL (op))")))) + (match_test "aarch64_uimm12_shift (-UINTVAL (op))")))) (define_predicate "aarch64_plus_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_plus_immediate"))) +(define_predicate "aarch64_plushi_immediate" + (match_code "const_int") +{ + HOST_WIDE_INT val = INTVAL (op); + /* The HImode value must be zero-extendable to an SImode plus_operand. 
*/ + return ((val & 0xfff) == val || sext_hwi (val & 0xf000, 16) == val); +}) + +(define_predicate "aarch64_plushi_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_plushi_immediate"))) + (define_predicate "aarch64_pluslong_immediate" (and (match_code "const_int") (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)"))) @@ -118,10 +152,18 @@ (and (match_operand 0 "aarch64_pluslong_immediate") (not (match_operand 0 "aarch64_plus_immediate")))) +(define_predicate "aarch64_sve_scalar_inc_dec_immediate" + (and (match_code "const_poly_int") + (match_test "aarch64_sve_scalar_inc_dec_immediate_p (op)"))) + (define_predicate "aarch64_sve_addvl_addpl_immediate" (and (match_code "const_poly_int") (match_test "aarch64_sve_addvl_addpl_immediate_p (op)"))) +(define_predicate "aarch64_sve_plus_immediate" + (ior (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate") + (match_operand 0 "aarch64_sve_addvl_addpl_immediate"))) + (define_predicate "aarch64_split_add_offset_immediate" (and (match_code "const_poly_int") (match_test "aarch64_add_offset_temporaries (op) == 1"))) @@ -129,7 +171,8 @@ (define_predicate "aarch64_pluslong_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_pluslong_immediate") - (match_operand 0 "aarch64_sve_addvl_addpl_immediate"))) + (and (match_test "TARGET_SVE") + (match_operand 0 "aarch64_sve_plus_immediate")))) (define_predicate "aarch64_pluslong_or_poly_operand" (ior (match_operand 0 "aarch64_pluslong_operand") @@ -194,21 +237,6 @@ (and (match_code "const_int") (match_test "IN_RANGE (UINTVAL (op), 0, 0xffffff)"))) -(define_predicate "aarch64_pwr_imm3" - (and (match_code "const_int") - (match_test "INTVAL (op) != 0 - && (unsigned) exact_log2 (INTVAL (op)) <= 4"))) - -(define_predicate "aarch64_pwr_2_si" - (and (match_code "const_int") - (match_test "INTVAL (op) != 0 - && (unsigned) exact_log2 (INTVAL (op)) < 32"))) - -(define_predicate "aarch64_pwr_2_di" - (and (match_code "const_int") - (match_test "INTVAL (op) != 0 - && (unsigned) exact_log2 (INTVAL (op)) < 64"))) - (define_predicate "aarch64_mem_pair_offset" (and (match_code "const_int") (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))"))) @@ -222,8 +250,9 @@ ;; as a 128-bit vec_concat. (define_predicate "aarch64_mem_pair_lanes_operand" (and (match_code "mem") - (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0), 1, - ADDR_QUERY_LDP_STP)"))) + (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + false, + ADDR_QUERY_LDP_STP_N)"))) (define_predicate "aarch64_prefetch_operand" (match_test "aarch64_address_valid_for_prefetch_p (op, false)")) @@ -330,23 +359,37 @@ (match_code "eq,ne")) (define_special_predicate "aarch64_carry_operation" - (match_code "ne,geu") + (match_code "ltu,geu") { if (XEXP (op, 1) != const0_rtx) return false; - machine_mode ccmode = (GET_CODE (op) == NE ? CC_Cmode : CCmode); rtx op0 = XEXP (op, 0); - return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode; + if (!REG_P (op0) || REGNO (op0) != CC_REGNUM) + return false; + machine_mode ccmode = GET_MODE (op0); + if (ccmode == CC_Cmode) + return GET_CODE (op) == LTU; + if (ccmode == CC_ADCmode || ccmode == CCmode) + return GET_CODE (op) == GEU; + return false; }) +; borrow is essentially the inverse of carry since the sense of the C flag +; is inverted during subtraction. See the note in aarch64-modes.def. 
(define_special_predicate "aarch64_borrow_operation" - (match_code "eq,ltu") + (match_code "geu,ltu") { if (XEXP (op, 1) != const0_rtx) return false; - machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode); rtx op0 = XEXP (op, 0); - return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode; + if (!REG_P (op0) || REGNO (op0) != CC_REGNUM) + return false; + machine_mode ccmode = GET_MODE (op0); + if (ccmode == CC_Cmode) + return GET_CODE (op) == GEU; + if (ccmode == CC_ADCmode || ccmode == CCmode) + return GET_CODE (op) == LTU; + return false; }) ;; True if the operand is memory reference suitable for a load/store exclusive. @@ -354,6 +397,36 @@ (and (match_operand 0 "memory_operand") (match_code "reg" "0"))) +(define_predicate "aarch64_9bit_offset_memory_operand" + (and (match_operand 0 "memory_operand") + (ior (match_code "reg" "0") + (and (match_code "plus" "0") + (match_code "reg" "00") + (match_code "const_int" "01")))) +{ + rtx mem_op = XEXP (op, 0); + + if (REG_P (mem_op)) + return GET_MODE (mem_op) == DImode; + + rtx plus_op0 = XEXP (mem_op, 0); + rtx plus_op1 = XEXP (mem_op, 1); + + if (GET_MODE (plus_op0) != DImode) + return false; + + poly_int64 offset; + if (!poly_int_rtx_p (plus_op1, &offset)) + gcc_unreachable (); + + return aarch64_offset_9bit_signed_unscaled_p (mode, offset); +}) + +(define_predicate "aarch64_rcpc_memory_operand" + (if_then_else (match_test "AARCH64_ISA_RCPC8_4") + (match_operand 0 "aarch64_9bit_offset_memory_operand") + (match_operand 0 "aarch64_sync_memory_operand"))) + ;; Predicates for parallel expanders based on mode. (define_special_predicate "vect_par_cnst_hi_half" (match_code "parallel") @@ -367,6 +440,18 @@ return aarch64_simd_check_vect_par_cnst_half (op, mode, false); }) +(define_predicate "descending_int_parallel" + (match_code "parallel") +{ + return aarch64_stepped_int_parallel_p (op, -1); +}) + +(define_predicate "ascending_int_parallel" + (match_code "parallel") +{ + return aarch64_stepped_int_parallel_p (op, 1); +}) + (define_special_predicate "aarch64_simd_lshift_imm" (match_code "const,const_vector") { @@ -383,6 +468,10 @@ (and (match_code "const,const_vector") (match_test "op == CONST0_RTX (GET_MODE (op))"))) +(define_predicate "aarch64_simd_imm_one" + (and (match_code "const_vector") + (match_test "op == CONST1_RTX (GET_MODE (op))"))) + (define_predicate "aarch64_simd_or_scalar_imm_zero" (and (match_code "const_int,const_double,const,const_vector") (match_test "op == CONST0_RTX (GET_MODE (op))"))) @@ -397,6 +486,10 @@ (match_test "op == const0_rtx") (match_operand 0 "aarch64_simd_or_scalar_imm_zero")))) +(define_predicate "aarch64_simd_reg_or_minus_one" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_simd_imm_minus_one"))) + (define_predicate "aarch64_simd_struct_operand" (and (match_code "mem") (match_test "TARGET_SIMD && aarch64_simd_mem_operand_p (op)"))) @@ -452,6 +545,22 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 1, 64)"))) +(define_predicate "aarch64_simd_shift_imm_vec_qi" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_hi" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 16)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_si" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 32)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_di" + (and (match_code 
"const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)"))) + (define_predicate "aarch64_simd_shift_imm_bitsize_qi" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 8)"))) @@ -479,12 +588,44 @@ (and (match_operand 0 "memory_operand") (match_test "aarch64_sve_ld1r_operand_p (op)"))) +(define_predicate "aarch64_sve_ld1rq_operand" + (and (match_code "mem") + (match_test "aarch64_sve_ld1rq_operand_p (op)"))) + +(define_predicate "aarch64_sve_ld1ro_operand_b" + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) + +(define_predicate "aarch64_sve_ld1ro_operand_h" + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) + +(define_predicate "aarch64_sve_ld1ro_operand_w" + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) + +(define_predicate "aarch64_sve_ld1ro_operand_d" + (and (match_code "mem") + (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) + +(define_predicate "aarch64_sve_ldff1_operand" + (and (match_code "mem") + (match_test "aarch64_sve_ldff1_operand_p (op)"))) + +(define_predicate "aarch64_sve_ldnf1_operand" + (and (match_code "mem") + (match_test "aarch64_sve_ldnf1_operand_p (op)"))) + ;; Like memory_operand, but restricted to addresses that are valid for ;; SVE LDR and STR instructions. (define_predicate "aarch64_sve_ldr_operand" (and (match_code "mem") (match_test "aarch64_sve_ldr_operand_p (op)"))) +(define_special_predicate "aarch64_sve_prefetch_operand" + (and (match_code "reg, plus") + (match_test "aarch64_sve_prefetch_operand_p (op, mode)"))) + (define_predicate "aarch64_sve_nonimmediate_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_sve_ldr_operand"))) @@ -509,36 +650,96 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_sve_ld1r_operand"))) +(define_predicate "aarch64_sve_ptrue_svpattern_immediate" + (and (match_code "const") + (match_test "aarch64_sve_ptrue_svpattern_p (op, NULL)"))) + (define_predicate "aarch64_sve_arith_immediate" (and (match_code "const,const_vector") - (match_test "aarch64_sve_arith_immediate_p (op, false)"))) + (match_test "aarch64_sve_arith_immediate_p (mode, op, false)"))) (define_predicate "aarch64_sve_sub_arith_immediate" (and (match_code "const,const_vector") - (match_test "aarch64_sve_arith_immediate_p (op, true)"))) + (match_test "aarch64_sve_arith_immediate_p (mode, op, true)"))) + +(define_predicate "aarch64_sve_qadd_immediate" + (and (match_code "const,const_vector") + (match_test "aarch64_sve_sqadd_sqsub_immediate_p (mode, op, false)"))) -(define_predicate "aarch64_sve_inc_dec_immediate" +(define_predicate "aarch64_sve_qsub_immediate" (and (match_code "const,const_vector") - (match_test "aarch64_sve_inc_dec_immediate_p (op)"))) + (match_test "aarch64_sve_sqadd_sqsub_immediate_p (mode, op, true)"))) + +(define_predicate "aarch64_sve_vector_inc_dec_immediate" + (and (match_code "const,const_vector") + (match_test "aarch64_sve_vector_inc_dec_immediate_p (op)"))) + +(define_predicate "aarch64_sve_gather_immediate_b" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) + +(define_predicate "aarch64_sve_gather_immediate_h" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 62)") + (match_test "(INTVAL (op) & 1) == 0"))) + +(define_predicate "aarch64_sve_gather_immediate_w" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 124)") + (match_test "(INTVAL (op) & 3) == 0"))) + 
+(define_predicate "aarch64_sve_gather_immediate_d" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 248)") + (match_test "(INTVAL (op) & 7) == 0"))) + +(define_predicate "aarch64_sve_uxtb_immediate" + (and (match_code "const_vector") + (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 8") + (match_test "aarch64_const_vec_all_same_int_p (op, 0xff)"))) + +(define_predicate "aarch64_sve_uxth_immediate" + (and (match_code "const_vector") + (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 16") + (match_test "aarch64_const_vec_all_same_int_p (op, 0xffff)"))) + +(define_predicate "aarch64_sve_uxtw_immediate" + (and (match_code "const_vector") + (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 32") + (match_test "aarch64_const_vec_all_same_int_p (op, 0xffffffff)"))) + +(define_predicate "aarch64_sve_uxt_immediate" + (ior (match_operand 0 "aarch64_sve_uxtb_immediate") + (match_operand 0 "aarch64_sve_uxth_immediate") + (match_operand 0 "aarch64_sve_uxtw_immediate"))) (define_predicate "aarch64_sve_logical_immediate" (and (match_code "const,const_vector") (match_test "aarch64_sve_bitmask_immediate_p (op)"))) -(define_predicate "aarch64_sve_mul_immediate" +;; Used for SVE UMAX and UMIN. +(define_predicate "aarch64_sve_vsb_immediate" + (and (match_code "const_vector") + (match_test "GET_MODE_INNER (GET_MODE (op)) == QImode + ? aarch64_const_vec_all_same_in_range_p (op, -128, 127) + : aarch64_const_vec_all_same_in_range_p (op, 0, 255)"))) + +;; Used for SVE MUL, SMAX and SMIN. +(define_predicate "aarch64_sve_vsm_immediate" (and (match_code "const,const_vector") (match_test "aarch64_const_vec_all_same_in_range_p (op, -128, 127)"))) (define_predicate "aarch64_sve_dup_immediate" (and (match_code "const,const_vector") - (match_test "aarch64_sve_dup_immediate_p (op)"))) + (ior (match_test "aarch64_sve_dup_immediate_p (op)") + (match_test "aarch64_float_const_representable_p (op)")))) (define_predicate "aarch64_sve_cmp_vsc_immediate" - (and (match_code "const,const_vector") + (and (match_code "const_int,const_vector") (match_test "aarch64_sve_cmp_immediate_p (op, true)"))) (define_predicate "aarch64_sve_cmp_vsd_immediate" - (and (match_code "const,const_vector") + (and (match_code "const_int,const_vector") (match_test "aarch64_sve_cmp_immediate_p (op, false)"))) (define_predicate "aarch64_sve_index_immediate" @@ -549,14 +750,23 @@ (and (match_code "const,const_vector") (match_test "aarch64_sve_float_arith_immediate_p (op, false)"))) -(define_predicate "aarch64_sve_float_arith_with_sub_immediate" +(define_predicate "aarch64_sve_float_negated_arith_immediate" (and (match_code "const,const_vector") (match_test "aarch64_sve_float_arith_immediate_p (op, true)"))) +(define_predicate "aarch64_sve_float_arith_with_sub_immediate" + (ior (match_operand 0 "aarch64_sve_float_arith_immediate") + (match_operand 0 "aarch64_sve_float_negated_arith_immediate"))) + (define_predicate "aarch64_sve_float_mul_immediate" (and (match_code "const,const_vector") (match_test "aarch64_sve_float_mul_immediate_p (op)"))) +(define_predicate "aarch64_sve_float_maxmin_immediate" + (and (match_code "const_vector") + (ior (match_test "op == CONST0_RTX (GET_MODE (op))") + (match_test "op == CONST1_RTX (GET_MODE (op))")))) + (define_predicate "aarch64_sve_arith_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_sve_arith_immediate"))) @@ -564,12 +774,37 @@ (define_predicate "aarch64_sve_add_operand" (ior (match_operand 0 "aarch64_sve_arith_operand") (match_operand 0 
"aarch64_sve_sub_arith_immediate") - (match_operand 0 "aarch64_sve_inc_dec_immediate"))) + (match_operand 0 "aarch64_sve_vector_inc_dec_immediate"))) + +(define_predicate "aarch64_sve_sqadd_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_qadd_immediate") + (match_operand 0 "aarch64_sve_qsub_immediate"))) + +(define_predicate "aarch64_sve_pred_and_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_uxt_immediate"))) (define_predicate "aarch64_sve_logical_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_sve_logical_immediate"))) +(define_predicate "aarch64_sve_gather_offset_b" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_gather_immediate_b"))) + +(define_predicate "aarch64_sve_gather_offset_h" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_gather_immediate_h"))) + +(define_predicate "aarch64_sve_gather_offset_w" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_gather_immediate_w"))) + +(define_predicate "aarch64_sve_gather_offset_d" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_gather_immediate_d"))) + (define_predicate "aarch64_sve_lshift_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_simd_lshift_imm"))) @@ -578,9 +813,17 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_simd_rshift_imm"))) -(define_predicate "aarch64_sve_mul_operand" +(define_predicate "aarch64_sve_vsb_operand" (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_mul_immediate"))) + (match_operand 0 "aarch64_sve_vsb_immediate"))) + +(define_predicate "aarch64_sve_vsm_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_vsm_immediate"))) + +(define_predicate "aarch64_sve_reg_or_dup_imm" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_dup_immediate"))) (define_predicate "aarch64_sve_cmp_vsc_operand" (ior (match_operand 0 "register_operand") @@ -599,17 +842,39 @@ (match_operand 0 "aarch64_sve_float_arith_immediate"))) (define_predicate "aarch64_sve_float_arith_with_sub_operand" - (ior (match_operand 0 "aarch64_sve_float_arith_operand") + (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_sve_float_arith_with_sub_immediate"))) (define_predicate "aarch64_sve_float_mul_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_sve_float_mul_immediate"))) +(define_predicate "aarch64_sve_float_maxmin_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_float_maxmin_immediate"))) + (define_predicate "aarch64_sve_vec_perm_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_constant_vector_operand"))) +(define_predicate "aarch64_sve_ptrue_flag" + (and (match_code "const_int") + (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE") + (match_test "INTVAL (op) == SVE_KNOWN_PTRUE")))) + +(define_predicate "aarch64_sve_gp_strictness" + (and (match_code "const_int") + (ior (match_test "INTVAL (op) == SVE_RELAXED_GP") + (match_test "INTVAL (op) == SVE_STRICT_GP")))) + +(define_predicate "aarch64_gather_scale_operand_b" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1"))) + +(define_predicate "aarch64_gather_scale_operand_h" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) + (define_predicate "aarch64_gather_scale_operand_w" (and (match_code "const_int") 
(match_test "INTVAL (op) == 1 || INTVAL (op) == 4"))) @@ -621,3 +886,25 @@ ;; A special predicate that doesn't match a particular mode. (define_special_predicate "aarch64_any_register_operand" (match_code "reg")) + +(define_predicate "aarch64_sve_any_binary_operator" + (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor")) + +(define_predicate "aarch64_bytes_per_sve_vector_operand" + (and (match_code "const_int,const_poly_int") + (match_test "known_eq (wi::to_poly_wide (op, mode), + BYTES_PER_SVE_VECTOR)"))) + +(define_predicate "aarch64_memtag_tag_offset" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + +(define_predicate "aarch64_granule16_uimm6" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 1008) + && !(INTVAL (op) & 0xf)"))) + +(define_predicate "aarch64_granule16_simm9" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -4096, 4080) + && !(INTVAL (op) & 0xf)"))) diff --git a/gcc/config/aarch64/rtems.h b/gcc/config/aarch64/rtems.h index 4f0d066fff9f4..84904b01dee62 100644 --- a/gcc/config/aarch64/rtems.h +++ b/gcc/config/aarch64/rtems.h @@ -1,5 +1,5 @@ /* Definitions for RTEMS based AARCH64 system. - Copyright (C) 2016-2018 Free Software Foundation, Inc. + Copyright (C) 2016-2021 Free Software Foundation, Inc. This file is part of GCC. diff --git a/gcc/config/aarch64/saphira.md b/gcc/config/aarch64/saphira.md new file mode 100644 index 0000000000000..e51dfec939fc0 --- /dev/null +++ b/gcc/config/aarch64/saphira.md @@ -0,0 +1,560 @@ +;; Saphira pipeline description +;; Copyright (C) 2017-2021 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "saphira") + +;; Complex int instructions (e.g. multiply and divide) execute in the X +;; pipeline. Simple int instructions execute in the X, Y, Z and B pipelines. + +(define_cpu_unit "saphira_x" "saphira") +(define_cpu_unit "saphira_y" "saphira") + +;; Branches execute in the Z or B pipeline or in one of the int pipelines depending +;; on how complex it is. Simple int insns (like movz) can also execute here. + +(define_cpu_unit "saphira_z" "saphira") +(define_cpu_unit "saphira_b" "saphira") + +;; Vector and FP insns execute in the VX and VY pipelines. + +(define_automaton "saphira_vfp") + +(define_cpu_unit "saphira_vx" "saphira_vfp") +(define_cpu_unit "saphira_vy" "saphira_vfp") + +;; Loads execute in the LD pipeline. +;; Stores execute in the ST pipeline, for address, data, and +;; vector data. + +(define_automaton "saphira_mem") + +(define_cpu_unit "saphira_ld" "saphira_mem") +(define_cpu_unit "saphira_st" "saphira_mem") + +;; The GTOV and VTOG pipelines are for general to vector reg moves, and vice +;; versa. + +(define_cpu_unit "saphira_gtov" "saphira") +(define_cpu_unit "saphira_vtog" "saphira") + +;; Common reservation combinations. 
+ +(define_reservation "saphira_vxvy" "saphira_vx|saphira_vy") +(define_reservation "saphira_zb" "saphira_z|saphira_b") +(define_reservation "saphira_xyzb" "saphira_x|saphira_y|saphira_z|saphira_b") + +;; SIMD Floating-Point Instructions + +(define_insn_reservation "saphira_afp_1_vxvy" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_neg_s,neon_fp_neg_d,neon_fp_abs_s,neon_fp_abs_d,neon_fp_neg_s_q,neon_fp_neg_d_q,neon_fp_abs_s_q,neon_fp_abs_d_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_2_vxvy" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_minmax_s,neon_fp_minmax_d,neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,neon_fp_compare_s,neon_fp_compare_d,neon_fp_round_s,neon_fp_round_d,neon_fp_minmax_s_q,neon_fp_minmax_d_q,neon_fp_compare_s_q,neon_fp_compare_d_q,neon_fp_round_s_q,neon_fp_round_d_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_3_vxvy" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,neon_fp_abd_s,neon_fp_abd_d,neon_fp_addsub_s,neon_fp_addsub_d,neon_fp_reduc_add_s,neon_fp_reduc_add_d,neon_fp_abd_s_q,neon_fp_abd_d_q,neon_fp_addsub_s_q,neon_fp_addsub_d_q,neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_4_vxvy" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_to_int_s,neon_fp_to_int_d,neon_int_to_fp_s,neon_int_to_fp_d,neon_fp_cvt_widen_h,neon_fp_cvt_widen_s,neon_fp_to_int_s_q,neon_fp_to_int_d_q,neon_int_to_fp_s_q,neon_int_to_fp_d_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_5_vxvy_mul" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_s_scalar,neon_fp_mul_s_q,neon_fp_mul_s_scalar_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_5_vxvy_mla" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_scalar,neon_fp_mla_s_q,neon_fp_mla_s_scalar_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_6_vxvy_mul" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_mul_d,neon_fp_mul_d_q,neon_fp_mul_d_scalar_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_6_vxvy_mla" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_mla_d,neon_fp_mla_d_q,neon_fp_mla_d_scalar_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_afp_4_vxvy_vxvy_vxvy" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_cvt_narrow_s_q,neon_fp_cvt_narrow_d_q")) + "saphira_vxvy+saphira_vxvy,saphira_vxvy") + +(define_insn_reservation "saphira_afp_6_vx_vy" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_div_s")) + "saphira_vx+saphira_vy") + +(define_insn_reservation "saphira_afp_11_vx_vy" 11 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_div_d")) + "saphira_vx+saphira_vy") + +(define_insn_reservation "saphira_afp_6_vx_vy_vx_vy" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_div_s_q")) + "(saphira_vx+saphira_vy),(saphira_vx+saphira_vy)") + +(define_insn_reservation "saphira_afp_11_vx_vy_vx_vy" 11 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_div_d_q")) + "(saphira_vx+saphira_vy),(saphira_vx+saphira_vy)") + +(define_insn_reservation "saphira_afp_12_vx_vy" 12 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_sqrt_s")) + "saphira_vx+saphira_vy") + +(define_insn_reservation "saphira_afp_22_vx_vy" 22 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_sqrt_d")) + "saphira_vx+saphira_vy") + +(define_insn_reservation 
"saphira_afp_12_vx_vy_vx_vy" 12 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_sqrt_s_q")) + "(saphira_vx+saphira_vy),(saphira_vx+saphira_vy)") + +(define_insn_reservation "saphira_afp_22_vx_vy_vx_vy" 22 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_sqrt_d_q")) + "(saphira_vx+saphira_vy),(saphira_vx+saphira_vy)") + +;; SIMD Integer Instructions + +(define_insn_reservation "saphira_ai_1_vxvy" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_add,neon_reduc_add,neon_logic,neon_neg,neon_sub,neon_add_q,neon_reduc_add_q,neon_logic_q,neon_neg_q,neon_sub_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_ai_2_vxvy" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_add_long,neon_sub_long,neon_add_halve,neon_sub_halve,neon_shift_imm,neon_shift_reg,neon_minmax,neon_abs,neon_compare,neon_compare_zero,neon_tst,neon_shift_imm_long,neon_reduc_add_long,neon_add_halve_q,neon_sub_halve_q,neon_shift_imm_q,neon_shift_reg_q,neon_minmax_q,neon_abs_q,neon_compare_q,neon_compare_zero_q,neon_tst_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_ai_3_vxvy" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_shift_acc,neon_reduc_add_acc,neon_abd,neon_qadd,neon_qsub,neon_qabs,neon_qneg,neon_sat_shift_imm,neon_sat_shift_imm_narrow_q,neon_sat_shift_reg,neon_shift_acc_q,neon_reduc_add_acc_q,neon_abd_q,neon_abd_long,neon_qadd_q,neon_qsub_q,neon_qabs_q,neon_qneg_q,neon_sat_shift_imm_q,neon_sat_shift_reg_q,neon_add_halve_narrow_q,neon_sub_halve_narrow_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_ai_4_vxvy" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_reduc_minmax,neon_reduc_minmax_q,neon_arith_acc,neon_arith_acc_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_ai_4_vxvy_mul" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_mul_b,neon_mul_h,neon_mul_s,neon_mul_h_scalar,neon_mul_s_scalar,neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,neon_mul_h_scalar_q,neon_mul_s_scalar_q,neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,neon_mul_d_long,neon_mul_h_scalar_long,neon_mul_s_scalar_long,neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,neon_sat_mul_h_scalar_q,neon_sat_mul_s_scalar_q,neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long")) + "saphira_vxvy") + +(define_insn_reservation "saphira_ai_4_vxvy_mla" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_mla_b,neon_mla_h,neon_mla_s,neon_mla_h_scalar,neon_mla_s_scalar,neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,neon_mla_h_scalar_q,neon_mla_s_scalar_q,neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,neon_mla_h_scalar_long,neon_mla_s_scalar_long,neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long")) + "saphira_vxvy") + +(define_insn_reservation "saphira_2_ai_vxvy_vxvy" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_add_widen,neon_sub_widen")) + "(saphira_vxvy),(saphira_vxvy)") + +;; SIMD Load Instructions + +(define_insn_reservation "saphira_ald_4_ld" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,neon_load1_all_lanes,neon_load2_one_lane")) + "saphira_ld") + +(define_insn_reservation "saphira_ald_4_ld_none" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_load1_2reg,neon_load2_2reg,neon_load2_all_lanes")) + "saphira_ld") + +(define_insn_reservation "saphira_ald_4_ld_ld" 4 + (and 
(eq_attr "tune" "saphira") + (eq_attr "type" "neon_load1_2reg_q,neon_load2_2reg_q,neon_load2_all_lanes_q,neon_load3_one_lane,neon_load4_one_lane,neon_ldp,neon_ldp_q")) + "saphira_ld,saphira_ld") + +(define_insn_reservation "saphira_ald_4_ld_ld_none" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_load1_3reg,neon_load3_3reg,neon_load3_all_lanes")) + "saphira_ld,saphira_ld") + +(define_insn_reservation "saphira_ald_4_ld_ld_ld" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_load1_3reg_q,neon_load3_3reg_q,neon_load3_all_lanes_q")) + "saphira_ld,saphira_ld,saphira_ld") + +(define_insn_reservation "saphira_ald_4_ld_ld_none_none" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_load1_4reg,neon_load4_4reg")) + "saphira_ld,saphira_ld") + +(define_insn_reservation "saphira_ald_4_ld_ld_ld_ld" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_load1_4reg_q,neon_load4_4reg_q,neon_load4_all_lanes,neon_load4_all_lanes_q")) + "saphira_ld,saphira_ld,saphira_ld,saphira_ld") + +;; Arithmetic and Logical Instructions + +(define_insn_reservation "saphira_alu_1_xyz" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "alus_sreg,alus_imm,alus_shift_imm,csel,adc_reg,alu_imm,alu_sreg,alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_ext,alus_ext,logic_imm,logic_reg,logic_shift_imm,logics_imm,logics_reg,logics_shift_imm,mov_reg")) + "saphira_xyzb") + +;; SIMD Miscellaneous Instructions + +;; No separate type for ins and dup. But this is correct for both. + +(define_insn_reservation "saphira_am_3_gtov" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_from_gp")) + "saphira_gtov") + +;; No separate type for ins and dup. Assuming dup is more common. Ins is +;; gtov+vxvy and latency of 4. + +(define_insn_reservation "saphira_am_3_gtov_gtov" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_from_gp_q")) + "saphira_gtov,saphira_gtov") + +;; DUP does not use vector pipes in Q mode, only gtov+gtov. +(define_insn_reservation "saphira_am_1_gtov_gtov" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_dup_q")) + "saphira_gtov,saphira_gtov") + +;; neon_to_gp_q is used for 32-bit ARM instructions that move 64-bits of data +;; so no use needed here. 
+ +(define_insn_reservation "saphira_am_3_vtog" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_to_gp")) + "saphira_vtog") + +(define_insn_reservation "saphira_am_1_vxvy" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_bsl,neon_dup,neon_ext,neon_ins,neon_ins_q,neon_move,neon_rev,neon_tbl1,neon_permute,neon_shift_imm_narrow_q,neon_bsl_q,neon_ext_q,neon_move_q,neon_rev_q,neon_tbl1_q,neon_permute_q,neon_tbl1,neon_tbl1_q,neon_tbl2_q,neon_tbl2")) + "saphira_vxvy") + +(define_insn_reservation "saphira_am_2_vxvy" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_cls,neon_cnt,neon_rbit,neon_cls_q,neon_cnt_q,neon_rbit_q,neon_tbl2,neon_tbl3_q,neon_tbl3")) + "saphira_vxvy") + +(define_insn_reservation "saphira_am_3_vxvy" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_recpe_s,neon_fp_recpe_d,neon_fp_rsqrte_s,neon_fp_rsqrte_d,neon_fp_recpx_s,neon_fp_recpx_d,neon_fp_recpe_s_q,neon_fp_recpe_d_q,neon_fp_rsqrte_s_q,neon_fp_rsqrte_d_q,neon_tbl4_q,neon_tbl4")) + "saphira_vxvy") + +(define_insn_reservation "saphira_am_5_vxvy" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q")) + "saphira_vxvy") + +(define_insn_reservation "saphira_am_6_vxvy" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_fp_recps_d,neon_fp_rsqrts_d,neon_fp_recps_d_q,neon_fp_rsqrts_d_q")) + "saphira_vxvy") + +;; SIMD Store Instructions + +;; ??? stp is neon_store1_2reg in aarch64.md, but neon_stp in aarch64-simd.md. +;; Similarly with ldp. + +(define_insn_reservation "saphira_ast_st_vsd" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,neon_store1_one_lane,neon_store1_one_lane_q,neon_store1_2reg,neon_store2_2reg,neon_store2_one_lane,neon_store2_one_lane_q,neon_stp")) + "saphira_st") + +(define_insn_reservation "saphira_as_0_st_vsd_st_vsd" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_store1_2reg_q,neon_store1_3reg,neon_store1_4reg,neon_store2_2reg_q,neon_store3_3reg,neon_store4_4reg,neon_store3_one_lane,neon_store3_one_lane_q,neon_store4_one_lane,neon_store4_one_lane_q,neon_stp_q")) + "(saphira_st),(saphira_st)") + +(define_insn_reservation "saphira_as_0_st_vsd_st_vsd_st_vsd" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_store1_3reg_q,neon_store3_3reg_q")) + "(saphira_st),(saphira_st),(saphira_st)") + +(define_insn_reservation "saphira_as_0_st_vsd_st_vsd_st_vsd_st_vsd" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "neon_store1_4reg_q,neon_store4_4reg_q")) + "(saphira_st),(saphira_st),(saphira_st),(saphira_st)") + +;; Branch Instructions + +(define_insn_reservation "saphira_branch_0_zb" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "branch")) + "saphira_zb") + +(define_insn_reservation "saphira_call_0_xyzb" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "call")) + "saphira_xyzb") + +;; Cryptography Extensions + +(define_insn_reservation "saphira_cry_1_vxvy" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "crypto_sha1_fast")) + "saphira_vxvy") + +(define_insn_reservation "saphira_cry_2_vxvy" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "crypto_aesmc")) + "saphira_vxvy") + +(define_insn_reservation "saphira_cry_2_vxvy_vxvy" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "crypto_sha1_xor,crypto_sha256_fast,crypto_pmull,crypto_aese")) + "saphira_vxvy") + +(define_insn_reservation "saphira_cry_4_vy_vx" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "crypto_sha1_slow")) + "saphira_vxvy") + +(define_insn_reservation 
"saphira_cry_5_vy_vx" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "crypto_sha256_slow")) + "saphira_vxvy") + +;; FP Load Instructions + +(define_insn_reservation "saphira_fld_4_ld" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_loads,f_loadd")) + "saphira_ld") + +;; No separate FP store section, these are found in the SIMD store section. + +(define_insn_reservation "saphira_fld_0_st_vsd" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_stores,f_stored")) + "saphira_st") + +;; FP Data Processing Instructions + +(define_insn_reservation "saphira_fpdt_0_vxvy" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_5_vtog" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_cvtf2i")) + "saphira_vtog") + +(define_insn_reservation "saphira_fpdt_1_vxvy" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "ffariths,ffarithd,fcsel")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_2_vxvy" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_minmaxd,f_minmaxs,f_rintd,f_rints")) + "saphira_vxvy") + +;; Scalar FP ABD is handled same as vector FP ABD. + +(define_insn_reservation "saphira_fpdt_3_vxvy" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "faddd,fadds")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_4_vxvy" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_cvt")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_5_vxvy_mul" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fmuls")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_5_vxvy_mla" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fmacs,ffmas")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_6_vxvy_mul" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fmuld")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_6_vxvy_mla" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fmacd,ffmad")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_6_vx_vy" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fdivs")) + "saphira_vx+saphira_vy") + +(define_insn_reservation "saphira_fpdt_11_vx_vy" 11 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fdivd")) + "saphira_vx+saphira_vy") + +(define_insn_reservation "saphira_fpdt_12_vx_vy" 12 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fsqrts")) + "saphira_vxvy") + +(define_insn_reservation "saphira_fpdt_22_vx_vy" 22 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fsqrtd")) + "saphira_vxvy") + +;; FP Miscellaneous Instructions + +(define_insn_reservation "saphira_fpmsc_3_vtog" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_mrc")) + "saphira_vtog") + +(define_insn_reservation "saphira_fpmsc_3_gtov" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_mcr")) + "saphira_gtov") + +(define_insn_reservation "saphira_fpmsc_1_vxvy" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "fmov,fconsts,fconstd")) + "saphira_vxvy") + +;; No separate type for float-to-fixed conversions. Same type as +;; float-to-int conversions. They schedule the same though, so no problem. 
+ +(define_insn_reservation "saphira_fpmsc_6_gtov" 6 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "f_cvti2f")) + "saphira_gtov") + +;; Load Instructions + +(define_insn_reservation "saphira_ld_3_ld" 3 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "load_4,load_8,load_16")) + "saphira_ld") + +;; Miscellaneous Data-Processing Instructions + +(define_insn_reservation "saphira_misc_1_xyzb" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "bfx,bfm,extend,rotate_imm,shift_imm")) + "saphira_xyzb") + +(define_insn_reservation "saphira_misc_2_x" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "crc")) + "saphira_x") + +(define_insn_reservation "saphira_misc_2_xyzb" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "clz,rbit,rev")) + "saphira_xyzb") + +;; Divide and Multiply Instructions + +(define_insn_reservation "saphira_muldiv_4_x_mul" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "mul")) + "saphira_x") + +(define_insn_reservation "saphira_muldiv_4_x_mla" 4 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "mla,smlal,umlal")) + "saphira_x") + +(define_insn_reservation "saphira_muldiv_5_x_mul" 5 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "smull,umull")) + "saphira_x") + +(define_insn_reservation "saphira_md_11_x_zb" 11 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "sdiv,udiv")) + "saphira_x+saphira_zb") + +;; Move and Shift Instructions + +(define_insn_reservation "saphira_mvs_1_xyzb" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "mov_imm,shift_reg,adr")) + "saphira_xyzb") + +;; Other Instructions + +;; Block is for instruction scheduling blockage insns in RTL. There are no +;; hardware instructions emitted for them, so don't use any resources. + +(define_insn_reservation "saphira_other_0_nothing" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "trap,block")) + "nothing") + +(define_insn_reservation "saphira_other_2_ld" 2 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "mrs")) + "saphira_ld") + +;; Assume multiple instructions use all pipes. + +(define_insn_reservation "saphira_extra" 1 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "multiple")) + "saphira_x+saphira_y+saphira_z+saphira_b+saphira_vx+saphira_vy+saphira_ld+saphira_st+saphira_gtov+saphira_vtog") + +;; Store Instructions + +;; No use of store_rel, store3, or store4 in aarch64. + +(define_insn_reservation "saphira_st_0_st_sd" 0 + (and (eq_attr "tune" "saphira") + (eq_attr "type" "store_4,store_8,store_16")) + "saphira_st") + +;; Muliply bypasses. + +;; 1 cycle latency (0 bubble) for an integer mul or mac feeding into a mac. + +(define_bypass 1 + "saphira_ai_4_vxvy_mul,saphira_ai_4_vxvy_mla,saphira_muldiv_4_x_mul,saphira_muldiv_4_x_mla,saphira_muldiv_5_x_mul" + "saphira_ai_4_vxvy_mla,saphira_muldiv_4_x_mla") + +;; 3 cycle latency (2 bubbles) for an FP mul or mac feeding into a mac. + +(define_bypass 3 + "saphira_afp_5_vxvy_mul,saphira_afp_5_vxvy_mla,saphira_afp_6_vxvy_mul,saphira_afp_6_vxvy_mla,saphira_fpdt_5_vxvy_mul,saphira_fpdt_5_vxvy_mla,saphira_fpdt_6_vxvy_mul,saphira_fpdt_6_vxvy_mla" + "saphira_afp_5_vxvy_mla,saphira_afp_6_vxvy_mla,saphira_fpdt_5_vxvy_mla,saphira_fpdt_6_vxvy_mla") + diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 0be1f0d63aa15..7fa615843e076 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -1,5 +1,5 @@ # Machine description for AArch64 architecture. -# Copyright (C) 2009-2018 Free Software Foundation, Inc. +# Copyright (C) 2009-2021 Free Software Foundation, Inc. 
# Contributed by ARM Ltd. # # This file is part of GCC. @@ -24,11 +24,15 @@ OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \ $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ $(srcdir)/config/aarch64/aarch64-tuning-flags.def -$(srcdir)/config/aarch64/aarch64-tune.md: $(srcdir)/config/aarch64/gentune.sh \ +$(srcdir)/config/aarch64/aarch64-tune.md: s-aarch64-tune-md; @true +s-aarch64-tune-md: $(srcdir)/config/aarch64/gentune.sh \ $(srcdir)/config/aarch64/aarch64-cores.def $(SHELL) $(srcdir)/config/aarch64/gentune.sh \ $(srcdir)/config/aarch64/aarch64-cores.def > \ + tmp-aarch64-tune.md + $(SHELL) $(srcdir)/../move-if-change tmp-aarch64-tune.md \ $(srcdir)/config/aarch64/aarch64-tune.md + $(STAMP) s-aarch64-tune-md aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ $(SYSTEM_H) coretypes.h $(TM_H) \ @@ -37,8 +41,62 @@ aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ $(srcdir)/config/aarch64/aarch64-simd-builtins.def \ $(srcdir)/config/aarch64/aarch64-simd-builtin-types.def \ aarch64-builtin-iterators.h + $(COMPILER) -std=gnu++11 -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-builtins.c + +aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.def \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.def \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.def \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ + $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ + gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ + stor-layout.h $(REG_H) alias.h gimple-fold.h langhooks.h \ + stringpool.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.cc + +aarch64-sve-builtins-shapes.o: \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ + $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.cc + +aarch64-sve-builtins-base.o: \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ + $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ + gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ + rtx-vector-builder.h vec-perm-indices.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.cc + +aarch64-sve-builtins-sve2.o: \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ + 
$(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ + gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ + rtx-vector-builder.h vec-perm-indices.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-builtins.c + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ $(srcdir)/config/aarch64/iterators.md @@ -56,6 +114,10 @@ aarch64-c.o: $(srcdir)/config/aarch64/aarch64-c.c $(CONFIG_H) $(SYSTEM_H) \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/aarch64/aarch64-c.c +aarch64-d.o: $(srcdir)/config/aarch64/aarch64-d.c + $(COMPILE) $< + $(POSTCOMPILE) + PASSES_EXTRA += $(srcdir)/config/aarch64/aarch64-passes.def cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \ @@ -67,6 +129,51 @@ cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/aarch64/cortex-a57-fma-steering.c +aarch64-speculation.o: $(srcdir)/config/aarch64/aarch64-speculation.cc \ + $(CONFIG_H) \ + $(SYSTEM_H) \ + $(TM_H) \ + $(TARGET_H) \ + $(RTL_BASE_H) \ + $(TREE_PASS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_SPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-speculation.cc + +falkor-tag-collision-avoidance.o: \ + $(srcdir)/config/aarch64/falkor-tag-collision-avoidance.c \ + $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \ + dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \ + output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \ + $(CONTEXT_H) $(TREE_PASS_H) regrename.h \ + $(srcdir)/config/aarch64/aarch64-protos.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/falkor-tag-collision-avoidance.c + +aarch64-bti-insert.o: $(srcdir)/config/aarch64/aarch64-bti-insert.c \ + $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \ + dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \ + output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \ + $(CONTEXT_H) $(TREE_PASS_H) regrename.h \ + $(srcdir)/config/aarch64/aarch64-protos.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-bti-insert.c + +aarch64-cc-fusion.o: $(srcdir)/config/aarch64/aarch64-cc-fusion.cc \ + $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \ + $(RTL_SSA_H) tree-pass.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-cc-fusion.cc + comma=, MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) + +insn-conditions.md: s-check-sve-md +s-check-sve-md: $(srcdir)/config/aarch64/check-sve-md.awk \ + $(srcdir)/config/aarch64/aarch64-sve.md \ + $(srcdir)/config/aarch64/aarch64-sve2.md + $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ + $(srcdir)/config/aarch64/aarch64-sve.md + $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ + 
$(srcdir)/config/aarch64/aarch64-sve2.md + $(STAMP) s-check-sve-md diff --git a/gcc/config/aarch64/t-aarch64-freebsd b/gcc/config/aarch64/t-aarch64-freebsd index 99d3dea36d799..2f1c0a78c230a 100644 --- a/gcc/config/aarch64/t-aarch64-freebsd +++ b/gcc/config/aarch64/t-aarch64-freebsd @@ -1,5 +1,5 @@ # Machine description for AArch64 architecture. -# Copyright (C) 2016-2018 Free Software Foundation, Inc. +# Copyright (C) 2016-2021 Free Software Foundation, Inc. # # This file is part of GCC. # diff --git a/gcc/config/aarch64/t-aarch64-linux b/gcc/config/aarch64/t-aarch64-linux index b9897785a892d..241b0ef20b665 100644 --- a/gcc/config/aarch64/t-aarch64-linux +++ b/gcc/config/aarch64/t-aarch64-linux @@ -1,5 +1,5 @@ # Machine description for AArch64 architecture. -# Copyright (C) 2009-2018 Free Software Foundation, Inc. +# Copyright (C) 2009-2021 Free Software Foundation, Inc. # Contributed by ARM Ltd. # # This file is part of GCC. diff --git a/gcc/config/aarch64/t-aarch64-netbsd b/gcc/config/aarch64/t-aarch64-netbsd new file mode 100644 index 0000000000000..2f1c0a78c230a --- /dev/null +++ b/gcc/config/aarch64/t-aarch64-netbsd @@ -0,0 +1,21 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2016-2021 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = aarch64/lib1funcs.asm +LIB1ASMFUNCS = _aarch64_sync_cache_range diff --git a/gcc/config/aarch64/t-aarch64-vxworks b/gcc/config/aarch64/t-aarch64-vxworks new file mode 100644 index 0000000000000..deebcfb172634 --- /dev/null +++ b/gcc/config/aarch64/t-aarch64-vxworks @@ -0,0 +1,22 @@ +# Multilibs for VxWorks. +# +# Copyright (C) 2018-2021 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS += mrtp +MULTILIB_DIRNAMES += mrtp diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md index df55db8bb0cf0..9194a3e824c6a 100644 --- a/gcc/config/aarch64/thunderx.md +++ b/gcc/config/aarch64/thunderx.md @@ -1,5 +1,5 @@ ;; Cavium ThunderX pipeline description -;; Copyright (C) 2014-2018 Free Software Foundation, Inc. +;; Copyright (C) 2014-2021 Free Software Foundation, Inc. 
;; ;; Written by Andrew Pinski @@ -51,7 +51,7 @@ (define_insn_reservation "thunderx_arith_shift" 2 (and (eq_attr "tune" "thunderx") - (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm")) + (eq_attr "type" "alu_ext,alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm")) "thunderx_pipe0 | thunderx_pipe1") (define_insn_reservation "thunderx_csel" 2 diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md index 589e5649851a8..9f7f19a87095a 100644 --- a/gcc/config/aarch64/thunderx2t99.md +++ b/gcc/config/aarch64/thunderx2t99.md @@ -1,5 +1,5 @@ ;; Cavium ThunderX 2 CN99xx pipeline description -;; Copyright (C) 2016-2018 Free Software Foundation, Inc. +;; Copyright (C) 2016-2021 Free Software Foundation, Inc. ;; ;; Contributed by Cavium, Broadcom and Mentor Embedded. @@ -54,8 +54,6 @@ (define_reservation "thunderx2t99_ls01" "thunderx2t99_ls0|thunderx2t99_ls1") (define_reservation "thunderx2t99_f01" "thunderx2t99_f0|thunderx2t99_f1") -(define_reservation "thunderx2t99_ls_both" "thunderx2t99_ls0+thunderx2t99_ls1") - ; A load with delay in the ls0/ls1 pipes. (define_reservation "thunderx2t99_l0delay" "thunderx2t99_ls0,\ thunderx2t99_ls0d1,thunderx2t99_ls0d2,\ @@ -76,7 +74,7 @@ (define_insn_reservation "thunderx2t99_nothing" 0 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "no_insn,block")) + (eq_attr "type" "block")) "nothing") (define_insn_reservation "thunderx2t99_mrs" 0 @@ -86,12 +84,10 @@ (define_insn_reservation "thunderx2t99_multiple" 1 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "multiple")) + (eq_attr "type" "multiple,untyped")) "thunderx2t99_i0+thunderx2t99_i1+thunderx2t99_i2+thunderx2t99_ls0+\ thunderx2t99_ls1+thunderx2t99_sd+thunderx2t99_i1m1+thunderx2t99_i1m2+\ - thunderx2t99_i1m3+thunderx2t99_ls0d1+thunderx2t99_ls0d2+thunderx2t99_ls0d3+\ - thunderx2t99_ls1d1+thunderx2t99_ls1d2+thunderx2t99_ls1d3+thunderx2t99_f0+\ - thunderx2t99_f1") + thunderx2t99_i1m3+thunderx2t99_f0+thunderx2t99_f1") ;; Integer arithmetic/logic instructions. @@ -113,9 +109,9 @@ (define_insn_reservation "thunderx2t99_alu_shift" 2 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "alu_shift_imm,alu_ext,alu_shift_reg,\ - alus_shift_imm,alus_ext,alus_shift_reg,\ - logic_shift_imm,logics_shift_reg")) + (eq_attr "type" "alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_ext,\ + alus_shift_imm,alus_ext,\ + logic_shift_imm,logics_shift_imm")) "thunderx2t99_i012,thunderx2t99_i012") (define_insn_reservation "thunderx2t99_div" 13 @@ -228,21 +224,11 @@ (eq_attr "type" "f_loads,f_loadd")) "thunderx2t99_ls01") -(define_insn_reservation "thunderx2t99_fp_loadpair_basic" 4 - (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_load1_2reg")) - "thunderx2t99_ls01*2") - (define_insn_reservation "thunderx2t99_fp_store_basic" 1 (and (eq_attr "tune" "thunderx2t99") (eq_attr "type" "f_stores,f_stored")) "thunderx2t99_ls01,thunderx2t99_sd") -(define_insn_reservation "thunderx2t99_fp_storepair_basic" 1 - (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_store1_2reg")) - "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd),thunderx2t99_sd") - ;; ASIMD integer instructions. 
(define_insn_reservation "thunderx2t99_asimd_int" 7 @@ -251,6 +237,7 @@ neon_arith_acc,neon_arith_acc_q,\ neon_abs,neon_abs_q,\ neon_add,neon_add_q,\ + neon_sub,neon_sub_q,\ neon_neg,neon_neg_q,\ neon_add_long,neon_add_widen,\ neon_add_halve,neon_add_halve_q,\ @@ -301,11 +288,6 @@ (eq_attr "type" "neon_logic,neon_logic_q")) "thunderx2t99_f01") -(define_insn_reservation "thunderx2t99_asimd_polynomial" 5 - (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_mul_d_long")) - "thunderx2t99_f01") - ;; ASIMD floating-point instructions. (define_insn_reservation "thunderx2t99_asimd_fp_simple" 5 @@ -332,6 +314,7 @@ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q,\ neon_fp_mul_s,neon_fp_mul_d,\ neon_fp_mul_s_q,neon_fp_mul_d_q,\ + neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\ neon_fp_mla_s,neon_fp_mla_d,\ neon_fp_mla_s_q,neon_fp_mla_d_q")) "thunderx2t99_f01") @@ -341,6 +324,8 @@ (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\ neon_fp_to_int_s,neon_fp_to_int_d,\ neon_fp_to_int_s_q,neon_fp_to_int_d_q,\ + neon_int_to_fp_s,neon_int_to_fp_d,\ + neon_int_to_fp_s_q,neon_int_to_fp_d_q,\ neon_fp_round_s,neon_fp_round_d,\ neon_fp_round_s_q,neon_fp_round_d_q")) "thunderx2t99_f01") @@ -373,7 +358,6 @@ neon_fp_recpx_s,neon_fp_recpx_d,\ neon_fp_recpx_s_q,neon_fp_recpx_d_q,\ neon_rev,neon_rev_q,\ - neon_dup,neon_dup_q,\ neon_permute,neon_permute_q")) "thunderx2t99_f01") @@ -381,13 +365,18 @@ (and (eq_attr "tune" "thunderx2t99") (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\ neon_fp_recps_d,neon_fp_recps_d_q,\ + neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ + neon_fp_sqrt_d,neon_fp_sqrt_d_q,\ + neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\ neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\ neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")) "thunderx2t99_f01") (define_insn_reservation "thunderx2t99_asimd_lut" 8 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_tbl1,neon_tbl1_q,neon_tbl2_q")) + (eq_attr "type" "neon_tbl1,neon_tbl1_q,neon_tbl2,neon_tbl2_q,\ + neon_tbl3,neon_tbl3_q,neon_tbl4,neon_tbl4_q")) "thunderx2t99_f01") (define_insn_reservation "thunderx2t99_asimd_elt_to_gr" 6 @@ -395,26 +384,24 @@ (eq_attr "type" "neon_to_gp,neon_to_gp_q")) "thunderx2t99_f01") -(define_insn_reservation "thunderx2t99_asimd_ext" 7 - (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_shift_imm_narrow_q,neon_sat_shift_imm_narrow_q")) - "thunderx2t99_f01") - ;; ASIMD load instructions. ; NOTE: These reservations attempt to model latency and throughput correctly, ; but the cycle timing of unit allocation is not necessarily accurate (because ; insns are split into uops, and those may be issued out-of-order). 
-(define_insn_reservation "thunderx2t99_asimd_load1_1_mult" 4 +(define_insn_reservation "thunderx2t99_asimd_load1_ldp" 5 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q")) - "thunderx2t99_ls01") + (eq_attr "type" "neon_ldp,neon_ldp_q")) + "thunderx2t99_i012,thunderx2t99_ls01") -(define_insn_reservation "thunderx2t99_asimd_load1_2_mult" 4 +(define_insn_reservation "thunderx2t99_asimd_load1" 4 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q")) - "thunderx2t99_ls_both") + (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ + neon_load1_2reg,neon_load1_2reg_q,\ + neon_load1_3reg,neon_load1_3reg_q,\ + neon_load1_4reg,neon_load1_4reg_q")) + "thunderx2t99_ls01") (define_insn_reservation "thunderx2t99_asimd_load1_onelane" 5 (and (eq_attr "tune" "thunderx2t99") @@ -431,36 +418,59 @@ (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\ neon_load2_one_lane,neon_load2_one_lane_q,\ neon_load2_all_lanes,neon_load2_all_lanes_q")) - "(thunderx2t99_l0delay,thunderx2t99_f01)|(thunderx2t99_l1delay,\ - thunderx2t99_f01)") + "thunderx2t99_l01delay,thunderx2t99_f01") + +(define_insn_reservation "thunderx2t99_asimd_load3" 7 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\ + neon_load3_one_lane,neon_load3_one_lane_q,\ + neon_load3_all_lanes,neon_load3_all_lanes_q")) + "thunderx2t99_l01delay,thunderx2t99_f01") + +(define_insn_reservation "thunderx2t99_asimd_load4" 8 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\ + neon_load4_one_lane,neon_load4_one_lane_q,\ + neon_load4_all_lanes,neon_load4_all_lanes_q")) + "thunderx2t99_l01delay,thunderx2t99_f01") ;; ASIMD store instructions. ; Same note applies as for ASIMD load instructions. 
-(define_insn_reservation "thunderx2t99_asimd_store1_1_mult" 1 +(define_insn_reservation "thunderx2t99_asimd_store_stp" 1 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q")) - "thunderx2t99_ls01") + (eq_attr "type" "neon_stp,neon_stp_q")) + "thunderx2t99_ls01,thunderx2t99_sd") -(define_insn_reservation "thunderx2t99_asimd_store1_2_mult" 1 +(define_insn_reservation "thunderx2t99_asimd_store1" 1 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q")) - "thunderx2t99_ls_both") + (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\ + neon_store1_2reg,neon_store1_2reg_q,\ + neon_store1_3reg,neon_store1_4reg")) + "thunderx2t99_ls01") (define_insn_reservation "thunderx2t99_asimd_store1_onelane" 1 (and (eq_attr "tune" "thunderx2t99") (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q")) "thunderx2t99_ls01,thunderx2t99_f01") -(define_insn_reservation "thunderx2t99_asimd_store2_mult" 1 +(define_insn_reservation "thunderx2t99_asimd_store2" 1 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q")) - "thunderx2t99_ls_both,thunderx2t99_f01") + (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\ + neon_store2_one_lane,neon_store2_one_lane_q")) + "thunderx2t99_ls01,thunderx2t99_f01") + +(define_insn_reservation "thunderx2t99_asimd_store3" 1 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\ + neon_store3_one_lane,neon_store3_one_lane_q")) + "thunderx2t99_ls01,thunderx2t99_f01") -(define_insn_reservation "thunderx2t99_asimd_store2_onelane" 1 +(define_insn_reservation "thunderx2t99_asimd_store4" 1 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q")) + (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\ + neon_store4_one_lane,neon_store4_one_lane_q")) "thunderx2t99_ls01,thunderx2t99_f01") ;; Crypto extensions. diff --git a/gcc/config/aarch64/thunderx3t110.md b/gcc/config/aarch64/thunderx3t110.md new file mode 100644 index 0000000000000..4f83be2132bc7 --- /dev/null +++ b/gcc/config/aarch64/thunderx3t110.md @@ -0,0 +1,686 @@ +;; Cavium ThunderX 3 CN11xx pipeline description +;; Copyright (C) 2020-2021 Free Software Foundation, Inc. +;; +;; Contributed by Marvell + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "thunderx3t110, thunderx3t110_advsimd, thunderx3t110_ldst") +(define_automaton "thunderx3t110_mult") + +(define_cpu_unit "thunderx3t110_i0" "thunderx3t110") +(define_cpu_unit "thunderx3t110_i1" "thunderx3t110") +(define_cpu_unit "thunderx3t110_i2" "thunderx3t110") +(define_cpu_unit "thunderx3t110_i3" "thunderx3t110") + +(define_cpu_unit "thunderx3t110_ls0" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls1" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_sd" "thunderx3t110_ldst") + +; Pseudo-units for multiply pipeline. 
+; unchanged from TX2, occupies I1 for four (1 + 3 additional) slots + +(define_cpu_unit "thunderx3t110_i1m1" "thunderx3t110_mult") +(define_cpu_unit "thunderx3t110_i1m2" "thunderx3t110_mult") +(define_cpu_unit "thunderx3t110_i1m3" "thunderx3t110_mult") + +; Pseudo-units for load delay (assuming dcache hit). + +(define_cpu_unit "thunderx3t110_ls0d1" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls0d2" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls0d3" "thunderx3t110_ldst") + +(define_cpu_unit "thunderx3t110_ls1d1" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls1d2" "thunderx3t110_ldst") +(define_cpu_unit "thunderx3t110_ls1d3" "thunderx3t110_ldst") + +; Define FP units f0/f1/f2/f3. +(define_cpu_unit "thunderx3t110_f0" "thunderx3t110_advsimd") +(define_cpu_unit "thunderx3t110_f1" "thunderx3t110_advsimd") +(define_cpu_unit "thunderx3t110_f2" "thunderx3t110_advsimd") +(define_cpu_unit "thunderx3t110_f3" "thunderx3t110_advsimd") + +(define_reservation "thunderx3t110_i23" "thunderx3t110_i2|thunderx3t110_i3") +(define_reservation "thunderx3t110_i01" + "thunderx3t110_i0|thunderx3t110_i1") +(define_reservation "thunderx3t110_i012" + "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2") +(define_reservation "thunderx3t110_i0123" + "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2|thunderx3t110_i3") +(define_reservation "thunderx3t110_ls01" "thunderx3t110_ls0|thunderx3t110_ls1") +(define_reservation "thunderx3t110_f01" "thunderx3t110_f0|thunderx3t110_f1") +(define_reservation "thunderx3t110_f23" "thunderx3t110_f2|thunderx3t110_f3") +(define_reservation "thunderx3t110_f0123" + "thunderx3t110_f0|thunderx3t110_f1|thunderx3t110_f2|thunderx3t110_f3") + +; A load with delay in the ls0/ls1 pipes. +; this is always a delay of four +(define_reservation "thunderx3t110_l0delay" + "thunderx3t110_ls0,thunderx3t110_ls0d1,thunderx3t110_ls0d2,\ + thunderx3t110_ls0d3") +(define_reservation "thunderx3t110_l1delay" + "thunderx3t110_ls1,thunderx3t110_ls1d1,thunderx3t110_ls1d2,\ + thunderx3t110_ls1d3") +(define_reservation "thunderx3t110_l01delay" + "thunderx3t110_l0delay|thunderx3t110_l1delay") +;; Branch and call instructions. + +(define_insn_reservation "thunderx3t110_branch" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "call,branch,trap")) + "thunderx3t110_i23") + +;; Misc instructions. + +; Speculation barrier +(define_insn_reservation "thunderx3t110_nothing" 0 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "block")) + "nothing") + +(define_insn_reservation "thunderx3t110_mrs" 0 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mrs")) + "thunderx3t110_i2") + +(define_insn_reservation "thunderx3t110_multiple" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "multiple")) + "thunderx3t110_i0+thunderx3t110_i1+thunderx3t110_i3+thunderx3t110_ls0+\ + thunderx3t110_ls1+thunderx3t110_sd+thunderx3t110_i1m1+thunderx3t110_i1m2+\ + thunderx3t110_i1m3+thunderx3t110_f0+thunderx3t110_f1") + +;; Integer arithmetic/logic instructions. + +; Plain register moves are handled by renaming, +; and don't create any uops. 
+(define_insn_reservation "thunderx3t110_regmove" 0 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mov_reg")) + "nothing") + +(define_insn_reservation "thunderx3t110_alu_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\ + adc_reg,adc_imm,adcs_reg,adcs_imm,\ + logic_reg,logic_imm,logics_reg,logics_imm,\ + csel,adr,mov_imm,shift_reg,shift_imm,bfm,\ + bfx,rbit,rev,extend,rotate_imm")) + "thunderx3t110_i0123") + +; distinguish between latency 1|2 and throughput 1/4|2/4? +; is it actually 1,1/2,{i0,i1} vs 2,1/4,{i0,i1,i2,i3} +(define_insn_reservation "thunderx3t110_alu_shift" 2 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_ext,\ + alus_shift_imm,alus_ext,\ + logic_shift_imm,logics_shift_imm")) + "thunderx3t110_i0123") + +(define_insn_reservation "thunderx3t110_alu_shift1" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_ext,\ + alus_shift_imm,alus_ext,\ + logic_shift_imm,logics_shift_imm")) + "thunderx3t110_i01") + +; we are going for the the optimistic answer (13) +; for now, the worst case is 23 +(define_insn_reservation "thunderx3t110_div" 13 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "sdiv,udiv")) + "thunderx3t110_i1*3") + +(define_insn_reservation "thunderx3t110_madd" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mla,smlal,umlal")) + "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3,\ + thunderx3t110_i012") + +; NOTE: smull, umull are used for "high part" multiplies too. +; mul is alias for MADD +; it has to be distinguished between smulh, umulh (4,1) and +; other (5,1) but there is no such a type, so, we go for the +; conservative approach of (5,1) for now +; smulh, umulh only runs on I1 +(define_insn_reservation "thunderx3t110_mul" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "mul,smull,umull")) + "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3") + +(define_insn_reservation "thunderx3t110_countbits" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "clz")) + "thunderx3t110_i1") + +;; Integer loads and stores. + +; load_4 matches prefetch, a multitude of move/str/dup variants, +; sign extend +(define_insn_reservation "thunderx3t110_load_basic" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "load_4")) + "thunderx3t110_ls01") + +; model use of I0/I1/I2 for index versions only, model 4|8 2nd on load +(define_insn_reservation "thunderx3t110_loadpair" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "load_8,load_16")) + "thunderx3t110_i012,thunderx3t110_ls01") + +(define_insn_reservation "thunderx3t110_store_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "store_4")) + "thunderx3t110_ls01,thunderx3t110_sd") + +; model use of I0/I1/I2/I3 for index versions, model differing +; throughputs +(define_insn_reservation "thunderx3t110_storepair_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "store_8,store_16")) + "thunderx3t110_ls01,thunderx3t110_sd") + +;; FP data processing instructions. 
+ +(define_insn_reservation "thunderx3t110_fp_simple" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd")) + "thunderx3t110_f0123") + +; distinguish latency 3/4 throughput 1/2|1/4 +(define_insn_reservation "thunderx3t110_fp_addsub3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fadds,faddd")) + "thunderx3t110_f23") +(define_insn_reservation "thunderx3t110_fp_addsub4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fadds,faddd")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_fp_cmp" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd")) + "thunderx3t110_f0123") + +; need to split out latency 23 throughput 23/4: F64 from +; latency 16 throughput 16/4: FDIV F32 +(define_insn_reservation "thunderx3t110_fp_divsqrt_s" 16 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fdivs,fsqrts")) + "thunderx3t110_f0*3|thunderx3t110_f1*3|\ + thunderx3t110_f2*3|thunderx3t110_f3*3") + +(define_insn_reservation "thunderx3t110_fp_divsqrt_d" 23 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fdivd,fsqrtd")) + "thunderx3t110_f0*5|thunderx3t110_f1*5|\ + thunderx3t110_f2*5|thunderx3t110_f3*5") + +(define_insn_reservation "thunderx3t110_fp_mul_mac" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fmuls,fmuld,fmacs,fmacd")) + "thunderx3t110_f01") + +(define_insn_reservation "thunderx3t110_frint" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_rints,f_rintd")) + "thunderx3t110_f0123") + +; mimic latency 3|4 throughput 1/2|1/4 +(define_insn_reservation "thunderx3t110_fcsel3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fcsel")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_fcsel4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fcsel")) + "thunderx3t110_f0123") + +;; FP miscellaneous instructions. + +(define_insn_reservation "thunderx3t110_fp_cvt" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f")) + "thunderx3t110_f0123") + +; even though f_mrc has to belong to fp_mov_to_gen +; we retain this for the sake of legacy as codegen +; doesn't use it anyway +(define_insn_reservation "thunderx3t110_fp_mov3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fconsts,fconstd,fmov,f_mrc")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_fp_mov" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "fconsts,fconstd,fmov,f_mrc")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_fp_mov_to_gen" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_mcr")) + "thunderx3t110_f0123") + +;; FP loads and stores. +; model use of I0/I1/I2 for post/pre index modes + +(define_insn_reservation "thunderx3t110_fp_load_basic" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_loads,f_loadd")) + "thunderx3t110_ls01") + +; model throughput 1 +(define_insn_reservation "thunderx3t110_fp_store_basic" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "f_stores,f_stored")) + "thunderx3t110_ls01,thunderx3t110_sd") + +;; ASIMD integer instructions. 
+ +(define_insn_reservation "thunderx3t110_asimd_int" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_abd,neon_abd_q,\ + neon_arith_acc,neon_arith_acc_q,\ + neon_abs,neon_abs_q,\ + neon_add,neon_add_q,\ + neon_sub,neon_sub_q,\ + neon_neg,neon_neg_q,\ + neon_add_long,neon_add_widen,\ + neon_add_halve,neon_add_halve_q,\ + neon_sub_long,neon_sub_widen,\ + neon_sub_halve,neon_sub_halve_q,\ + neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\ + neon_qabs,neon_qabs_q,\ + neon_qadd,neon_qadd_q,\ + neon_qneg,neon_qneg_q,\ + neon_qsub,neon_qsub_q,\ + neon_minmax,neon_minmax_q,\ + neon_reduc_minmax,neon_reduc_minmax_q,\ + neon_mul_b,neon_mul_h,neon_mul_s,\ + neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\ + neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\ + neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\ + neon_mla_b,neon_mla_h,neon_mla_s,\ + neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\ + neon_mul_b_long,neon_mul_h_long,\ + neon_mul_s_long,neon_mul_d_long,\ + neon_sat_mul_b_long,neon_sat_mul_h_long,\ + neon_sat_mul_s_long,\ + neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\ + neon_sat_mla_b_long,neon_sat_mla_h_long,\ + neon_sat_mla_s_long,\ + neon_shift_acc,neon_shift_acc_q,\ + neon_shift_imm,neon_shift_imm_q,\ + neon_shift_reg,neon_shift_reg_q,\ + neon_shift_imm_long,neon_shift_imm_narrow_q,\ + neon_sat_shift_imm,neon_sat_shift_imm_q,\ + neon_sat_shift_reg,neon_sat_shift_reg_q,\ + neon_sat_shift_imm_narrow_q")) + "thunderx3t110_f0123") + +; neon_reduc_add is used for both addp and [su]adalp +(define_insn_reservation "thunderx3t110_asimd_reduc_add" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_reduc_add,neon_reduc_add_q")) + "thunderx3t110_f01") + +(define_insn_reservation "thunderx3t110_asimd_cmp" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\ + neon_tst,neon_tst_q")) + "thunderx3t110_f0123") + +; neon_logic used in ldr, str, mov, umov, fmov, mov; orn; bic; and, +; simd mov immediate; orr, simd mov immediate; eor; not (mvn) +; latency 4 throughput 1/2 LS0/LS1: ldr +; latency 1 throughput 1 LS0/LS1,SDI,I0/I1/I2: str +; latency 3|4 throughput 1/2|1/4 F2/F3 F0/F1/F2/F3: fmov immed, orn, +; bic, and, orr, eor, not (mvn) +; latency 4 throughput 1/4 F0/F1/F2/F3: fmov register, fmov gen to vec +; latency 5 throughput 1/4 F0/F1/F2/F3: fmov vec to gen, umov, fmov +(define_insn_reservation "thunderx3t110_asimd_logic4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_logic,neon_logic_q")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_asimd_logic5" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_logic,neon_logic_q")) + "thunderx3t110_f0123") + +;; ASIMD floating-point instructions. 
+ +; Distinguish between latency 5 throughput 1/4: fabs, fmax, fmin, fneg +; latency 4 throughput 1/4: fcmp +(define_insn_reservation "thunderx3t110_asimd_fp_simple" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\ + neon_fp_abs_s_q,neon_fp_abs_d_q,\ + neon_fp_compare_s,neon_fp_compare_d,\ + neon_fp_compare_s_q,neon_fp_compare_d_q,\ + neon_fp_minmax_s,neon_fp_minmax_d,\ + neon_fp_minmax_s_q,neon_fp_minmax_d_q,\ + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\ + neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\ + neon_fp_neg_s,neon_fp_neg_d,\ + neon_fp_neg_s_q,neon_fp_neg_d_q")) + "thunderx3t110_f0123") + +; distinguish between latency 3 throughput 1/2, +; latency 4 throughput 1/4 +; neon_fp_reduc_add_ is used for both faddp and +; vector reduction add. On TX3, faddp is 3|4 1/2|1/4 and reduction is 5 1/4 +(define_insn_reservation "thunderx3t110_asimd_fp_arith3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\ + neon_fp_abd_s_q,neon_fp_abd_d_q,\ + neon_fp_addsub_s,neon_fp_addsub_d,\ + neon_fp_addsub_s_q,neon_fp_addsub_d_q,\ + neon_fp_reduc_add_s,neon_fp_reduc_add_d,\ + neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_asimd_fp_arith4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\ + neon_fp_abd_s_q,neon_fp_abd_d_q,\ + neon_fp_addsub_s,neon_fp_addsub_d,\ + neon_fp_addsub_s_q,neon_fp_addsub_d_q,\ + neon_fp_reduc_add_s,neon_fp_reduc_add_d,\ + neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_fp_arith5" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_d,\ + neon_fp_mul_s_q,neon_fp_mul_d_q,\ + neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\ + neon_fp_mla_s,neon_fp_mla_d,\ + neon_fp_mla_s_q,neon_fp_mla_d_q")) + "thunderx3t110_f0123") + +; neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q: fcvtl,fctvl2,fcvtn,fcvtn2 +; neon_fp_to_int_s,neon_fp_to_int_d: fcvt{,z} +; where frint_suffix: zpmixan, su: su (plus other sign/unsign/extract... +; neon_fp_to_int_s_q,neon_fp_to_int_d_q: fcvtz other +; The int_to_fp* is complicated +; neon_int_to_fp_s,neon_int_to_fp_d: cvtf +; neon_int_to_fp_s_q,neon_int_to_fp_d_q +; Round matches single define_insn, frint +; neon_fp_round_s,neon_fp_round_d,neon_fp_round_s_q, +; neon_fp_round_d_q: frint +; FCVT*,VCVTAU,[SU]CVTF: latency 5 throughput 1/4 +; FRINT*: latency 5 throughput 1/4 +(define_insn_reservation "thunderx3t110_asimd_fp_conv" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\ + neon_fp_to_int_s,neon_fp_to_int_d,\ + neon_fp_to_int_s_q,neon_fp_to_int_d_q,\ + neon_int_to_fp_s,neon_int_to_fp_d,\ + neon_int_to_fp_s_q,neon_int_to_fp_d_q,\ + neon_fp_round_s,neon_fp_round_d,\ + neon_fp_round_s_q,neon_fp_round_d_q")) + "thunderx3t110_f0123") + +; model that pipeline is occupied the whole time D/F32, Q/F32: 16/4 +; Q/F64: 23/4 +(define_insn_reservation "thunderx3t110_asimd_fp_div_s" 16 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_fp_div_d" 23 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q")) + "thunderx3t110_f0123") + +;; ASIMD miscellaneous instructions. 
+ +; divided out: +; rbit,bsl,bsl_q,cls,cls_q,cnt,cnt_q,move,move_q: 3|4 1/2 | 1/4 +; from_gp,from_gp_q : 4 | 1/4 +; dup,dup_q,ext,ext_q,ins,ins_q,all recpe forms, rev,rev_q: 5 1/4 +; permute,permute_q needs to depend on aarch64_expand_vec_perm_const does +; on TX3 +(define_insn_reservation "thunderx3t110_asimd_misc3" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_rbit,\ + neon_bsl,neon_bsl_q,\ + neon_cls,neon_cls_q,\ + neon_cnt,neon_cnt_q,\ + neon_move,neon_move_q")) + "thunderx3t110_f23") + +(define_insn_reservation "thunderx3t110_asimd_misc4" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_rbit,\ + neon_bsl,neon_bsl_q,\ + neon_cls,neon_cls_q,\ + neon_cnt,neon_cnt_q,\ + neon_from_gp,neon_from_gp_q,\ + neon_move,neon_move_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_misc" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" " + neon_dup,neon_dup_q,\ + neon_ext,neon_ext_q,\ + neon_ins,neon_ins_q,\ + neon_move,neon_move_q,\ + neon_fp_recpe_s,neon_fp_recpe_d,\ + neon_fp_recpe_s_q,neon_fp_recpe_d_q,\ + neon_fp_recpx_s,neon_fp_recpx_d,\ + neon_fp_recpx_s_q,neon_fp_recpx_d_q,\ + neon_rev,neon_rev_q,\ + neon_permute,neon_permute_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_recip_step" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\ + neon_fp_recps_d,neon_fp_recps_d_q,\ + neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ + neon_fp_sqrt_d,neon_fp_sqrt_d_q,\ + neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\ + neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\ + neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut1" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl1,neon_tbl1_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut2" 10 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl2,neon_tbl2_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut3" 15 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl3,neon_tbl3_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_lut4" 20 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_tbl4,neon_tbl4_q")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_elt_to_gr" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_to_gp,neon_to_gp_q")) + "thunderx3t110_f0123") + +;; ASIMD load instructions. + +; NOTE: These reservations attempt to model latency and throughput +; correctly, but the cycle timing of unit allocation is not +; necessarily accurate (because insns are split into uops, and those +; may be issued out-of-order). + +; the LDP/LDNP imm-offset S/D/Q suppplies the first arg with latency 4 +; and the 2nd at 5 (Q form) or 8 (S/D form). Can this be modeled? These +;forms, as documented, do not use the I0/I1/I2 units (no I3), but the +; other LDP ones do. 
+(define_insn_reservation "thunderx3t110_asimd_load1_ldp" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_ldp,neon_ldp_q")) + "thunderx3t110_i012,thunderx3t110_ls01") + +; Need to distinguish latency 6 throughput 2: 4 reg D/Q +; latency 5 throughput 3/2: 3 reg D/Q +; latency 4 throughput 1: 2 reg D/Q +; latency 4 throughput 1/2: 1 reg D/Q +(define_insn_reservation "thunderx3t110_asimd_load1" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ + neon_load1_2reg,neon_load1_2reg_q,\ + neon_load1_3reg,neon_load1_3reg_q,\ + neon_load1_4reg,neon_load1_4reg_q")) + "thunderx3t110_ls01") + +(define_insn_reservation "thunderx3t110_asimd_load1_onelane" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load1_all" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load2" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\ + neon_load2_one_lane,neon_load2_one_lane_q,\ + neon_load2_all_lanes,neon_load2_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load3" 7 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\ + neon_load3_one_lane,neon_load3_one_lane_q,\ + neon_load3_all_lanes,neon_load3_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_asimd_load4" 8 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\ + neon_load4_one_lane,neon_load4_one_lane_q,\ + neon_load4_all_lanes,neon_load4_all_lanes_q")) + "thunderx3t110_l01delay,thunderx3t110_f0123") + +;; ASIMD store instructions. + +; Same note applies as for ASIMD load instructions. + +; Vector Store pair Need to distinguish: +; 5 throughput: imm-offset S/D; imm-postindex S/D; imm-preindex S/D +; 2 throughput: imm-offset Q; imm-postindex Q; imm-preindex Q +; all index modes use I0/I1/I2 +(define_insn_reservation "thunderx3t110_asimd_store_stp" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_stp,neon_stp_q")) + "thunderx3t110_ls01,thunderx3t110_sd") + +; There are multiple forms of ST1 +; The following two groups, as documented, do not use the FP pipelines. 
+; multiple, 1 reg, D-form ST1 +; tx2_ltp: x 1/2 LS0/LS1 +; tx3_ltp: x 1/2 LS0/LS1 +; multiple, 1 reg, Q-form ST1 +; tx2_ltp: x 1/2 LS0/LS1 +; tx3_ltp: x 1/2 LS0/LS1 +; +; one lane, B/H/S ST1 +; tx2_ltp: x 1/2 LS0/LS1,F0/F1 +; tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3 +; one lane, D ST1 +; tx2_ltp: x 1/2 LS0/LS1,F0/F1 +; tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3 +;; Model for st1 insn needs refinement for different register forms +; multiple, 2 reg, D-form ST1 x 1 LS0/LS1 +; multiple, 2 reg, Q-form ST1 x 1 LS0/LS1 +; multiple, 3 reg, D-form ST1 x 3/2 LS0/LS1 +; multiple, 3 reg, Q-form ST1 x 3/2 LS0/LS1 +; multiple,4 reg, D-form ST1 x 2 LS0/LS1 +; multiple,4 reg, Q-form ST1 x 2 LS0/LS1 +(define_insn_reservation "thunderx3t110_asimd_store1" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\ + neon_store1_2reg,neon_store1_2reg_q,\ + neon_store1_3reg,neon_store1_4reg")) + "thunderx3t110_ls01") + +(define_insn_reservation "thunderx3t110_asimd_store1_onelane" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +; distinguish between throughput 1: D/Q-form B/H/S, Q-form D and +; throughput 1/2: one lane B/H/S/D +(define_insn_reservation "thunderx3t110_asimd_store2" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\ + neon_store2_one_lane,neon_store2_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +; distinguish between throughput 3: D/Q-form B/H/S, Q-form D and +; throughput 1: one lane B/H/S/D +(define_insn_reservation "thunderx3t110_asimd_store3" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\ + neon_store3_one_lane,neon_store3_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +; distinguish between throughput 4: D/Q-form B/H/S, Q-form D and +; throughput 1: one lane B/H/S/D? (not in doc) +(define_insn_reservation "thunderx3t110_asimd_store4" 1 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\ + neon_store4_one_lane,neon_store4_one_lane_q")) + "thunderx3t110_ls01,thunderx3t110_f0123") + +;; Crypto extensions. + +(define_insn_reservation "thunderx3t110_aes" 4 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crypto_aese,crypto_aesmc")) + "thunderx3t110_f0123") + +(define_insn_reservation "thunderx3t110_sha" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\ + crypto_sha256_fast,crypto_sha256_slow")) + "thunderx3t110_f0123") + +;; CRC extension. + +(define_insn_reservation "thunderx3t110_crc" 3 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crc")) + "thunderx3t110_i1") + +;; PMULL extension. + +(define_insn_reservation "thunderx3t110_pmull" 5 + (and (eq_attr "tune" "thunderx3t110") + (eq_attr "type" "crypto_pmull")) + "thunderx3t110_f0123") diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000000000..3f9cbe0e1039b --- /dev/null +++ b/gcc/config/aarch64/tsv110.md @@ -0,0 +1,708 @@ +;; tsv110 pipeline description +;; Copyright (C) 2018-2021 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "tsv110") + +(define_attr "tsv110_neon_type" + "neon_arith_acc, neon_arith_acc_q, + neon_arith_basic, neon_arith_complex, + neon_reduc_add_acc, neon_multiply, neon_multiply_q, + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long, + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic, + neon_shift_imm_complex, + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex, + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith, + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int, + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul, + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte, + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q, + neon_bitops, neon_bitops_q, neon_from_gp, + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp, + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e, + neon_load_f, neon_store_a, neon_store_b, neon_store_complex, + unknown" + (cond [ + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\ + neon_reduc_add_acc_q") + (const_string "neon_arith_acc") + (eq_attr "type" "neon_arith_acc_q") + (const_string "neon_arith_acc_q") + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\ + neon_add_widen, neon_neg, neon_neg_q,\ + neon_reduc_add, neon_reduc_add_q,\ + neon_reduc_add_long, neon_sub, neon_sub_q,\ + neon_sub_long, neon_sub_widen, neon_logic,\ + neon_logic_q, neon_tst, neon_tst_q,\ + neon_compare, neon_compare_q,\ + neon_compare_zero, neon_compare_zero_q,\ + neon_minmax, neon_minmax_q, neon_reduc_minmax,\ + neon_reduc_minmax_q") + (const_string "neon_arith_basic") + (eq_attr "type" "neon_add_halve_narrow_q,\ + neon_add_halve, neon_add_halve_q,\ + neon_sub_halve, neon_sub_halve_q, neon_qabs,\ + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\ + neon_qneg_q, neon_qsub, neon_qsub_q,\ + neon_sub_halve_narrow_q") + (const_string "neon_arith_complex") + + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\ + neon_mul_h_scalar, neon_mul_s_scalar,\ + neon_sat_mul_b, neon_sat_mul_h,\ + neon_sat_mul_s, neon_sat_mul_h_scalar,\ + neon_sat_mul_s_scalar,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_mul_s_long,\ + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\ + neon_sat_mul_b_long, neon_sat_mul_h_long,\ + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long,\ + neon_mla_b, neon_mla_h, neon_mla_s,\ + neon_mla_h_scalar, neon_mla_s_scalar,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_mla_s_long,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long") + (const_string "neon_multiply") + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\ + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\ + neon_sat_mul_b_q, neon_sat_mul_h_q,\ + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar_q,\ + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\ + neon_mla_h_scalar_q, neon_mla_s_scalar_q") + (const_string "neon_multiply_q") + + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") + (const_string "neon_shift_acc") + 
(eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ + neon_shift_imm_narrow_q, neon_shift_imm_long") + (const_string "neon_shift_imm_basic") + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\ + neon_sat_shift_imm_narrow_q") + (const_string "neon_shift_imm_complex") + (eq_attr "type" "neon_shift_reg") + (const_string "neon_shift_reg_basic") + (eq_attr "type" "neon_shift_reg_q") + (const_string "neon_shift_reg_basic_q") + (eq_attr "type" "neon_sat_shift_reg") + (const_string "neon_shift_reg_complex") + (eq_attr "type" "neon_sat_shift_reg_q") + (const_string "neon_shift_reg_complex_q") + + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\ + neon_fp_abs_s, neon_fp_abs_s_q,\ + neon_fp_neg_d, neon_fp_neg_d_q,\ + neon_fp_abs_d, neon_fp_abs_d_q,\ + neon_fp_minmax_s,neon_fp_minmax_d,\ + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d") + (const_string "neon_fp_negabs") + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\ + neon_fp_reduc_add_s, neon_fp_compare_s,\ + neon_fp_round_s,\ + neon_fp_addsub_d, neon_fp_abd_d,\ + neon_fp_reduc_add_d, neon_fp_compare_d,\ + neon_fp_round_d") + (const_string "neon_fp_arith") + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\ + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\ + neon_fp_minmax_s_q, neon_fp_round_s_q,\ + neon_fp_addsub_d_q, neon_fp_abd_d_q,\ + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\ + neon_fp_minmax_d_q, neon_fp_round_d_q") + (const_string "neon_fp_arith_q") + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\ + neon_fp_reduc_minmax_d_q,\ + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q") + (const_string "neon_fp_reductions_q") + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\ + neon_fp_to_int_d, neon_int_to_fp_d") + (const_string "neon_fp_cvt_int") + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\ + neon_fp_to_int_d_q, neon_int_to_fp_d_q") + (const_string "neon_fp_cvt_int_q") + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h") + (const_string "neon_fp_cvt16") + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\ + neon_fp_mul_d") + (const_string "neon_fp_mul") + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\ + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q") + (const_string "neon_fp_mul_q") + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\ + neon_fp_mla_d") + (const_string "neon_fp_mla") + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q, + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q") + (const_string "neon_fp_mla_q") + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\ + neon_fp_recpx_s,\ + neon_fp_recpe_d, neon_fp_rsqrte_d,\ + neon_fp_recpx_d") + (const_string "neon_fp_recpe_rsqrte") + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\ + neon_fp_recpx_s_q,\ + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\ + neon_fp_recpx_d_q") + (const_string "neon_fp_recpe_rsqrte_q") + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\ + neon_fp_recps_d, neon_fp_rsqrts_d") + (const_string "neon_fp_recps_rsqrts") + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\ + neon_fp_recps_d_q, neon_fp_rsqrts_d_q") + (const_string "neon_fp_recps_rsqrts_q") + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\ + neon_rev, neon_permute, neon_rbit,\ + neon_tbl1, neon_tbl2, neon_zip,\ + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\ + neon_move, neon_move_q, neon_move_narrow_q") + (const_string "neon_bitops") + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\ + neon_rev_q, neon_permute_q, neon_rbit_q") + (const_string "neon_bitops_q") + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr") + (const_string "neon_from_gp") 
+ (eq_attr "type" "neon_from_gp_q") + (const_string "neon_from_gp_q") + + (eq_attr "type" "f_loads, f_loadd,\ + neon_load1_1reg, neon_load1_1reg_q,\ + neon_load1_2reg, neon_load1_2reg_q") + (const_string "neon_load_a") + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q") + (const_string "neon_load_b") + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load1_all_lanes, neon_load1_all_lanes_q,\ + neon_load2_2reg, neon_load2_2reg_q,\ + neon_load2_all_lanes, neon_load2_all_lanes_q") + (const_string "neon_load_c") + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\ + neon_load3_3reg, neon_load3_3reg_q,\ + neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_4reg, neon_load4_4reg_q") + (const_string "neon_load_d") + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\ + neon_load3_all_lanes, neon_load3_all_lanes_q,\ + neon_load4_all_lanes, neon_load4_all_lanes_q") + (const_string "neon_load_e") + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q") + (const_string "neon_load_f") + + (eq_attr "type" "f_stores, f_stored,\ + neon_store1_1reg") + (const_string "neon_store_a") + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q") + (const_string "neon_store_b") + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\ + neon_store3_3reg, neon_store3_3reg_q,\ + neon_store2_4reg, neon_store2_4reg_q,\ + neon_store4_4reg, neon_store4_4reg_q,\ + neon_store2_2reg, neon_store2_2reg_q,\ + neon_store3_one_lane, neon_store3_one_lane_q,\ + neon_store4_one_lane, neon_store4_one_lane_q,\ + neon_store1_4reg, neon_store1_4reg_q,\ + neon_store1_one_lane, neon_store1_one_lane_q,\ + neon_store2_one_lane, neon_store2_one_lane_q") + (const_string "neon_store_complex")] + (const_string "unknown"))) + +;; The tsv110 core is modelled as issues pipeline that has +;; the following functional units. +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3 + +(define_cpu_unit "tsv110_alu1_issue" "tsv110") +(define_reservation "tsv110_alu1" "tsv110_alu1_issue") + +(define_cpu_unit "tsv110_alu2_issue" "tsv110") +(define_reservation "tsv110_alu2" "tsv110_alu2_issue") + +(define_cpu_unit "tsv110_alu3_issue" "tsv110") +(define_reservation "tsv110_alu3" "tsv110_alu3_issue") + +;; 2. One pipeline for complex integer operations: MDU + +(define_cpu_unit "tsv110_mdu_issue" "tsv110") +(define_reservation "tsv110_mdu" "tsv110_mdu_issue") + +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2 +(define_automaton "tsv110_fsu") + +(define_cpu_unit "tsv110_fsu1_issue" + "tsv110_fsu") +(define_cpu_unit "tsv110_fsu2_issue" + "tsv110_fsu") + +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue") +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue") + +;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2 + +;; 5. Two pipelines for load and store operations: LS1, LS2. + +(define_cpu_unit "tsv110_ls1_issue" "tsv110") +(define_cpu_unit "tsv110_ls2_issue" "tsv110") +(define_reservation "tsv110_ls1" "tsv110_ls1_issue") +(define_reservation "tsv110_ls2" "tsv110_ls2_issue") + +;; Block all issue queues. 
+ +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue + + tsv110_mdu_issue + tsv110_alu1_issue + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue") + +;; Simple Execution Unit: +;; +;; Simple ALU without shift +(define_insn_reservation "tsv110_alu" 1 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "alu_imm,logic_imm,\ + alu_sreg,logic_reg,\ + adc_imm,adc_reg,\ + adr,bfm,clz,rbit,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ + mrs,multiple")) + "tsv110_alu1|tsv110_alu2|tsv110_alu3") + +(define_insn_reservation "tsv110_alus" 1 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "alus_imm,logics_imm,\ + alus_sreg,logics_reg,\ + adcs_imm,adcs_reg")) + "tsv110_alu2|tsv110_alu3") + +;; ALU ops with shift +(define_insn_reservation "tsv110_alu_shift" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "extend,\ + alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_shift_reg,\ + crc,logic_shift_imm,logic_shift_reg,\ + mov_shift,mvn_shift,\ + mov_shift_reg,mvn_shift_reg")) + "tsv110_mdu") + +(define_insn_reservation "tsv110_alus_shift" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\ + logics_shift_imm,logics_shift_reg")) + "tsv110_alu2|tsv110_alu3") + +;; Multiplies instructions +(define_insn_reservation "tsv110_mult" 3 + (and (eq_attr "tune" "tsv110") + (ior (eq_attr "mul32" "yes") + (eq_attr "widen_mul64" "yes"))) + "tsv110_mdu") + +;; Integer divide +(define_insn_reservation "tsv110_div" 10 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "udiv,sdiv")) + "tsv110_mdu") + +;; Block all issue pipes for a cycle +(define_insn_reservation "tsv110_block" 1 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "block")) + "tsv110_block") + +;; Branch execution Unit +;; +;; Branches take two issue slot. +;; No latency as there is no result +(define_insn_reservation "tsv110_branch" 0 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "branch")) + "tsv110_alu2|tsv110_alu3") + +;; Load-store execution Unit +;; +;; Loads of up to two words. +(define_insn_reservation "tsv110_load1" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "load_4,load_8")) + "tsv110_ls1|tsv110_ls2") + +;; Stores of up to two words. +(define_insn_reservation "tsv110_store1" 0 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "store_4,store_8")) + "tsv110_ls1|tsv110_ls2") + +;; Advanced SIMD Unit - Integer Arithmetic Instructions. + +(define_insn_reservation "tsv110_neon_abd_aba" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_abd,neon_arith_acc")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_neon_abd_aba_q" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_arith_acc_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_neon_arith_basic" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_arith_basic")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_neon_arith_complex" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_arith_complex")) + "tsv110_fsu1|tsv110_fsu2") + +;; Integer Multiply Instructions. 
+;; D-form +(define_insn_reservation "tsv110_neon_multiply" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_multiply")) + "tsv110_fsu1") + +(define_insn_reservation "tsv110_neon_multiply_dlong" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_mul_d_long")) + "tsv110_fsu1") + +;; Q-form +(define_insn_reservation "tsv110_neon_multiply_q" 8 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_multiply_q")) + "tsv110_fsu1") + +;; Integer Shift Instructions. + +(define_insn_reservation + "tsv110_neon_shift_acc" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_shift_acc,\ + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\ + neon_shift_reg_complex")) + "tsv110_fsu1") + +(define_insn_reservation + "tsv110_neon_shift_acc_q" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\ + neon_shift_reg_complex_q")) + "tsv110_fsu1") + +;; Floating Point Instructions. + +(define_insn_reservation + "tsv110_neon_fp_negabs" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_negabs")) + "(tsv110_fsu1|tsv110_fsu2)") + +(define_insn_reservation + "tsv110_neon_fp_arith" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_arith")) + "(tsv110_fsu1|tsv110_fsu2)") + +(define_insn_reservation + "tsv110_neon_fp_arith_q" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_arith_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_minmax_q" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_reductions_q" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_cvt_int" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_mul" 5 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_mul")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_mul_q" 5 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_mul_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_mla" 7 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_mla,\ + neon_fp_recps_rsqrts")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_recpe_rsqrte" 3 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_mla_q" 7 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\ + neon_fp_recps_rsqrts_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_fp_recpe_rsqrte_q" 3 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q")) + "tsv110_fsu1|tsv110_fsu2") + +;; Miscellaneous Instructions. 
+ +(define_insn_reservation + "tsv110_neon_bitops" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_bitops")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_dup" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_from_gp,f_mcr")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_mov" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "f_mcrr")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_bitops_q" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_bitops_q")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_from_gp_q" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_from_gp_q")) + "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)") + +(define_insn_reservation + "tsv110_neon_to_gp" 3 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_to_gp,neon_to_gp_q")) + "tsv110_fsu1") + +;; Load Instructions. + +(define_insn_reservation + "tsv110_neon_ld1_lane" 8 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\ + neon_load1_all_lanes,neon_load1_all_lanes_q")) + "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)") + +(define_insn_reservation + "tsv110_neon_ld1_reg1" 6 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q")) + "tsv110_ls1|tsv110_ls2") + +(define_insn_reservation + "tsv110_neon_ld1_reg2" 6 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q")) + "tsv110_ls1|tsv110_ls2") + +(define_insn_reservation + "tsv110_neon_ld1_reg3" 7 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q")) + "tsv110_ls1|tsv110_ls2") + +(define_insn_reservation + "tsv110_neon_ld1_reg4" 7 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q")) + "tsv110_ls1|tsv110_ls2") + +(define_insn_reservation + "tsv110_neon_ld2" 8 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\ + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\ + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q")) + "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)") + +(define_insn_reservation + "tsv110_neon_ld3" 9 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\ + neon_load3_one_lane,neon_load3_one_lane_q,\ + neon_load3_all_lanes,neon_load3_all_lanes_q")) + "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)") + +(define_insn_reservation + "tsv110_neon_ld4_lane" 9 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\ + neon_load4_one_lane,neon_load4_one_lane_q")) + "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)") + +(define_insn_reservation + "tsv110_neon_ld4_reg" 11 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\ + neon_load4_one_lane,neon_load4_one_lane_q")) + "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)") + +;; Store Instructions. 
+ +(define_insn_reservation + "tsv110_neon_store_a" 0 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_store_a")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation + "tsv110_neon_store_b" 0 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_store_b")) + "tsv110_fsu1|tsv110_fsu2") + +;; These block issue for a number of cycles proportional to the number +;; of 64-bit chunks they will store, we don't attempt to model that +;; precisely, treat them as blocking execution for two cycles when +;; issued. +(define_insn_reservation + "tsv110_neon_store_complex" 0 + (and (eq_attr "tune" "tsv110") + (eq_attr "tsv110_neon_type" "neon_store_complex")) + "tsv110_block*2") + +;; Floating-Point Operations. + +(define_insn_reservation "tsv110_fp_const" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "fconsts,fconstd,fmov")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_fp_add_sub" 5 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "fadds,faddd,fmuls,fmuld")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_fp_mac" 7 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_fp_cvt" 3 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "f_cvt")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_fp_cvtf2i" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "f_cvtf2i")) + "tsv110_fsu1") + +(define_insn_reservation "tsv110_fp_cvti2f" 5 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "f_cvti2f")) + "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)") + +(define_insn_reservation "tsv110_fp_cmp" 4 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "fcmps,fcmpd")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_fp_arith" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "ffariths,ffarithd")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_fp_divs" 12 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\ + neon_fp_div_s_q,neon_fp_div_d_q")) + "tsv110_fsu1") + +(define_insn_reservation "tsv110_fp_sqrts" 24 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\ + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q")) + "tsv110_fsu2") + +(define_insn_reservation "tsv110_crypto_aes" 3 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "crypto_aese,crypto_aesmc")) + "tsv110_fsu1") + +(define_insn_reservation "tsv110_crypto_sha1_fast" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor")) + "(tsv110_fsu1|tsv110_fsu2)") + +(define_insn_reservation "tsv110_crypto_sha256_fast" 2 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "crypto_sha256_fast")) + "tsv110_fsu1") + +(define_insn_reservation "tsv110_crypto_complex" 5 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow")) + "tsv110_fsu1") + +;; We lie with calls. They take up all issue slots, but are otherwise +;; not harmful. +(define_insn_reservation "tsv110_call" 1 + (and (eq_attr "tune" "tsv110") + (eq_attr "type" "call")) + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\ + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue" +) + +;; Simple execution unit bypasses +(define_bypass 1 "tsv110_alu" + "tsv110_alu,tsv110_alu_shift") +(define_bypass 2 "tsv110_alu_shift" + "tsv110_alu,tsv110_alu_shift") + +;; An MLA or a MUL can feed a dependent MLA. 
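To make the effect of the bypass below concrete (the instruction pair is purely illustrative and not taken from the patch):
;;   fmla v0.4s, v1.4s, v2.4s   ; tsv110_neon_fp_mla_q, latency 7
;;   fmla v0.4s, v3.4s, v4.4s   ; reads the v0 produced just above
;; With the 3-cycle bypass, the second FMLA is modelled as able to issue
;; three cycles after the first, rather than the seven implied by its
;; reservation alone.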
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*" + "tsv110_neon_*mla*") + +;; We don't need to care about control hazards, either the branch is +;; predicted in which case we pay no penalty, or the branch is +;; mispredicted in which case instruction scheduling will be unlikely to +;; help. +(define_bypass 1 "tsv110_*" + "tsv110_call,tsv110_branch") diff --git a/gcc/config/aarch64/x-darwin b/gcc/config/aarch64/x-darwin new file mode 100644 index 0000000000000..6d788d5e89cfb --- /dev/null +++ b/gcc/config/aarch64/x-darwin @@ -0,0 +1,3 @@ +host-aarch64-darwin.o : $(srcdir)/config/aarch64/host-aarch64-darwin.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/config/darwin-c.c b/gcc/config/darwin-c.c index 8fe02338bb1da..b0424a981e866 100644 --- a/gcc/config/darwin-c.c +++ b/gcc/config/darwin-c.c @@ -1,5 +1,5 @@ /* Darwin support needed only by C/C++ frontends. - Copyright (C) 2001-2018 Free Software Foundation, Inc. + Copyright (C) 2001-2021 Free Software Foundation, Inc. Contributed by Apple Computer Inc. This file is part of GCC. @@ -44,13 +44,12 @@ static bool using_frameworks = false; static const char *find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp); -typedef struct align_stack -{ - int alignment; - struct align_stack * prev; -} align_stack; +struct fld_align_stack { + int alignment; + struct fld_align_stack * prev; +}; -static struct align_stack * field_align_stack = NULL; +static struct fld_align_stack * field_align_stack; /* Maintain a small stack of alignments. This is similar to pragma pack's stack, but simpler. */ @@ -58,7 +57,7 @@ static struct align_stack * field_align_stack = NULL; static void push_field_alignment (int bit_alignment) { - align_stack *entry = XNEW (align_stack); + fld_align_stack *entry = XNEW (fld_align_stack); entry->alignment = maximum_field_alignment; entry->prev = field_align_stack; @@ -72,14 +71,14 @@ pop_field_alignment (void) { if (field_align_stack) { - align_stack *entry = field_align_stack; + fld_align_stack *entry = field_align_stack; maximum_field_alignment = entry->alignment; field_align_stack = entry->prev; free (entry); } else - error ("too many #pragma options align=reset"); + error ("too many %<#pragma options align=reset%>"); } /* Handlers for Darwin-specific pragmas. 
*/ @@ -99,17 +98,17 @@ darwin_pragma_options (cpp_reader *pfile ATTRIBUTE_UNUSED) tree t, x; if (pragma_lex (&t) != CPP_NAME) - BAD ("malformed '#pragma options', ignoring"); + BAD ("malformed %<#pragma options%>, ignoring"); arg = IDENTIFIER_POINTER (t); if (strcmp (arg, "align")) - BAD ("malformed '#pragma options', ignoring"); + BAD ("malformed %<#pragma options%>, ignoring"); if (pragma_lex (&t) != CPP_EQ) - BAD ("malformed '#pragma options', ignoring"); + BAD ("malformed %<#pragma options%>, ignoring"); if (pragma_lex (&t) != CPP_NAME) - BAD ("malformed '#pragma options', ignoring"); + BAD ("malformed %<#pragma options%>, ignoring"); if (pragma_lex (&x) != CPP_EOF) - warning (OPT_Wpragmas, "junk at end of '#pragma options'"); + warning (OPT_Wpragmas, "junk at end of %<#pragma options%>"); arg = IDENTIFIER_POINTER (t); if (!strcmp (arg, "mac68k")) @@ -119,7 +118,7 @@ darwin_pragma_options (cpp_reader *pfile ATTRIBUTE_UNUSED) else if (!strcmp (arg, "reset")) pop_field_alignment (); else - BAD ("malformed '#pragma options align={mac68k|power|reset}', ignoring"); + BAD ("malformed %<#pragma options align={mac68k|power|reset}%>, ignoring"); } /* #pragma unused ([var {, var}*]) */ @@ -131,7 +130,7 @@ darwin_pragma_unused (cpp_reader *pfile ATTRIBUTE_UNUSED) int tok; if (pragma_lex (&x) != CPP_OPEN_PAREN) - BAD ("missing '(' after '#pragma unused', ignoring"); + BAD ("missing %<(%> after %<#pragma unused%>, ignoring"); while (1) { @@ -152,10 +151,10 @@ darwin_pragma_unused (cpp_reader *pfile ATTRIBUTE_UNUSED) } if (tok != CPP_CLOSE_PAREN) - BAD ("missing ')' after '#pragma unused', ignoring"); + BAD ("missing %<)%> after %<#pragma unused%>, ignoring"); if (pragma_lex (&x) != CPP_EOF) - BAD ("junk at end of '#pragma unused'"); + BAD ("junk at end of %<#pragma unused%>"); } /* Parse the ms_struct pragma. 
*/ @@ -166,7 +165,7 @@ darwin_pragma_ms_struct (cpp_reader *pfile ATTRIBUTE_UNUSED) tree t; if (pragma_lex (&t) != CPP_NAME) - BAD ("malformed '#pragma ms_struct', ignoring"); + BAD ("malformed %<#pragma ms_struct%>, ignoring"); arg = IDENTIFIER_POINTER (t); if (!strcmp (arg, "on")) @@ -174,10 +173,10 @@ darwin_pragma_ms_struct (cpp_reader *pfile ATTRIBUTE_UNUSED) else if (!strcmp (arg, "off") || !strcmp (arg, "reset")) darwin_ms_struct = false; else - BAD ("malformed '#pragma ms_struct {on|off|reset}', ignoring"); + BAD ("malformed %<#pragma ms_struct {on|off|reset}%>, ignoring"); if (pragma_lex (&t) != CPP_EOF) - BAD ("junk at end of '#pragma ms_struct'"); + BAD ("junk at end of %<#pragma ms_struct%>"); } static struct frameworks_in_use { @@ -692,10 +691,10 @@ macosx_version_as_macro (void) if (!version_array) goto fail; - if (version_array[MAJOR] != 10) + if (version_array[MAJOR] < 10 || version_array[MAJOR] > 11) goto fail; - if (version_array[MINOR] < 10) + if (version_array[MAJOR] == 10 && version_array[MINOR] < 10) version_macro = version_as_legacy_macro (version_array); else version_macro = version_as_modern_macro (version_array); @@ -706,7 +705,7 @@ macosx_version_as_macro (void) return version_macro; fail: - error ("unknown value %qs of -mmacosx-version-min", + error ("unknown value %qs of %<-mmacosx-version-min%>", darwin_macosx_version_min); return "1000"; } @@ -809,7 +808,8 @@ darwin_cfstring_ref_p (const_tree strp) tn = DECL_NAME (tn); return (tn && IDENTIFIER_POINTER (tn) - && !strncmp (IDENTIFIER_POINTER (tn), "CFStringRef", 8)); + && !strncmp (IDENTIFIER_POINTER (tn), "CFStringRef", + strlen ("CFStringRef"))); } /* At present the behavior of this is undefined and it does nothing. */ diff --git a/gcc/config/darwin-d.c b/gcc/config/darwin-d.c new file mode 100644 index 0000000000000..67d69b721b565 --- /dev/null +++ b/gcc/config/darwin-d.c @@ -0,0 +1,75 @@ +/* Darwin support needed only by D front-end. + Copyright (C) 2020-2021 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm_d.h" +#include "d/d-target.h" +#include "d/d-target-def.h" + +/* Implement TARGET_D_OS_VERSIONS for Darwin targets. */ + +static void +darwin_d_os_builtins (void) +{ + d_add_builtin_version ("Posix"); + d_add_builtin_version ("OSX"); + d_add_builtin_version ("darwin"); +} + +/* Handle a call to `__traits(getTargetInfo, "objectFormat")'. */ + +static tree +darwin_d_handle_target_object_format (void) +{ + const char *objfmt = "macho"; + + return build_string_literal (strlen (objfmt) + 1, objfmt); +} + +/* Implement TARGET_D_REGISTER_OS_TARGET_INFO for Darwin targets. 
*/ + +static void +darwin_d_register_target_info (void) +{ + const struct d_target_info_spec handlers[] = { + { "objectFormat", darwin_d_handle_target_object_format }, + { NULL, NULL }, + }; + + d_add_target_info_handlers (handlers); +} + +#undef TARGET_D_OS_VERSIONS +#define TARGET_D_OS_VERSIONS darwin_d_os_builtins + +#undef TARGET_D_REGISTER_OS_TARGET_INFO +#define TARGET_D_REGISTER_OS_TARGET_INFO darwin_d_register_target_info + +/* Define TARGET_D_MINFO_SECTION for Darwin targets. */ + +#undef TARGET_D_MINFO_SECTION +#define TARGET_D_MINFO_SECTION "__DATA,__minfodata" + +#undef TARGET_D_MINFO_START_NAME +#define TARGET_D_MINFO_START_NAME "*section$start$__DATA$__minfodata" + +#undef TARGET_D_MINFO_END_NAME +#define TARGET_D_MINFO_END_NAME "*section$end$__DATA$__minfodata" + +struct gcc_targetdm targetdm = TARGETDM_INITIALIZER; diff --git a/gcc/config/darwin-driver.c b/gcc/config/darwin-driver.c index 2d72ab9a4d255..3d7768f055de4 100644 --- a/gcc/config/darwin-driver.c +++ b/gcc/config/darwin-driver.c @@ -1,5 +1,5 @@ /* Additional functions for the GCC driver on Darwin native. - Copyright (C) 2006-2018 Free Software Foundation, Inc. + Copyright (C) 2006-2021 Free Software Foundation, Inc. Contributed by Apple Computer Inc. This file is part of GCC. @@ -43,13 +43,13 @@ static const char * validate_macosx_version_min (const char *version_str) { size_t version_len; - unsigned long major, minor, tiny = 0; + unsigned long major, minor = 0, tiny = 0; char *end; const char *old_version = version_str; bool need_rewrite = false; version_len = strlen (version_str); - if (version_len < 4) /* The minimum would be 10.x */ + if (version_len < 2) /* The minimum would be 11 */ return NULL; /* Version string must consist of digits and periods only. */ @@ -63,18 +63,27 @@ validate_macosx_version_min (const char *version_str) need_rewrite = true; major = strtoul (version_str, &end, 10); - version_str = end + ((*end == '.') ? 1 : 0); - if (major != 10) /* So far .. all MacOS 10 ... */ + if (major < 10 || major > 11 ) /* MacOS 10 and 11 are known. */ return NULL; - /* Version string components must be present and numeric. */ - if (!ISDIGIT (version_str[0])) + /* Skip a separating period, if there's one. */ + version_str = end + ((*end == '.') ? 1 : 0); + + if (major == 11 && *end != '\0' && !ISDIGIT (version_str[0])) + /* For MacOS 11, we allow just the major number, but if the minor is + there it must be numeric. */ + return NULL; + else if (major == 11 && *end == '\0') + /* We will rewrite 11 => 11.0.0. */ + need_rewrite = true; + else if (major == 10 && (*end == '\0' || !ISDIGIT (version_str[0]))) + /* Otherwise, minor version components must be present and numeric. */ return NULL; /* If we have one or more leading zeros on a component, then rewrite the version string. */ - if (version_str[0] == '0' && version_str[1] != '\0' + if (*end != '\0' && version_str[0] == '0' && version_str[1] != '\0' && version_str[1] != '.') need_rewrite = true; @@ -104,7 +113,7 @@ validate_macosx_version_min (const char *version_str) if (need_rewrite) { char *new_version; - asprintf (&new_version, "10.%lu.%lu", minor, tiny); + asprintf (&new_version, "%2lu.%lu.%lu", major, minor, tiny); return new_version; } @@ -115,6 +124,12 @@ validate_macosx_version_min (const char *version_str) #include #include "xregex.h" +/* Determine the version of the running OS. + We only look at the first two components (ignoring the patch one) and + report NN.MM.0 where NN is currently either 10 or 11 and MM is the OS + minor release number. 
+ If we can't parse what the kernel gives us, warn the user, and do nothing. */ + static char * darwin_find_version_from_kernel (void) { @@ -125,8 +140,6 @@ darwin_find_version_from_kernel (void) char * version_p; char * new_flag; - /* Determine the version of the running OS. If we can't, warn user, - and do nothing. */ if (sysctl (osversion_name, ARRAY_SIZE (osversion_name), osversion, &osversion_len, NULL, 0) == -1) { @@ -144,10 +157,24 @@ darwin_find_version_from_kernel (void) major_vers = major_vers * 10 + (*version_p++ - '0'); if (*version_p++ != '.') goto parse_failed; - - /* The major kernel version number is 4 plus the second OS version - component. */ - if (major_vers - 4 <= 4) + + /* Darwin20 sees a transition to macOS 11. In this, it seems that the + mapping to macOS minor version is now shifted to the kernel minor + version - 1 (at least for the initial releases). At this stage, we + don't know what macOS version will correspond to Darwin21. */ + if (major_vers >= 20) + { + int minor_vers = *version_p++ - '0'; + if (ISDIGIT (*version_p)) + minor_vers = minor_vers * 10 + (*version_p++ - '0'); + if (*version_p++ != '.') + goto parse_failed; + if (minor_vers > 0) + minor_vers -= 1; /* Kernel 20.3 => macOS 11.2. */ + /* It's not yet clear whether patch level will be considered. */ + asprintf (&new_flag, "11.%02d.00", minor_vers); + } + else if (major_vers - 4 <= 4) /* On 10.4 and earlier, the old linker is used which does not support three-component system versions. FIXME: we should not assume this - a newer linker could be used. */ @@ -202,7 +229,7 @@ darwin_default_min_version (void) const char *checked = validate_macosx_version_min (new_flag); if (checked == NULL) { - warning (0, "couldn%'t understand version %s\n", new_flag); + warning (0, "could not understand version %s", new_flag); return NULL; } new_flag = xstrndup (checked, strlen (checked)); @@ -321,7 +348,7 @@ darwin_driver_init (unsigned int *decoded_options_count, vers_string = validate_macosx_version_min ((*decoded_options)[i].arg); if (vers_string == NULL) - warning (0, "%qs is not valid for %\n", + warning (0, "%qs is not valid for %<-mmacosx-version-min%>", (*decoded_options)[i].arg); else if (vers_string == (*decoded_options)[i].arg) vers_string = xstrndup ((*decoded_options)[i].arg, 32); diff --git a/gcc/config/darwin-f.c b/gcc/config/darwin-f.c index 5807985a5d3e1..1330cdc81876b 100644 --- a/gcc/config/darwin-f.c +++ b/gcc/config/darwin-f.c @@ -1,5 +1,5 @@ /* Darwin support needed only by Fortran frontends. - Copyright (C) 2008-2018 Free Software Foundation, Inc. + Copyright (C) 2008-2021 Free Software Foundation, Inc. Contributed by Daniel Franke. This file is part of GCC. @@ -22,7 +22,7 @@ along with GCC; see the file COPYING3. If not see /* Provide stubs for the hooks defined by darwin.h TARGET_EXTRA_PRE_INCLUDES, TARGET_EXTRA_INCLUDES - As both, gcc and gfortran link in incpath.o, we can not + As both, gcc and gfortran link in incpath.o, we cannot conditionally undefine said hooks if fortran is build. However, we can define do-nothing stubs of said hooks as we are not interested in objc include files in Fortran. diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index 2dea40b65c8fe..f5ef82456aa06 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -1,5 +1,5 @@ /* Prototypes. - Copyright (C) 2001-2018 Free Software Foundation, Inc. + Copyright (C) 2001-2021 Free Software Foundation, Inc. This file is part of GCC. 
@@ -17,6 +17,9 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ +#ifndef CONFIG_DARWIN_PROTOS_H +#define CONFIG_DARWIN_PROTOS_H + extern void darwin_init_sections (void); extern int name_needs_quotes (const char *); @@ -66,6 +69,8 @@ extern void darwin_non_lazy_pcrel (FILE *, rtx); extern void darwin_emit_unwind_label (FILE *, tree, int, int); extern void darwin_emit_except_table_label (FILE *); +extern rtx darwin_make_eh_symbol_indirect (rtx, bool); +extern bool darwin_should_restore_cfa_state (void); extern void darwin_pragma_ignore (struct cpp_reader *); extern void darwin_pragma_options (struct cpp_reader *); @@ -122,4 +127,6 @@ extern bool darwin_kextabi_p (void); extern void darwin_override_options (void); extern void darwin_patch_builtins (void); extern void darwin_rename_builtins (void); -extern bool darwin_libc_has_function (enum function_class fn_class); +extern bool darwin_libc_has_function (enum function_class fn_class, tree); + +#endif /* CONFIG_DARWIN_PROTOS_H */ diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def index 2d63cce037a65..8be89624c9cb6 100644 --- a/gcc/config/darwin-sections.def +++ b/gcc/config/darwin-sections.def @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2018 Free Software Foundation, Inc. +/* Copyright (C) 2005-2021 Free Software Foundation, Inc. This file is part of GCC. @@ -198,3 +198,18 @@ DEF_SECTION (objc2_image_info_section, 0, ".section __DATA, __objc_imageinfo, regular, no_dead_strip", 1) DEF_SECTION (objc2_constant_string_object_section, 0, ".section __DATA, __objc_stringobj, regular, no_dead_strip", 1) + +/* Additions for compatibility with later runtime conventions especially for + sections containing strings. */ +DEF_SECTION (objc2_data_section, 0, ".section __DATA, __data", 1) + +DEF_SECTION (objc2_ivar_section, 0, ".section __DATA, __objc_ivar", 1) + +DEF_SECTION (objc2_class_names_section, 0, + ".section __TEXT, __objc_classname, cstring_literals", 1) + +DEF_SECTION (objc2_method_names_section, 0, + ".section __TEXT, __objc_methname, cstring_literals", 1) + +DEF_SECTION (objc2_method_types_section, 0, + ".section __TEXT, __objc_methtype, cstring_literals", 1) diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index 2493a586cb0e6..5d173919ee050 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -1,5 +1,5 @@ /* Functions for generic Darwin as target machine for GNU C compiler. - Copyright (C) 1989-2018 Free Software Foundation, Inc. + Copyright (C) 1989-2021 Free Software Foundation, Inc. Contributed by Apple Computer Inc. This file is part of GCC. @@ -18,8 +18,6 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ -#define IN_TARGET_CODE 1 - #include "config.h" #include "system.h" #include "coretypes.h" @@ -46,19 +44,27 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "toplev.h" #include "lto-section-names.h" - -/* Darwin supports a feature called fix-and-continue, which is used - for rapid turn around debugging. When code is compiled with the - -mfix-and-continue flag, two changes are made to the generated code - that allow the system to do things that it would normally not be - able to do easily. 
These changes allow gdb to load in - recompilation of a translation unit that has been changed into a - running program and replace existing functions and methods of that - translation unit with versions of those functions and methods - from the newly compiled translation unit. The new functions access - the existing static symbols from the old translation unit, if the - symbol existed in the unit to be replaced, and from the new - translation unit, otherwise. +#include "intl.h" +#include "optabs.h" + +/* Fix and Continue. + + NOTES: + 1) this facility requires suitable support from a modified version + of GDB, which is not provided on any system after MacOS 10.7/Darwin11. + 2) There is no support for this in any X86 version of the FSF compiler. + + Fix and continue was used on some earlier MacOS systems for rapid turn + around debugging. When code is compiled with the -mfix-and-continue + flag, two changes are made to the generated code that allow the system + to do things that it would normally not be able to do easily. These + changes allow gdb to load in recompilation of a translation unit that + has been changed into a running program and replace existing functions + and methods of that translation unit with versions of those functions + and methods from the newly compiled translation unit. The new functions + access the existing static symbols from the old translation unit, if the + symbol existed in the unit to be replaced, and from the new translation + unit, otherwise. The changes are to insert 5 nops at the beginning of all functions and to use indirection to get at static symbols. The 5 nops @@ -107,14 +113,14 @@ section * darwin_sections[NUM_DARWIN_SECTIONS]; /* While we transition to using in-tests instead of ifdef'd code. */ #if !HAVE_lo_sum -#define gen_macho_high(a,b) (a) -#define gen_macho_low(a,b,c) (a) +#define gen_macho_high(m,a,b) (a) +#define gen_macho_low(m,a,b,c) (a) #endif /* True if we're setting __attribute__ ((ms_struct)). */ int darwin_ms_struct = false; -/* Earlier versions of Darwin as do not recognize an alignment field in +/* Earlier versions of Darwin as do not recognize an alignment field in .comm directives, this should be set for versions that allow it. */ int emit_aligned_common = false; @@ -126,11 +132,11 @@ output_objc_section_asm_op (const void *directive) { static bool been_here = false; - /* The NeXT ObjC Runtime requires these sections to be present and in - order in the object. The code below implements this by emitting + /* The NeXT ObjC Runtime requires these sections to be present and in + order in the object. The code below implements this by emitting a section header for each ObjC section the first time that an ObjC section is requested. */ - if (! been_here) + if (darwin_symbol_stubs && ! 
been_here) { section *saved_in_section = in_section; static const enum darwin_section_enum tomark[] = @@ -168,20 +174,23 @@ output_objc_section_asm_op (const void *directive) /* ABI=2 */ static const enum darwin_section_enum tomarkv2[] = { + objc2_method_names_section, objc2_message_refs_section, + objc2_selector_refs_section, + objc2_ivar_section, objc2_classdefs_section, objc2_metadata_section, objc2_classrefs_section, + objc2_class_names_section, objc2_classlist_section, objc2_categorylist_section, - objc2_selector_refs_section, objc2_nonlazy_class_section, objc2_nonlazy_category_section, objc2_protocollist_section, objc2_protocolrefs_section, objc2_super_classrefs_section, + objc2_constant_string_object_section, objc2_image_info_section, - objc2_constant_string_object_section } ; size_t i; @@ -237,7 +246,7 @@ name_needs_quotes (const char *name) { int c; while ((c = *name++) != '\0') - if (! ISIDNUM (c) + if (! ISIDNUM (c) && c != '.' && c != '$' && c != '_' ) return 1; return 0; @@ -373,20 +382,22 @@ machopic_gen_offset (rtx orig) } } -static GTY(()) const char * function_base_func_name; -static GTY(()) int current_pic_label_num; -static GTY(()) int emitted_pic_label_num; +static GTY(()) const char * function_base_func_name = NULL; +static GTY(()) unsigned current_pic_label_num = 0; +static GTY(()) unsigned emitted_pic_label_num = 0; +/* We need to keep one picbase label per function, but (when we emit code + to reload the picbase for setjump receiver) we might need to check for + a second use. So, only update the picbase label counter when we see a + new function. When there's no function decl, we assume that the call is + from the x86 stub generation code. */ static void update_pic_label_number_if_needed (void) { - const char *current_name; - - /* When we are generating _get_pc thunks within stubs, there is no current - function. */ if (current_function_decl) { - current_name = + + const char *current_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)); if (function_base_func_name != current_name) { @@ -404,11 +415,11 @@ update_pic_label_number_if_needed (void) void machopic_output_function_base_name (FILE *file) { - /* If dynamic-no-pic is on, we should not get here. */ - gcc_assert (!MACHO_DYNAMIC_NO_PIC_P); + /* We should only get here for -fPIC. */ + gcc_checking_assert (MACHOPIC_PURE); update_pic_label_number_if_needed (); - fprintf (file, "L%d$pb", current_pic_label_num); + fprintf (file, "L%u$pb", current_pic_label_num); } char curr_picbasename[32]; @@ -416,11 +427,11 @@ char curr_picbasename[32]; const char * machopic_get_function_picbase (void) { - /* If dynamic-no-pic is on, we should not get here. */ - gcc_assert (!MACHO_DYNAMIC_NO_PIC_P); + /* We should only get here for -fPIC. */ + gcc_checking_assert (MACHOPIC_PURE); update_pic_label_number_if_needed (); - snprintf (curr_picbasename, 32, "L%d$pb", current_pic_label_num); + snprintf (curr_picbasename, 32, "L%u$pb", current_pic_label_num); return (const char *) curr_picbasename; } @@ -591,15 +602,6 @@ machopic_indirection_name (rtx sym_ref, bool stub_p) return p->ptr_name; } -/* Return the name of the stub for the mcount function. 
*/ - -const char* -machopic_mcount_stub_name (void) -{ - rtx symbol = gen_rtx_SYMBOL_REF (Pmode, "*mcount"); - return machopic_indirection_name (symbol, /*stub_p=*/true); -} - /* If NAME is the name of a stub or a non-lazy pointer , mark the stub or non-lazy pointer as used -- and mark the object to which the pointer/stub refers as used as well, since the pointer/stub will @@ -650,8 +652,8 @@ machopic_indirect_data_reference (rtx orig, rtx reg) { /* Create a new register for CSE opportunities. */ rtx hi_reg = (!can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode)); - emit_insn (gen_macho_high (hi_reg, orig)); - emit_insn (gen_macho_low (reg, hi_reg, orig)); + emit_insn (gen_macho_high (Pmode, hi_reg, orig)); + emit_insn (gen_macho_low (Pmode, reg, hi_reg, orig)); return reg; } else if (DARWIN_X86) @@ -707,8 +709,8 @@ machopic_indirect_data_reference (rtx orig, rtx reg) ptr_ref = gen_const_mem (Pmode, ptr_ref); machopic_define_symbol (ptr_ref); - if (DARWIN_X86 - && reg + if (DARWIN_X86 + && reg && MACHO_DYNAMIC_NO_PIC_P) { emit_insn (gen_rtx_SET (reg, ptr_ref)); @@ -724,33 +726,18 @@ machopic_indirect_data_reference (rtx orig, rtx reg) appropriate. */ if (GET_CODE (XEXP (orig, 0)) == PLUS) return machopic_indirect_data_reference (XEXP (orig, 0), reg); - else + else return orig; } else if (GET_CODE (orig) == MEM) { - XEXP (ptr_ref, 0) = + XEXP (ptr_ref, 0) = machopic_indirect_data_reference (XEXP (orig, 0), reg); return ptr_ref; } else if (GET_CODE (orig) == PLUS) { rtx base, result; - /* When the target is i386, this code prevents crashes due to the - compiler's ignorance on how to move the PIC base register to - other registers. (The reload phase sometimes introduces such - insns.) */ - if (GET_CODE (XEXP (orig, 0)) == REG - && REGNO (XEXP (orig, 0)) == PIC_OFFSET_TABLE_REGNUM - /* Prevent the same register from being erroneously used - as both the base and index registers. */ - && (DARWIN_X86 && (GET_CODE (XEXP (orig, 1)) == CONST)) - && reg) - { - emit_move_insn (reg, XEXP (orig, 0)); - XEXP (ptr_ref, 0) = reg; - return ptr_ref; - } /* Legitimize both operands of the PLUS. 
*/ base = machopic_indirect_data_reference (XEXP (orig, 0), reg); @@ -820,8 +807,7 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) /* First handle a simple SYMBOL_REF or LABEL_REF */ if (GET_CODE (orig) == LABEL_REF - || (GET_CODE (orig) == SYMBOL_REF - )) + || GET_CODE (orig) == SYMBOL_REF) { /* addr(foo) = &func+(foo-func) */ orig = machopic_indirect_data_reference (orig, reg); @@ -856,7 +842,7 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) rtx asym = XEXP (orig, 0); rtx mem; - emit_insn (gen_macho_high (temp_reg, asym)); + emit_insn (gen_macho_high (Pmode, temp_reg, asym)); mem = gen_const_mem (GET_MODE (orig), gen_rtx_LO_SUM (Pmode, temp_reg, copy_rtx (asym))); @@ -920,11 +906,8 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) emit_move_insn (reg, pic); pic = reg; } -#if 0 - emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); -#endif - if (lra_in_progress) + if (lra_in_progress && HARD_REGISTER_P (pic)) df_set_regs_ever_live (REGNO (pic), true); pic_ref = gen_rtx_PLUS (Pmode, pic, machopic_gen_offset (XEXP (orig, 0))); @@ -991,10 +974,8 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) emit_move_insn (reg, pic); pic = reg; } -#if 0 - emit_use (pic_offset_table_rtx); -#endif - if (lra_in_progress) + + if (lra_in_progress && HARD_REGISTER_P (pic)) df_set_regs_ever_live (REGNO (pic), true); pic_ref = gen_rtx_PLUS (Pmode, pic, @@ -1004,26 +985,22 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) } if (GET_CODE (pic_ref) != REG) - { - if (reg != 0) - { - emit_move_insn (reg, pic_ref); - return reg; - } - else - { - return force_reg (mode, pic_ref); - } - } + { + if (reg != 0) + { + emit_move_insn (reg, pic_ref); + return reg; + } + else + { + return force_reg (mode, pic_ref); + } + } else - { - return pic_ref; - } + { + return pic_ref; + } } - - else if (GET_CODE (orig) == SYMBOL_REF) - return orig; - else if (GET_CODE (orig) == PLUS && (GET_CODE (XEXP (orig, 0)) == MEM || GET_CODE (XEXP (orig, 0)) == SYMBOL_REF @@ -1053,12 +1030,10 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) } /* Likewise, should we set special REG_NOTEs here? */ } - else if (GET_CODE (orig) == CONST) { return machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); } - else if (GET_CODE (orig) == MEM && GET_CODE (XEXP (orig, 0)) == SYMBOL_REF) { @@ -1301,6 +1276,17 @@ darwin_encode_section_info (tree decl, rtx rtl, int first) || (DECL_WEAK (decl) && ! MACHO_SYMBOL_HIDDEN_VIS_P (sym_ref)) || lookup_attribute ("weakref", DECL_ATTRIBUTES (decl))) SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_MUST_INDIRECT; + +#if DARWIN_PPC + /* Objective C V2 (m64) IVAR offset refs from Apple GCC-4.x have an + indirection for m64 code on PPC. Historically, these indirections + also appear in the .data section. */ + tree o2meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl)); + o2meta = o2meta ? TREE_VALUE (o2meta) : NULL_TREE; + + if (o2meta && strncmp (IDENTIFIER_POINTER (o2meta), "V2_IVRF",7) == 0) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_MUST_INDIRECT; +#endif } void @@ -1330,8 +1316,8 @@ static section * darwin_mergeable_string_section (tree exp, unsigned HOST_WIDE_INT align) { - /* Darwin's ld expects to see non-writable string literals in the .cstring - section. Later versions of ld check and complain when CFStrings are + /* Darwin's ld expects to see non-writable string literals in the .cstring + section. 
Later versions of ld check and complain when CFStrings are enabled. Therefore we shall force the strings into .cstring since we don't support writable ones anyway. */ if ((darwin_constant_cfstrings || flag_merge_constants) @@ -1361,36 +1347,40 @@ darwin_mergeable_constant_section (tree exp, unsigned HOST_WIDE_INT align, bool zsize) { + if (zsize) + return darwin_sections[zobj_const_section]; + machine_mode mode = DECL_MODE (exp); - unsigned int modesize = GET_MODE_BITSIZE (mode); + if (!flag_merge_constants + || mode == VOIDmode + || mode == BLKmode + || align < 8 + || align > 256 + || (align & (align -1)) != 0) + return readonly_data_section; - if (DARWIN_SECTION_ANCHORS - && flag_section_anchors - && zsize) - return darwin_sections[zobj_const_section]; + /* This will ICE if the mode is not a constant size, but that is reasonable, + since one cannot put a variable-sized thing into a constant section, we + shouldn't be trying. */ + const unsigned int modesize = GET_MODE_BITSIZE (mode).to_constant (); - if (flag_merge_constants - && mode != VOIDmode - && mode != BLKmode - && modesize <= align - && align >= 8 - && align <= 256 - && (align & (align -1)) == 0) - { - tree size = TYPE_SIZE_UNIT (TREE_TYPE (exp)); + if (modesize > align) + return readonly_data_section; - if (TREE_CODE (size) == INTEGER_CST) - { - if (wi::to_wide (size) == 4) - return darwin_sections[literal4_section]; - else if (wi::to_wide (size) == 8) - return darwin_sections[literal8_section]; - else if (HAVE_GAS_LITERAL16 - && TARGET_64BIT - && wi::to_wide (size) == 16) - return darwin_sections[literal16_section]; - } - } + tree size = TYPE_SIZE_UNIT (TREE_TYPE (exp)); + + if (TREE_CODE (size) != INTEGER_CST) + return readonly_data_section; + + unsigned isize = TREE_INT_CST_LOW (size); + if (isize == 4) + return darwin_sections[literal4_section]; + else if (isize == 8) + return darwin_sections[literal8_section]; + else if (HAVE_GAS_LITERAL16 + && TARGET_64BIT + && isize == 16) + return darwin_sections[literal16_section]; return readonly_data_section; } @@ -1411,14 +1401,14 @@ machopic_reloc_rw_mask (void) /* We have to deal with ObjC/C++ metadata section placement in the common code, since it will also be called from LTO. - + Return metadata attributes, if present (searching for ABI=2 first) Return NULL_TREE if no such attributes are found. */ static tree is_objc_metadata (tree decl) { - if (DECL_P (decl) + if (DECL_P (decl) && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) && DECL_ATTRIBUTES (decl)) { @@ -1444,7 +1434,7 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); p = IDENTIFIER_POINTER (ident); - gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi == 2); + gcc_checking_assert (flag_next_runtime >= 1 && flag_objc_abi == 2); objc_metadata_seen = 1; @@ -1455,11 +1445,20 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) first. 
*/ if (!strncmp (p, "V2_BASE", 7)) return base; + else if (!strncmp (p, "V2_CNAM", 7)) + return darwin_sections[objc2_class_names_section]; + else if (!strncmp (p, "V2_MNAM", 7)) + return darwin_sections[objc2_method_names_section]; + else if (!strncmp (p, "V2_MTYP", 7)) + return darwin_sections[objc2_method_types_section]; else if (!strncmp (p, "V2_STRG", 7)) return darwin_sections[cstring_section]; else if (!strncmp (p, "G2_META", 7) || !strncmp (p, "G2_CLAS", 7)) return darwin_sections[objc2_classdefs_section]; + else if (!strncmp (p, "V2_PCOL", 7)) + return ld_uses_coal_sects ? darwin_sections[data_coal_section] + : darwin_sections[objc2_data_section]; else if (!strncmp (p, "V2_MREF", 7)) return darwin_sections[objc2_message_refs_section]; else if (!strncmp (p, "V2_CLRF", 7)) @@ -1495,6 +1494,9 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) else if (!strncmp (p, "V2_CSTR", 7)) return darwin_sections[objc2_constant_string_object_section]; + else if (!strncmp (p, "V2_IVRF", 7)) + return darwin_sections[objc2_ivar_section]; + /* Not recognized, default. */ return base; } @@ -1508,7 +1510,7 @@ darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); p = IDENTIFIER_POINTER (ident); - gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi < 2); + gcc_checking_assert (flag_next_runtime >= 1 && flag_objc_abi < 2); objc_metadata_seen = 1; @@ -1596,12 +1598,27 @@ machopic_select_section (tree decl, && DECL_WEAK (decl) && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); - zsize = (DECL_P (decl) - && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + /* Darwin pads zero-sized objects with at least one byte, so that the ld64 + atom model is preserved (objects must have distinct regions starting with + a unique linker-visible symbol). + In order to support section anchors, we need to move objects with zero + size into sections which are marked as "no section anchors"; the padded + objects, obviously, have real sizes that differ from their DECL sizes. */ + zsize = DARWIN_SECTION_ANCHORS && flag_section_anchors; + + /* In the streaming of LTO symbol data, we might have a situation where the + var is incomplete or layout not finished (DECL_SIZE_UNIT is NULL_TREE). + We cannot tell if it is zero-sized then, but we can get the section + category correct so that nm reports the right kind of section + (e.g. BSS c.f. data). */ + zsize = (zsize + && DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + && DECL_SIZE_UNIT (decl) && tree_to_uhwi (DECL_SIZE_UNIT (decl)) == 0); one = DECL_P (decl) - && TREE_CODE (decl) == VAR_DECL + && TREE_CODE (decl) == VAR_DECL && DECL_COMDAT_GROUP (decl); use_coal = (weak || one) && ld_uses_coal_sects; @@ -1642,18 +1659,14 @@ machopic_select_section (tree decl, { if (ro) base_section = darwin_sections[const_data_coal_section]; - else + else base_section = darwin_sections[data_coal_section]; } - else if (DARWIN_SECTION_ANCHORS - && flag_section_anchors - && zsize) + else if (zsize) { /* If we're doing section anchors, then punt zero-sized objects into their own sections so that they don't interfere with offset - computation for the remaining vars. This does not need to be done - for stuff in mergeable sections, since these are ineligible for - anchors. */ + computation for the remaining vars. 
*/ if (ro) base_section = darwin_sections[zobj_const_data_section]; else @@ -1684,10 +1697,10 @@ machopic_select_section (tree decl, gcc_unreachable (); } - /* Darwin weird special cases. + /* Darwin weird special cases. a) OBJC Meta-data. */ - if (DECL_P (decl) - && (TREE_CODE (decl) == VAR_DECL + if (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) && DECL_ATTRIBUTES (decl)) { @@ -1729,83 +1742,14 @@ machopic_select_section (tree decl, else return base_section; } - /* c) legacy meta-data selection. */ - else if (TREE_CODE (decl) == VAR_DECL + else if (flag_next_runtime + && VAR_P (decl) && DECL_NAME (decl) && TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE && IDENTIFIER_POINTER (DECL_NAME (decl)) - && flag_next_runtime && !strncmp (IDENTIFIER_POINTER (DECL_NAME (decl)), "_OBJC_", 6)) - { - const char *name = IDENTIFIER_POINTER (DECL_NAME (decl)); - static bool warned_objc_46 = false; - /* We shall assert that zero-sized objects are an error in ObjC - meta-data. */ - gcc_assert (tree_to_uhwi (DECL_SIZE_UNIT (decl)) != 0); - - /* ??? This mechanism for determining the metadata section is - broken when LTO is in use, since the frontend that generated - the data is not identified. We will keep the capability for - the short term - in case any non-Objective-C programs are using - it to place data in specified sections. */ - if (!warned_objc_46) - { - location_t loc = DECL_SOURCE_LOCATION (decl); - warning_at (loc, 0, "the use of _OBJC_-prefixed variable names" - " to select meta-data sections is deprecated at 4.6" - " and will be removed in 4.7"); - warned_objc_46 = true; - } - - if (!strncmp (name, "_OBJC_CLASS_METHODS_", 20)) - return darwin_sections[objc_cls_meth_section]; - else if (!strncmp (name, "_OBJC_INSTANCE_METHODS_", 23)) - return darwin_sections[objc_inst_meth_section]; - else if (!strncmp (name, "_OBJC_CATEGORY_CLASS_METHODS_", 29)) - return darwin_sections[objc_cat_cls_meth_section]; - else if (!strncmp (name, "_OBJC_CATEGORY_INSTANCE_METHODS_", 32)) - return darwin_sections[objc_cat_inst_meth_section]; - else if (!strncmp (name, "_OBJC_CLASS_VARIABLES_", 22)) - return darwin_sections[objc_class_vars_section]; - else if (!strncmp (name, "_OBJC_INSTANCE_VARIABLES_", 25)) - return darwin_sections[objc_instance_vars_section]; - else if (!strncmp (name, "_OBJC_CLASS_PROTOCOLS_", 22)) - return darwin_sections[objc_cat_cls_meth_section]; - else if (!strncmp (name, "_OBJC_CLASS_NAME_", 17)) - return darwin_sections[objc_class_names_section]; - else if (!strncmp (name, "_OBJC_METH_VAR_NAME_", 20)) - return darwin_sections[objc_meth_var_names_section]; - else if (!strncmp (name, "_OBJC_METH_VAR_TYPE_", 20)) - return darwin_sections[objc_meth_var_types_section]; - else if (!strncmp (name, "_OBJC_CLASS_REFERENCES", 22)) - return darwin_sections[objc_cls_refs_section]; - else if (!strncmp (name, "_OBJC_CLASS_", 12)) - return darwin_sections[objc_class_section]; - else if (!strncmp (name, "_OBJC_METACLASS_", 16)) - return darwin_sections[objc_meta_class_section]; - else if (!strncmp (name, "_OBJC_CATEGORY_", 15)) - return darwin_sections[objc_category_section]; - else if (!strncmp (name, "_OBJC_SELECTOR_REFERENCES", 25)) - return darwin_sections[objc_selector_refs_section]; - else if (!strncmp (name, "_OBJC_SELECTOR_FIXUP", 20)) - return darwin_sections[objc_selector_fixup_section]; - else if (!strncmp (name, "_OBJC_SYMBOLS", 13)) - return darwin_sections[objc_symbols_section]; - else if (!strncmp (name, "_OBJC_MODULES", 13)) - return 
darwin_sections[objc_module_info_section]; - else if (!strncmp (name, "_OBJC_IMAGE_INFO", 16)) - return darwin_sections[objc_image_info_section]; - else if (!strncmp (name, "_OBJC_PROTOCOL_INSTANCE_METHODS_", 32)) - return darwin_sections[objc_cat_inst_meth_section]; - else if (!strncmp (name, "_OBJC_PROTOCOL_CLASS_METHODS_", 29)) - return darwin_sections[objc_cat_cls_meth_section]; - else if (!strncmp (name, "_OBJC_PROTOCOL_REFS_", 20)) - return darwin_sections[objc_cat_cls_meth_section]; - else if (!strncmp (name, "_OBJC_PROTOCOL_", 15)) - return darwin_sections[objc_protocol_section]; - else - return base_section; - } + /* c) legacy meta-data selection was deprecated at 4.6, removed now. */ + gcc_unreachable (); return base_section; } @@ -1817,19 +1761,19 @@ section * machopic_select_rtx_section (machine_mode mode, rtx x, unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) { - if (GET_MODE_SIZE (mode) == 8 + if (known_eq (GET_MODE_SIZE (mode), 8) && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_WIDE_INT || GET_CODE (x) == CONST_DOUBLE)) return darwin_sections[literal8_section]; - else if (GET_MODE_SIZE (mode) == 4 + else if (known_eq (GET_MODE_SIZE (mode), 4) && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_WIDE_INT || GET_CODE (x) == CONST_DOUBLE)) return darwin_sections[literal4_section]; else if (HAVE_GAS_LITERAL16 && TARGET_64BIT - && GET_MODE_SIZE (mode) == 16 + && known_eq (GET_MODE_SIZE (mode), 16) && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_WIDE_INT || GET_CODE (x) == CONST_DOUBLE @@ -1882,12 +1826,12 @@ sort_cdtor_records (const void * a, const void * b) return 0; } -static void +static void finalize_ctors () { unsigned int i; cdtor_record *elt; - + if (MACHOPIC_INDIRECT) switch_to_section (darwin_sections[mod_init_section]); else @@ -1927,13 +1871,21 @@ darwin_globalize_label (FILE *stream, const char *name) { if (!!strncmp (name, "_OBJC_", 6)) default_globalize_label (stream, name); + /* We have some Objective C cases that need to be global, but only on newer + OS versions. */ + if (flag_objc_abi < 2 || flag_next_runtime < 100700) + return; + if (!strncmp (name+6, "LabelPro", 8)) + default_globalize_label (stream, name); + if (!strncmp (name+6, "Protocol_", 9)) + default_globalize_label (stream, name); } -/* This routine returns non-zero if 'name' starts with the special objective-c - anonymous file-scope static name. It accommodates c++'s mangling of such +/* This routine returns non-zero if 'name' starts with the special objective-c + anonymous file-scope static name. It accommodates c++'s mangling of such symbols (in this case the symbols will have form _ZL{d}*_OBJC_* d=digit). */ - -int + +int darwin_label_is_anonymous_local_objc_name (const char *name) { const unsigned char *p = (const unsigned char *) name; @@ -1945,7 +1897,49 @@ darwin_label_is_anonymous_local_objc_name (const char *name) while (*p >= '0' && *p <= '9') p++; } - return (!strncmp ((const char *)p, "_OBJC_", 6)); + if (strncmp ((const char *)p, "_OBJC_", 6) != 0) + return false; + + /* We need some of the objective c meta-data symbols to be visible to the + linker (when the target OS version is newer). FIXME: this is horrible, + we need a better mechanism. 
*/ + + if (flag_objc_abi < 2 || flag_next_runtime < 100700) + return true; + + p += 6; + if (!strncmp ((const char *)p, "ClassRef", 8)) + return false; + else if (!strncmp ((const char *)p, "SelRef", 6)) + return false; + else if (!strncmp ((const char *)p, "Category", 8)) + { + if (p[8] == '_' || p[8] == 'I' || p[8] == 'P' || p[8] == 'C' ) + return false; + return true; + } + else if (!strncmp ((const char *)p, "ClassMethods", 12)) + return false; + else if (!strncmp ((const char *)p, "Instance", 8)) + { + if (p[8] == 'I' || p[8] == 'M') + return false; + return true; + } + else if (!strncmp ((const char *)p, "CLASS_RO", 8)) + return false; + else if (!strncmp ((const char *)p, "METACLASS_RO", 12)) + return false; + else if (!strncmp ((const char *)p, "Protocol", 8)) + { + if (p[8] == '_' || p[8] == 'I' || p[8] == 'P' + || p[8] == 'M' || p[8] == 'C' || p[8] == 'O') + return false; + return true; + } + else if (!strncmp ((const char *)p, "LabelPro", 8)) + return false; + return true; } /* LTO support for Mach-O. @@ -2151,7 +2145,7 @@ darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) char buf[32]; static int invok_count = 0; static tree last_fun_decl = NULL_TREE; - + /* Modern linkers can produce distinct FDEs without compiler support. */ if (! for_eh || ! ld_needs_eh_markers) return; @@ -2163,7 +2157,7 @@ darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) function. This is of limited use, probably, since we do not currently enable partitioning. */ strcpy (buf, ".eh"); - if (decl && TREE_CODE (decl) == FUNCTION_DECL) + if (decl && TREE_CODE (decl) == FUNCTION_DECL) { if (decl == last_fun_decl) { @@ -2225,6 +2219,41 @@ darwin_emit_except_table_label (FILE *file) except_table_label_num++); ASM_OUTPUT_LABEL (file, section_start_label); } + +rtx +darwin_make_eh_symbol_indirect (rtx orig, bool ARG_UNUSED (pubvis)) +{ + if (DARWIN_PPC == 0 && TARGET_64BIT) + return orig; + + return gen_rtx_SYMBOL_REF (Pmode, + machopic_indirection_name (orig, + /*stub_p=*/false)); +} + +/* The unwinders in earlier Darwin versions are based on an old version + of libgcc_s and need current frame address stateto be reset after a + DW_CFA_restore_state recovers the register values. */ + +bool +darwin_should_restore_cfa_state (void) +{ + return generating_for_darwin_version <= 10; +} + +/* Return, and mark as used, the name of the stub for the mcount function. + Currently, this is only called by X86 code in the expansion of the + FUNCTION_PROFILER macro, when stubs are enabled. */ + +const char* +machopic_mcount_stub_name (void) +{ + rtx symbol = gen_rtx_SYMBOL_REF (Pmode, "*mcount"); + const char *name = machopic_indirection_name (symbol, /*stub_p=*/true); + machopic_validate_stub_or_non_lazy_ptr (name); + return name; +} + /* Generate a PC-relative reference to a Mach-O non-lazy-symbol. */ void @@ -2259,8 +2288,8 @@ darwin_non_lazy_pcrel (FILE *file, rtx addr) The machopic_define_symbol calls are telling the machopic subsystem that the name *is* defined in this module, so it doesn't need to make them indirect. 
*/ -void -darwin_asm_declare_object_name (FILE *file, +void +darwin_asm_declare_object_name (FILE *file, const char *nam, tree decl) { const char *xname = nam; @@ -2269,11 +2298,11 @@ darwin_asm_declare_object_name (FILE *file, weak = (DECL_P (decl) && DECL_WEAK (decl) - && !lookup_attribute ("weak_import", + && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); - local_def = DECL_INITIAL (decl) || (TREE_STATIC (decl) - && (!DECL_COMMON (decl) + local_def = DECL_INITIAL (decl) || (TREE_STATIC (decl) + && (!DECL_COMMON (decl) || !TREE_PUBLIC (decl))); if (GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) @@ -2291,17 +2320,17 @@ darwin_asm_declare_object_name (FILE *file, #ifdef DEBUG_DARWIN_MEM_ALLOCATORS fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d" " stat %d com %d pub %d t-const %d t-ro %d init %lx\n", - xname, (TREE_CODE (decl) == VAR_DECL?"var":"const"), - (unsigned long long)size, DECL_ALIGN (decl), local_def, + xname, (TREE_CODE (decl) == VAR_DECL?"var":"const"), + (unsigned long long)size, DECL_ALIGN (decl), local_def, DECL_WEAK (decl), TREE_STATIC (decl), DECL_COMMON (decl), TREE_PUBLIC (decl), TREE_CONSTANT (decl), TREE_READONLY (decl), - (unsigned long)DECL_INITIAL (decl)); + (unsigned long)DECL_INITIAL (decl)); #endif - /* Darwin needs help to support local zero-sized objects. + /* Darwin needs help to support local zero-sized objects. They must be made at least one byte, and the section containing must be marked as unsuitable for section-anchors (see storage allocators below). - + For non-zero objects this output is handled by varasm.c. */ if (!size) @@ -2322,7 +2351,7 @@ fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d" /* Check that we've correctly picked up the zero-sized item and placed it properly. */ gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) - || (in_section + || (in_section && (in_section->common.flags & SECTION_NO_ANCHOR))); } else @@ -2343,7 +2372,7 @@ darwin_asm_declare_constant_name (FILE *file, const char *name, /* Check that we've correctly picked up the zero-sized item and placed it properly. */ gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) - || (in_section + || (in_section && (in_section->common.flags & SECTION_NO_ANCHOR))); } } @@ -2369,7 +2398,7 @@ darwin_asm_declare_constant_name (FILE *file, const char *name, /* Emit a chunk of data for items coalesced by the linker. */ static void darwin_emit_weak_or_comdat (FILE *fp, tree decl, const char *name, - unsigned HOST_WIDE_INT size, + unsigned HOST_WIDE_INT size, bool use_coal, unsigned int align) { @@ -2391,7 +2420,7 @@ darwin_emit_weak_or_comdat (FILE *fp, tree decl, const char *name, if (TREE_PUBLIC (decl)) darwin_globalize_label (fp, name); - /* ... and we let it deal with outputting one byte of zero for them too. */ + /* ... and we let it deal with outputting one byte of zero for them too. */ darwin_asm_declare_object_name (fp, name, decl); if (size) assemble_zeros (size); @@ -2400,7 +2429,7 @@ darwin_emit_weak_or_comdat (FILE *fp, tree decl, const char *name, /* Emit a chunk of data for ObjC meta-data that got placed in BSS erroneously. */ static void darwin_emit_objc_zeroed (FILE *fp, tree decl, const char *name, - unsigned HOST_WIDE_INT size, + unsigned HOST_WIDE_INT size, unsigned int align, tree meta) { section *ocs = data_section; @@ -2416,14 +2445,14 @@ darwin_emit_objc_zeroed (FILE *fp, tree decl, const char *name, gcc_assert (size); fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT)); - /* ... 
and we let it deal with outputting one byte of zero for them too. */ + /* ... and we let it deal with outputting one byte of zero for them too. */ darwin_asm_declare_object_name (fp, name, decl); assemble_zeros (size); } /* This routine emits 'local' storage: - When Section Anchors are off this routine emits .zerofill commands in + When Section Anchors are off this routine emits .zerofill commands in sections named for their alignment. When Section Anchors are on, smaller (non-zero-sized) items are placed in @@ -2432,15 +2461,11 @@ darwin_emit_objc_zeroed (FILE *fp, tree decl, const char *name, The routine has no checking - it is all assumed to be done by the caller. */ static void -darwin_emit_local_bss (FILE *fp, tree decl, const char *name, - unsigned HOST_WIDE_INT size, +darwin_emit_local_bss (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, unsigned int l2align) { - /* FIXME: We have a fudge to make this work with Java even when the target does - not use sections anchors -- Java seems to need at least one small item in a - non-zerofill segment. */ - if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) - || (size && size <= 2)) + if (DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) { /* Put smaller objects in _static_data, where the section anchors system can get them. @@ -2461,21 +2486,18 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name, if (l2align) fprintf (fp, "\t.align\t%u\n", l2align); - assemble_name (fp, name); + assemble_name (fp, name); fprintf (fp, ":\n\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); } - else + else { - /* When we are on a non-section anchor target, we can get zero-sized - items here. However, all we need to do is to bump them to one byte - and the section alignment will take care of the rest. */ + /* When we are on a non-section anchor target (or not using section + anchors, we can get zero-sized items here. However, all we need to + do is to bump them to one byte and the section alignment will take + care of the rest. */ char secnam[64]; - unsigned int flags ; - snprintf (secnam, 64, "__DATA,__%sbss%u", ((size)?"":"zo_"), - (unsigned) l2align); - /* We can't anchor (yet, if ever) in zerofill sections, because we can't - switch to them and emit a label. */ - flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + snprintf (secnam, 64, "__DATA,__bss"); + unsigned int flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; in_section = get_section (secnam, flags, NULL); fprintf (fp, "\t.zerofill %s,", secnam); assemble_name (fp, name); @@ -2486,7 +2508,7 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name, fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, (unsigned) l2align); else - fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",0\n", size); } (*targetm.encode_section_info) (decl, DECL_RTL (decl), false); @@ -2497,12 +2519,12 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name, /* Emit a chunk of common. */ static void darwin_emit_common (FILE *fp, const char *name, - unsigned HOST_WIDE_INT size, unsigned int align) + unsigned HOST_WIDE_INT size, unsigned int align) { unsigned HOST_WIDE_INT rounded; unsigned int l2align; - /* Earlier systems complain if the alignment exceeds the page size. + /* Earlier systems complain if the alignment exceeds the page size. The magic number is 4096 * 8 - hard-coded for legacy systems. 
*/ if (!emit_aligned_common && (align > 32768UL)) align = 4096UL; /* In units. */ @@ -2513,9 +2535,9 @@ darwin_emit_common (FILE *fp, const char *name, if (!align) align = 1; - /* For earlier toolchains, we need to emit the var as a rounded size to + /* For earlier toolchains, we need to emit the var as a rounded size to tell ld the alignment. */ - if (size < align) + if (size < align) rounded = align; else rounded = (size + (align-1)) & ~(align-1); @@ -2536,7 +2558,7 @@ darwin_emit_common (FILE *fp, const char *name, fputs ("\t.comm\t", fp); assemble_name (fp, name); - fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED, + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED, emit_aligned_common?size:rounded); if (l2align && emit_aligned_common) fprintf (fp, ",%u", l2align); @@ -2558,18 +2580,18 @@ darwin_output_aligned_bss (FILE *fp, tree decl, const char *name, one = DECL_ONE_ONLY (decl); weak = (DECL_P (decl) && DECL_WEAK (decl) - && !lookup_attribute ("weak_import", + && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); #ifdef DEBUG_DARWIN_MEM_ALLOCATORS fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" " pub %d weak %d one %d init %lx\n", - name, (long long)size, (int)align, TREE_READONLY (decl), + name, (long long)size, (int)align, TREE_READONLY (decl), TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), - pub, weak, one, (unsigned long)DECL_INITIAL (decl)); + pub, weak, one, (unsigned long)DECL_INITIAL (decl)); #endif - /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS before the target has a chance to comment. */ if ((meta = is_objc_metadata (decl))) { @@ -2578,8 +2600,8 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" } /* Check that any initializer is valid. */ - gcc_assert ((DECL_INITIAL (decl) == NULL) - || (DECL_INITIAL (decl) == error_mark_node) + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) || initializer_zerop (DECL_INITIAL (decl))); gcc_assert (DECL_SECTION_NAME (decl) == NULL); @@ -2591,7 +2613,7 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" l2align = floor_log2 (align / BITS_PER_UNIT); gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); - + last_assemble_variable_decl = decl; /* We would rather not have to check this here - but it seems that we might @@ -2599,24 +2621,23 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" if (one || weak) { /* Weak or COMDAT objects are put in mergeable sections. */ - darwin_emit_weak_or_comdat (fp, decl, name, size, + darwin_emit_weak_or_comdat (fp, decl, name, size, ld_uses_coal_sects, DECL_ALIGN (decl)); return; - } + } /* If this is not public, then emit according to local rules. */ if (!pub) { - darwin_emit_local_bss (fp, decl, name, size, l2align); + darwin_emit_local_bss (fp, decl, name, size, l2align); return; } - /* So we have a public symbol (small item fudge for Java, see above). */ - if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) - || (size && size <= 2)) + /* So we have a public symbol. */ + if (DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) { /* Put smaller objects in data, where the section anchors system can get - them. However, if they are zero-sized punt them to yet a different + them. However, if they are zero-sized punt them to yet a different section (that is not allowed to participate in anchoring). 
*/ if (!size) { @@ -2636,18 +2657,12 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" assemble_name (fp, name); fprintf (fp, ":\n\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); } - else + else { + /* Section anchors not in use. */ + unsigned int flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; char secnam[64]; - unsigned int flags ; - /* When we are on a non-section anchor target, we can get zero-sized - items here. However, all we need to do is to bump them to one byte - and the section alignment will take care of the rest. */ - snprintf (secnam, 64, "__DATA,__%spu_bss%u", ((size)?"":"zo_"), l2align); - - /* We can't anchor in zerofill sections, because we can't switch - to them and emit a label. */ - flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + snprintf (secnam, 64, "__DATA,__common"); in_section = get_section (secnam, flags, NULL); fprintf (fp, "\t.zerofill %s,", secnam); assemble_name (fp, name); @@ -2657,7 +2672,7 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" if (l2align) fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, l2align); else - fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",0\n", size); } (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); } @@ -2666,7 +2681,7 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" supports this). */ void darwin_asm_output_aligned_decl_common (FILE *fp, tree decl, const char *name, - unsigned HOST_WIDE_INT size, + unsigned HOST_WIDE_INT size, unsigned int align) { unsigned int l2align; @@ -2677,7 +2692,7 @@ darwin_asm_output_aligned_decl_common (FILE *fp, tree decl, const char *name, if (decl==NULL) { #ifdef DEBUG_DARWIN_MEM_ALLOCATORS -fprintf (fp, "# adcom: %s (%d,%d) decl=0x0\n", name, (int)size, (int)align); +fprintf (fp, "# adcom: %s (%d,%d) decl=0x0\n", name, (int)size, (int)align); #endif darwin_emit_common (fp, name, size, align); return; @@ -2686,18 +2701,18 @@ fprintf (fp, "# adcom: %s (%d,%d) decl=0x0\n", name, (int)size, (int)align); one = DECL_ONE_ONLY (decl); weak = (DECL_P (decl) && DECL_WEAK (decl) - && !lookup_attribute ("weak_import", + && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); #ifdef DEBUG_DARWIN_MEM_ALLOCATORS fprintf (fp, "# adcom: %s (%lld,%d) ro %d cst %d stat %d com %d pub %d" " weak %d one %d init %lx\n", - name, (long long)size, (int)align, TREE_READONLY (decl), + name, (long long)size, (int)align, TREE_READONLY (decl), TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), - TREE_PUBLIC (decl), weak, one, (unsigned long)DECL_INITIAL (decl)); + TREE_PUBLIC (decl), weak, one, (unsigned long)DECL_INITIAL (decl)); #endif - /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS before the target has a chance to comment. */ if ((meta = is_objc_metadata (decl))) { @@ -2713,24 +2728,24 @@ fprintf (fp, "# adcom: %s (%lld,%d) ro %d cst %d stat %d com %d pub %d" if (one || weak) { /* Weak or COMDAT objects are put in mergable sections. */ - darwin_emit_weak_or_comdat (fp, decl, name, size, + darwin_emit_weak_or_comdat (fp, decl, name, size, ld_uses_coal_sects, DECL_ALIGN (decl)); return; - } + } - /* We should only get here for DECL_COMMON, with a zero init (and, in + /* We should only get here for DECL_COMMON, with a zero init (and, in principle, only for public symbols too - although we deal with local ones below). */ /* Check the initializer is OK. 
*/ - gcc_assert (DECL_COMMON (decl) - && ((DECL_INITIAL (decl) == NULL) - || (DECL_INITIAL (decl) == error_mark_node) + gcc_assert (DECL_COMMON (decl) + && ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) || initializer_zerop (DECL_INITIAL (decl)))); last_assemble_variable_decl = decl; - if (!size || !align) + if (!size || !align) align = DECL_ALIGN (decl); l2align = floor_log2 (align / BITS_PER_UNIT); @@ -2740,13 +2755,13 @@ fprintf (fp, "# adcom: %s (%lld,%d) ro %d cst %d stat %d com %d pub %d" if (TREE_PUBLIC (decl) != 0) darwin_emit_common (fp, name, size, align); else - darwin_emit_local_bss (fp, decl, name, size, l2align); + darwin_emit_local_bss (fp, decl, name, size, l2align); } /* Output a chunk of BSS with alignment specfied. */ void -darwin_asm_output_aligned_decl_local (FILE *fp, tree decl, const char *name, - unsigned HOST_WIDE_INT size, +darwin_asm_output_aligned_decl_local (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, unsigned int align) { unsigned long l2align; @@ -2756,18 +2771,18 @@ darwin_asm_output_aligned_decl_local (FILE *fp, tree decl, const char *name, one = DECL_ONE_ONLY (decl); weak = (DECL_P (decl) && DECL_WEAK (decl) - && !lookup_attribute ("weak_import", + && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); #ifdef DEBUG_DARWIN_MEM_ALLOCATORS fprintf (fp, "# adloc: %s (%lld,%d) ro %d cst %d stat %d one %d pub %d" " weak %d init %lx\n", - name, (long long)size, (int)align, TREE_READONLY (decl), + name, (long long)size, (int)align, TREE_READONLY (decl), TREE_CONSTANT (decl), TREE_STATIC (decl), one, TREE_PUBLIC (decl), - weak , (unsigned long)DECL_INITIAL (decl)); + weak , (unsigned long)DECL_INITIAL (decl)); #endif - /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS before the target has a chance to comment. */ if ((meta = is_objc_metadata (decl))) { @@ -2783,16 +2798,16 @@ fprintf (fp, "# adloc: %s (%lld,%d) ro %d cst %d stat %d one %d pub %d" if (one || weak) { /* Weak or COMDAT objects are put in mergable sections. */ - darwin_emit_weak_or_comdat (fp, decl, name, size, + darwin_emit_weak_or_comdat (fp, decl, name, size, ld_uses_coal_sects, DECL_ALIGN (decl)); return; - } + } /* .. and it should be suitable for placement in local mem. */ gcc_assert(!TREE_PUBLIC (decl) && !DECL_COMMON (decl)); /* .. and any initializer must be all-zero. */ - gcc_assert ((DECL_INITIAL (decl) == NULL) - || (DECL_INITIAL (decl) == error_mark_node) + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) || initializer_zerop (DECL_INITIAL (decl))); last_assemble_variable_decl = decl; @@ -2831,7 +2846,7 @@ darwin_assemble_visibility (tree decl, int vis) /* vec used by darwin_asm_dwarf_section. Maybe a hash tab would be better here - but the intention is that this is - a very short list (fewer than 16 items) and each entry should (ideally, + a very short list (fewer than 16 items) and each entry should (ideally, eventually) only be presented once. A structure to hold a dwarf debug section used entry. */ @@ -2846,7 +2861,7 @@ dwarf_sect_used_entry; /* A list of used __DWARF sections. */ static GTY (()) vec *dwarf_sect_names_table; -/* This is called when we are asked to assemble a named section and the +/* This is called when we are asked to assemble a named section and the name begins with __DWARF,. 
We keep a list of the section names (without the __DWARF, prefix) and use this to emit our required start label on the first switch to each section. */ @@ -2887,7 +2902,7 @@ darwin_asm_dwarf_section (const char *name, unsigned int flags, if (dwarf_sect_names_table == NULL) vec_alloc (dwarf_sect_names_table, 16); else - for (i = 0; + for (i = 0; dwarf_sect_names_table->iterate (i, &ref); i++) { @@ -3020,8 +3035,9 @@ darwin_file_end (void) } machopic_finish (asm_out_file); - if (lang_GNU_CXX ()) + if (flag_apple_kext) { + /* These sections are only used for kernel code. */ switch_to_section (darwin_sections[constructor_section]); switch_to_section (darwin_sections[destructor_section]); ASM_OUTPUT_ALIGN (asm_out_file, 1); @@ -3124,8 +3140,14 @@ darwin_file_end (void) bool darwin_binds_local_p (const_tree decl) { - return default_binds_local_p_1 (decl, - TARGET_KEXTABI && DARWIN_VTABLE_P (decl)); + /* We use the "shlib" input to indicate that a symbol should be + considered overridable; only relevant for vtables in kernel modules + on earlier system versions, and with a TODO to complete. */ + bool force_overridable = TARGET_KEXTABI && DARWIN_VTABLE_P (decl); + return default_binds_local_p_3 (decl, force_overridable /* shlib */, + false /* weak dominate */, + false /* extern_protected_data */, + false /* common_local_p */); } /* The Darwin's implementation of TARGET_ASM_OUTPUT_ANCHOR. Define the @@ -3186,10 +3208,14 @@ darwin_override_options (void) /* Keep track of which (major) version we're generating code for. */ if (darwin_macosx_version_min) { - if (strverscmp (darwin_macosx_version_min, "10.6") >= 0) + if (strverscmp (darwin_macosx_version_min, "10.7") >= 0) + generating_for_darwin_version = 11; + else if (strverscmp (darwin_macosx_version_min, "10.6") >= 0) generating_for_darwin_version = 10; else if (strverscmp (darwin_macosx_version_min, "10.5") >= 0) generating_for_darwin_version = 9; + else if (strverscmp (darwin_macosx_version_min, "10.4") >= 0) + generating_for_darwin_version = 8; /* Earlier versions are not specifically accounted, until required. */ } @@ -3205,6 +3231,20 @@ darwin_override_options (void) should check for correctness re. the ABI. TODO: check and provide the flags (runtime & ABI) from the lto wrapper). */ + /* At present, make a hard update to the runtime version based on the target + OS version. */ + if (flag_next_runtime) + { + if (generating_for_darwin_version > 10) + flag_next_runtime = 100705; + else if (generating_for_darwin_version > 9) + flag_next_runtime = 100608; + else if (generating_for_darwin_version > 8) + flag_next_runtime = 100508; + else + flag_next_runtime = 100000; + } + /* Unless set, force ABI=2 for NeXT and m64, 0 otherwise. */ if (!global_options_set.x_flag_objc_abi) global_options.x_flag_objc_abi @@ -3229,9 +3269,9 @@ darwin_override_options (void) global_options.x_flag_objc_abi); } - /* Don't emit DWARF3/4 unless specifically selected. This is a + /* Don't emit DWARF3/4 unless specifically selected. This is a workaround for tool bugs. 
*/ - if (!global_options_set.x_dwarf_strict) + if (!global_options_set.x_dwarf_strict) dwarf_strict = 1; if (!global_options_set.x_dwarf_version) dwarf_version = 2; @@ -3239,12 +3279,12 @@ darwin_override_options (void) if (global_options_set.x_dwarf_split_debug_info) { inform (input_location, - "-gsplit-dwarf is not supported on this platform, ignored"); + "%<-gsplit-dwarf%> is not supported on this platform, ignored"); dwarf_split_debug_info = 0; global_options_set.x_dwarf_split_debug_info = 0; } - /* Do not allow unwind tables to be generated by default for m32. + /* Do not allow unwind tables to be generated by default for m32. fnon-call-exceptions will override this, regardless of what we do. */ if (generating_for_darwin_version < 10 && !global_options_set.x_flag_asynchronous_unwind_tables @@ -3269,8 +3309,8 @@ darwin_override_options (void) && !global_options_set.x_flag_asynchronous_unwind_tables))) { inform (input_location, - "-freorder-blocks-and-partition does not work with exceptions " - "on this architecture"); + "%<-freorder-blocks-and-partition%> does not work with " + "exceptions on this architecture"); flag_reorder_blocks_and_partition = 0; flag_reorder_blocks = 1; } @@ -3278,7 +3318,7 @@ darwin_override_options (void) /* FIXME: flag_objc_sjlj_exceptions is no longer needed since there is only one valid choice of exception scheme for each runtime. */ if (!global_options_set.x_flag_objc_sjlj_exceptions) - global_options.x_flag_objc_sjlj_exceptions = + global_options.x_flag_objc_sjlj_exceptions = flag_next_runtime && !TARGET_64BIT; /* FIXME: and this could be eliminated then too. */ @@ -3360,7 +3400,8 @@ darwin_override_options (void) else if (DARWIN_X86 && darwin_symbol_stubs && TARGET_64BIT) { inform (input_location, - "%<-msymbol-stubs%> is not required for 64b code (ignored)"); + "%<-mpic-symbol-stubs%> is not required for 64-bit code " + "(ignored)"); darwin_symbol_stubs = false; } @@ -3447,7 +3488,7 @@ static GTY (()) hash_table *cfstring_htab; static tree add_builtin_field_decl (tree type, const char *name, tree **chain) { - tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, + tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier (name), type); if (*chain != NULL) @@ -3463,9 +3504,9 @@ darwin_init_cfstring_builtins (unsigned builtin_cfstring) tree cfsfun, fields, pccfstring_ftype_pcchar; tree *chain = NULL; - darwin_builtin_cfstring = + darwin_builtin_cfstring = (enum built_in_function) builtin_cfstring; - + /* struct __builtin_CFString { const int *isa; (will point at int flags; __CFConstantStringClassReference) @@ -3473,10 +3514,10 @@ darwin_init_cfstring_builtins (unsigned builtin_cfstring) long length; }; */ - pcint_type_node = build_pointer_type + pcint_type_node = build_pointer_type (build_qualified_type (integer_type_node, TYPE_QUAL_CONST)); - pcchar_type_node = build_pointer_type + pcchar_type_node = build_pointer_type (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); cfstring_type_node = (*lang_hooks.types.make_type) (RECORD_TYPE); @@ -3492,13 +3533,13 @@ darwin_init_cfstring_builtins (unsigned builtin_cfstring) /* const struct __builtin_CFstring * __builtin___CFStringMakeConstantString (const char *); */ - ccfstring_type_node = build_qualified_type + ccfstring_type_node = build_qualified_type (cfstring_type_node, TYPE_QUAL_CONST); pccfstring_type_node = build_pointer_type (ccfstring_type_node); - pccfstring_ftype_pcchar = build_function_type_list + pccfstring_ftype_pcchar = build_function_type_list (pccfstring_type_node, 
pcchar_type_node, NULL_TREE); - cfsfun = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + cfsfun = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier ("__builtin___CFStringMakeConstantString"), pccfstring_ftype_pcchar); @@ -3509,8 +3550,7 @@ darwin_init_cfstring_builtins (unsigned builtin_cfstring) in place of the existing, which may be NULL. */ DECL_LANG_SPECIFIC (cfsfun) = NULL; (*lang_hooks.dup_lang_specific_decl) (cfsfun); - DECL_BUILT_IN_CLASS (cfsfun) = BUILT_IN_MD; - DECL_FUNCTION_CODE (cfsfun) = darwin_builtin_cfstring; + set_decl_built_in_function (cfsfun, BUILT_IN_MD, darwin_builtin_cfstring); lang_hooks.builtin_function (cfsfun); /* extern int __CFConstantStringClassReference[]; */ @@ -3523,7 +3563,7 @@ darwin_init_cfstring_builtins (unsigned builtin_cfstring) (*lang_hooks.decls.pushdecl) (cfstring_class_reference); DECL_EXTERNAL (cfstring_class_reference) = 1; rest_of_decl_compilation (cfstring_class_reference, 0, 0); - + /* Initialize the hash table used to hold the constant CFString objects. */ cfstring_htab = hash_table::create_ggc (31); @@ -3531,16 +3571,16 @@ darwin_init_cfstring_builtins (unsigned builtin_cfstring) } tree -darwin_fold_builtin (tree fndecl, int n_args, tree *argp, +darwin_fold_builtin (tree fndecl, int n_args, tree *argp, bool ARG_UNUSED (ignore)) { - unsigned int fcode = DECL_FUNCTION_CODE (fndecl); - + unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + if (fcode == darwin_builtin_cfstring) { if (!darwin_constant_cfstrings) { - error ("built-in function %qD requires the" + error ("built-in function %qD requires the" " %<-mconstant-cfstrings%> flag", fndecl); return error_mark_node; } @@ -3587,10 +3627,12 @@ darwin_rename_builtins (void) } bool -darwin_libc_has_function (enum function_class fn_class) +darwin_libc_has_function (enum function_class fn_class, + tree type ATTRIBUTE_UNUSED) { if (fn_class == function_sincos) - return false; + return (strverscmp (darwin_macosx_version_min, "10.9") >= 0); + if (fn_class == function_c99_math_complex || fn_class == function_c99_misc) return (TARGET_64BIT @@ -3667,8 +3709,9 @@ darwin_build_constant_cfstring (tree str) for (l = 0; l < length; l++) if (!s[l] || !isascii (s[l])) { - warning (darwin_warn_nonportable_cfstrings, "%s in CFString literal", - s[l] ? "non-ASCII character" : "embedded NUL"); + warning (darwin_warn_nonportable_cfstrings, + s[l] ? G_("non-ASCII character in CFString literal") + : G_("embedded NUL in CFString literal")); break; } } @@ -3678,12 +3721,12 @@ darwin_build_constant_cfstring (tree str) /* isa *. */ field = TYPE_FIELDS (ccfstring_type_node); - CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, - build1 (ADDR_EXPR, TREE_TYPE (field), + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build1 (ADDR_EXPR, TREE_TYPE (field), cfstring_class_reference)); /* flags */ field = DECL_CHAIN (field); - CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, build_int_cst (TREE_TYPE (field), 0x000007c8)); /* string *. */ field = DECL_CHAIN (field); @@ -3707,7 +3750,7 @@ darwin_build_constant_cfstring (tree str) TREE_LANG_FLAG_4 (constructor) = 1; /* TREE_HAS_CONSTRUCTOR */ /* Create an anonymous global variable for this CFString. 
*/ - var = build_decl (input_location, CONST_DECL, + var = build_decl (input_location, CONST_DECL, NULL, TREE_TYPE (constructor)); DECL_ARTIFICIAL (var) = 1; TREE_STATIC (var) = 1; @@ -3741,7 +3784,7 @@ darwin_cfstring_p (tree str) key.literal = str; cfstring_descriptor **loc = cfstring_htab->find_slot (&key, NO_INSERT); - + if (loc) return true; @@ -3773,7 +3816,7 @@ darwin_function_section (tree decl, enum node_frequency freq, bool weak = (decl && DECL_WEAK (decl) && (!DECL_ATTRIBUTES (decl) - || !lookup_attribute ("weak_import", + || !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl)))); bool use_coal = weak && ld_uses_coal_sects; diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 10b5e49172cfa..d2b2c141c8ed9 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -1,5 +1,5 @@ /* Target definitions for Darwin (Mac OS X) systems. - Copyright (C) 1989-2018 Free Software Foundation, Inc. + Copyright (C) 1989-2021 Free Software Foundation, Inc. Contributed by Apple Computer Inc. This file is part of GCC. @@ -43,9 +43,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define DARWIN_X86 0 #define DARWIN_PPC 0 -/* Don't assume anything about the header files. */ -#define NO_IMPLICIT_EXTERN_C - /* Suppress g++ attempt to link in the math library automatically. */ #define MATH_LIBRARY "" @@ -110,7 +107,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Default to using the NeXT-style runtime, since that's what is pre-installed on Darwin systems. */ -#define NEXT_OBJC_RUNTIME 1 +#define NEXT_OBJC_RUNTIME 100508 /* Don't default to pcc-struct-return, because gcc is the only compiler, and we want to retain compatibility with older gcc versions. */ @@ -121,13 +118,24 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* True if pragma ms_struct is in effect. */ extern GTY(()) int darwin_ms_struct; -#define DRIVER_SELF_SPECS \ - "%{gfull:-g -fno-eliminate-unused-debug-symbols} %< 10.6 10.7 mmacosx-version-min= -ld10-uwfef.o) \ %(link_gcc_c_sequence) \ - }}\ + }}}\ %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*} %{F*} "\ DARWIN_PIE_SPEC \ DARWIN_NOPIE_SPEC \ DARWIN_RDYNAMIC \ DARWIN_NOCOMPACT_UNWIND \ - "}}}}}}} %= 10.6 mmacosx-version-min= -lSystem) } } \ + %G %{!nolibc:%L}" /* ld64 supports a sysroot, it just has a different name and there's no easy way to check for it at config time. */ @@ -345,13 +364,11 @@ extern GTY(()) int darwin_ms_struct; %{Zunexported_symbols_list*:-unexported_symbols_list %*} \ %{Zweak_reference_mismatches*:-weak_reference_mismatches %*} \ %{!Zweak_reference_mismatches*:-weak_reference_mismatches non-weak} \ - %{X} \ - %{y*} \ %{w} \ %{pagezero_size*} %{segs_read_*} %{seglinkedit} %{noseglinkedit} \ %{sectalign*} %{sectobjectsymbols*} %{segcreate*} %{whyload} \ %{whatsloaded} %{dylinker_install_name*} \ - %{dylinker} %{Mach} " + %{dylinker} " /* Machine dependent libraries. */ @@ -466,21 +483,31 @@ extern GTY(()) int darwin_ms_struct; %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL} \ %{static}" ASM_MMACOSX_VERSION_MIN_SPEC -/* Default ASM_DEBUG_SPEC. Darwin's as cannot currently produce dwarf - debugging data. */ - +#ifdef HAVE_AS_STABS_DIRECTIVE +/* We only pass a debug option to the assembler if that supports stabs, since + dwarf is not uniformly supported in the assemblers. 
*/ #define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):%{!gdwarf*:--gstabs}}}" +#else +#define ASM_DEBUG_SPEC "" +#endif + +#undef ASM_DEBUG_OPTION_SPEC +#define ASM_DEBUG_OPTION_SPEC "" + #define ASM_FINAL_SPEC \ - "%{gsplit-dwarf:%ngsplit-dwarf is not supported on this platform } % Add to the end of the system framework include path. mconstant-cfstrings -Target Report Var(darwin_constant_cfstrings) Init(1) +Target Var(darwin_constant_cfstrings) Init(1) Generate compile-time CFString objects. Wnonportable-cfstrings -Target Report Var(darwin_warn_nonportable_cfstrings) Init(1) Warning +Target Var(darwin_warn_nonportable_cfstrings) Init(1) Warning Warn if constant CFString objects contain non-portable characters. ; Use new-style pic stubs if this is true, x86 only so far. matt-stubs -Target Report Var(darwin_macho_att_stub) Init(1) +Target Var(darwin_macho_att_stub) Init(1) Generate AT&T-style stubs for Mach-O. mdynamic-no-pic -Target Common Report Mask(MACHO_DYNAMIC_NO_PIC) +Target Common Mask(MACHO_DYNAMIC_NO_PIC) Generate code suitable for executables (NOT shared libs). mfix-and-continue -Target Report Var(darwin_fix_and_continue) +Target Var(darwin_fix_and_continue) Generate code suitable for fast turn around debugging. mkernel -Target Report Var(flag_mkernel) +Target Var(flag_mkernel) Generate code for the kernel or loadable kernel extensions. ; The Init here is for the convenience of GCC developers, so that cc1 @@ -67,75 +67,75 @@ Generate code for the kernel or loadable kernel extensions. ; driver will always pass a -mmacosx-version-min, so in normal use the ; Init is never used. mmacosx-version-min= -Target RejectNegative Joined Report Var(darwin_macosx_version_min) Init(DEF_MIN_OSX_VERSION) -The earliest MacOS X version on which this program will run. +Target RejectNegative Joined Var(darwin_macosx_version_min) Init(DEF_MIN_OSX_VERSION) +The earliest macOS version on which this program will run. ; Really, only relevant to PowerPC which has a 4 byte bool by default. mone-byte-bool -Target RejectNegative Report Var(darwin_one_byte_bool) +Target RejectNegative Var(darwin_one_byte_bool) Set sizeof(bool) to 1. msymbol-stubs -Target Report Var(darwin_symbol_stubs) Init(0) +Target Var(darwin_symbol_stubs) Init(0) Force generation of external symbol indirection stubs. ; Some code-gen may be improved / adjusted if the linker is sufficiently modern. mtarget-linker= -Target RejectNegative Joined Report Alias(mtarget-linker) +Target RejectNegative Joined Alias(mtarget-linker) mtarget-linker -Target RejectNegative Joined Separate Report Var(darwin_target_linker) Init(LD64_VERSION) -The version of ld64 in use for this toolchain. +Target RejectNegative Joined Separate Var(darwin_target_linker) Init(LD64_VERSION) +-mtarget-linker Specify that ld64 is the toolchain linker for the current invocation. ; Driver options. all_load Driver RejectNegative Alias(Zall_load) -Loads all members of archive libraries +Load all members of archive libraries, rather than only those that satisfy undefined symbols. allowable_client Driver RejectNegative Separate Alias(Zallowable_client) --allowable_client The output dylib is private to the client(s) named +-allowable_client The output dylib is private to the client . arch Driver RejectNegative Separate --arch Specify that the output file should be generated for architecture \"name\" +-arch Generate output for architecture . 
arch_errors_fatal Driver RejectNegative Alias(Zarch_errors_fatal) -Mismatches between file architecture and the \"-arch\" are errors instead of warnings +Mismatches between file architecture and one specified by \"-arch\" are errors instead of warnings. asm_macosx_version_min= Driver RejectNegative Joined -The earliest MacOS X version on which this program will run (formatted for the assembler) +The earliest macOS version on which this program will run (formatted for the assembler). bind_at_load Driver RejectNegative Alias(Zbind_at_load) -Produce an output file that will bind symbols on load, rather than lazily. +Generate an output executable that binds symbols on load, rather than lazily. bundle Driver RejectNegative Alias(Zbundle) -Produce a Mach-O bundle (file type MH_BUNDLE) +Generate a Mach-O bundle (file type MH_BUNDLE). bundle_loader Driver RejectNegative Separate Alias(Zbundle_loader) --bundle_loader Treat \"executable\" (that will be loading this bundle) as if it was one of the dynamic libraries the bundle is linked against for symbol resolution +-bundle_loader Treat (that will be loading this bundle) as if it was one of the dynamic libraries the bundle is linked against for symbol resolution. client_name Driver RejectNegative Separate --client_name Enable the executable being built to link against a private dylib (using allowable_client) +-client_name Enable the executable being built to link against a private dylib (using allowable_client). compatibility_version Driver RejectNegative Separate --compatibility_version Set the minimum version for the client interface. Clients must record a greater number than this or the binding will fail at runtime +-compatibility_version Set the version for the client interface. Client programs must record a value less than or equal to , or the binding will fail at runtime. current_version Driver RejectNegative Separate --current_version Set the current version for the library. +-current_version Set the current version for the library to . dead_strip Driver RejectNegative Alias(Zdead_strip) -Remove code and data that is unreachable from any exported symbol (including the entry point) +Remove code and data that is unreachable from any exported symbol (including the entry point). dylib_file Driver Separate Alias(Zdylib_file) @@ -154,15 +154,15 @@ The default (and opposite of -static), implied by user mode executables, shared dynamiclib Driver RejectNegative Alias(Zdynamiclib) -Produce a Mach-O shared library (file type MH_DYLIB), synonym for -shared +Produce a Mach-O shared library (file type MH_DYLIB), synonym for \"-shared\". exported_symbols_list Driver RejectNegative Separate Alias(Zexported_symbols_list) --exported_symbols_list Global symbols in \"filename\" will be exported from the linked output file, any symbols not mentioned will be treated as hidden. +-exported_symbols_list Export global symbols in in linked output file; any symbols not mentioned will be treated as \"hidden\". filelist Driver RejectNegative Separate -Supply a list of objects to be linked from a file, rather than the command line +Supply a list of objects to be linked from a file, rather than the command line. findirect-virtual-calls Driver RejectNegative @@ -178,11 +178,11 @@ For the assembler (and linker) permit any architecture sub-variant to be used wi force_flat_namespace Driver RejectNegative Alias(Zforce_flat_namespace) -Set the output object such that, on loading, dyld will ignore any two-level information and resolve symbols in the discovery order for loaded libs. 
+Set the output object such that, on loading, dyld will ignore any two-level namespace information and resolve symbols in the discovery order for loaded libs. framework Driver RejectNegative Separate --framework The linker should search for the named framework in the framework search path. +-framework The linker should search for the framework in the framework search path. fterminated-vtables Driver RejectNegative @@ -190,23 +190,23 @@ Used for generating code for some older kernel revisions. gfull Driver RejectNegative -Abbreviation for \"-g -fno-eliminate-unused-debug-symbols\" +Abbreviation for \"-g -fno-eliminate-unused-debug-symbols\". gused Driver RejectNegative -Abbreviation for \"-g -feliminate-unused-debug-symbols\" +Abbreviation for \"-g -feliminate-unused-debug-symbols\". headerpad_max_install_names Driver RejectNegative -Automatically adds space for longer path names in load commands (up to MAXPATHLEN) +Automatically adds space for longer path names in load commands (up to MAXPATHLEN). image_base Driver RejectNegative Separate Alias(Zimage_base) --image_base
Choose a base address for a dylib or bundle. +-image_base <address> Specify <address>
as the base address for a dylib or bundle. init Driver RejectNegative Separate Alias(Zinit) --init The symbol \"symbol_name\" will be used as the first initialiser for a dylib. +-init The symbol will be used as the first initialiser for a dylib. install_name Driver RejectNegative Separate Alias(Zinstall_name) @@ -218,11 +218,11 @@ Usually \"private extern\" (hidden) symbols are made local when linking, this co multi_module Driver RejectNegative Alias(Zmulti_module) -(Obsolete after 10.4) Multi modules are ignored at runtime since MacOS 10.4 +(Obsolete after 10.4) Multi modules are ignored at runtime since macOS 10.4. multiply_defined Driver RejectNegative Separate Alias(Zmultiply_defined) -(Obsolete after 10.4) -multiply_defined Provided a mechanism for warning about symbols defined in multiple dylibs. +(Obsolete after 10.4) -multiply_defined Provided a mechanism for warning about symbols defined in multiple dylibs. multiply_defined_unused Driver RejectNegative Separate Alias(Zmultiplydefinedunused) @@ -230,11 +230,11 @@ Driver RejectNegative Separate Alias(Zmultiplydefinedunused) no_dead_strip_inits_and_terms Driver RejectNegative Alias(Zno_dead_strip_inits_and_terms) -(Obsolete) The linker never dead strips these items, so the option is not needed. +(Obsolete) Current linkers never dead-strip these items, so the option is not needed. nofixprebinding Driver RejectNegative -(Obsolete after 10.3.9) Set MH_NOPREFIXBINDING, in an exectuable. +(Obsolete after 10.3.9) Set MH_NOPREFIXBINDING, in an executable. nomultidefs Driver RejectNegative @@ -253,7 +253,7 @@ Driver RejectNegative pagezero_size Driver RejectNegative Separate --pagezero_size size Allows setting the page 0 size to 4kb for certain special cases. +-pagezero_size Allows setting the page 0 size to 4kb when required. prebind Driver RejectNegative Negative(noprebind) @@ -280,15 +280,15 @@ Synonym for \"-export-dynamic\" for linker versions that support it. read_only_relocs Driver RejectNegative Separate --read_only_relocs This will allow relocs in read-only pages (not advisable). +-read_only_relocs Allow relocations in read-only pages (not recommended). sectalign Driver RejectNegative Separate Args(3) --sectalign Set section \"sectname\" in segment \"segname\" to have alignment \"value\" which must be an integral power of two expressed in hexadecimal form. +-sectalign Set section in segment to have alignment which must be an integral power of two expressed in hexadecimal form. sectcreate Driver RejectNegative Separate Args(3) --sectcreate Create section \"sectname\" in segment \"segname\" from the contents of \"file\". +-sectcreate Create section in segment from the contents of . sectobjectsymbols Driver RejectNegative Separate Args(2) @@ -296,29 +296,29 @@ Driver RejectNegative Separate Args(2) sectorder Driver RejectNegative Separate Args(3) -(Obsolete) -sectorder orderfile Replaced by a more general option \"-order_file\". +(Obsolete) -sectorder Replaced by a more general option \"-order_file\". seg_addr_table Driver RejectNegative Separate Alias(Zseg_addr_table) --seg_addr_table Specify the base addresses for dynamic libraries, \"file\" contains a line for each library. +-seg_addr_table Specify the base addresses for dynamic libraries; contains a line for each library. ; This is only usable by the ld_classic linker. seg_addr_table_filename Driver RejectNegative Separate Alias(Zfn_seg_addr_table_filename) -(Obsolete, ld_classic only) -seg_addr_table_filename +(Obsolete, ld_classic only) -seg_addr_table_filename . 
seg1addr Driver RejectNegative Separate -Synonym for \"image_base\" +Synonym for \"image_base\". segaddr Driver RejectNegative Separate Args(2) Alias(Zsegaddr) --segaddr
Set the base address of segment \"name\" to \"address\" which must be aligned to a page boundary (currently 4kb). +-segaddr <name> <address> Set the base address of segment <name> to <address> which must be aligned to a page boundary (currently 4kb).
; This is only usable by the ld_classic linker. segcreate Driver RejectNegative Separate Args(3) -(Obsolete, ld_classic only) -sectcreate segname sectname file +(Obsolete, ld_classic only) -sectcreate <segname> <sectname> <file> Allowed creation of a section from a file.
seglinkedit Driver RejectNegative Negative(noseglinkedit) segprot Driver RejectNegative Separate Args(3) --segprot max_prot init_prot The protection values are \"r\", \"w\", \"x\" or \"-\" the latter meaning \"no access\". +-segprot <segname> <max_prot> <init_prot> The virtual memory protections for segment <segname> have maximum and initial values <max_prot> and <init_prot> respectively. The specified values may contain \"r\", \"w\", \"x\" or \"-\" the latter meaning \"no access\".
segs_read_only_addr Driver RejectNegative Separate Alias(Zsegs_read_only_addr) --segs_read_only_addr address Allows specifying the address of the read only portion of a dylib. +-segs_read_only_addr <address> Specify that <address>
is the base address of the read-only segments of a dylib. segs_read_write_addr Driver RejectNegative Separate Alias(Zsegs_read_write_addr) --segs_read_write_addr address Allows specifying the address of the read/write portion of a dylib. +-segs_read_write_addr
Specify that
is the base address address of the read-write segments of a dylib. single_module Driver RejectNegative Alias(Zsingle_module) @@ -342,19 +342,19 @@ Driver RejectNegative Alias(Zsingle_module) sub_library Driver RejectNegative Separate --sub_library Library named \"name\" will be re-exported (only useful for dylibs). +-sub_library Library named will be re-exported (only useful for dylibs). sub_umbrella Driver RejectNegative Separate --sub_umbrella Framework named \"name\" will be re-exported (only useful for dylibs). +-sub_umbrella Framework named will be re-exported (only useful for dylibs). twolevel_namespace Driver RejectNegative -This is the default +This is the default. twolevel_namespace_hints Driver RejectNegative -Specifies content that can speed up dynamic loading when the binaries are unchanged. +Add extra information to the executable that can speed up dynamic loading (provided that dependent libraries are unchanged). umbrella Driver RejectNegative Separate Alias(Zumbrella) @@ -366,7 +366,7 @@ Driver RejectNegative Separate unexported_symbols_list Driver RejectNegative Separate Alias(Zunexported_symbols_list) --unexported_symbols_list Don't export global symbols listed in filename. +-unexported_symbols_list Do not export the global symbols listed in . weak_reference_mismatches Driver RejectNegative Separate Alias(Zweak_reference_mismatches) @@ -374,7 +374,7 @@ Driver RejectNegative Separate Alias(Zweak_reference_mismatches) whatsloaded Driver RejectNegative -Logs the object files the linker loads +Logs which object files the linker loads. whyload Driver RejectNegative @@ -386,7 +386,7 @@ Driver RejectNegative y Driver RejectNegative Joined -(Obsolete, ignored) Old support similar to whyload. +(Obsolete, ignored) Old support similar to \"-whyload\". Mach Driver RejectNegative diff --git a/gcc/config/host-darwin.c b/gcc/config/host-darwin.c index 8f700eec85dc7..14a01fe71f2a4 100644 --- a/gcc/config/host-darwin.c +++ b/gcc/config/host-darwin.c @@ -1,5 +1,5 @@ /* Darwin host-specific hook definitions. - Copyright (C) 2003-2018 Free Software Foundation, Inc. + Copyright (C) 2003-2021 Free Software Foundation, Inc. This file is part of GCC. @@ -24,7 +24,10 @@ #include "config/host-darwin.h" /* Yes, this is really supposed to work. */ -static char pch_address_space[1024*1024*1024] __attribute__((aligned (4096))); +/* This allows for a pagesize of 16384, which we have on Darwin20, but should + continue to work OK for pagesize 4096 which we have on earlier versions. + The size is 1 (binary) Gb. */ +static char pch_address_space[65536*16384] __attribute__((aligned (16384))); /* Return the address of the PCH address space, if the PCH will fit in it. */ @@ -58,7 +61,8 @@ darwin_gt_pch_use_address (void *addr, size_t sz, int fd, size_t off) sz = (sz + pagesize - 1) / pagesize * pagesize; if (munmap (pch_address_space + sz, sizeof (pch_address_space) - sz) != 0) - fatal_error (input_location, "couldn%'t unmap pch_address_space: %m"); + fatal_error (input_location, + "could not unmap %: %m"); if (ret) { diff --git a/gcc/config/host-darwin.h b/gcc/config/host-darwin.h index 47c3f9d2da740..4acae9cf341db 100644 --- a/gcc/config/host-darwin.h +++ b/gcc/config/host-darwin.h @@ -1,5 +1,5 @@ /* Darwin host-specific hook definitions. - Copyright (C) 2003-2018 Free Software Foundation, Inc. + Copyright (C) 2003-2021 Free Software Foundation, Inc. This file is part of GCC. 
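A quick standalone sketch (not part of the patch) of the arithmetic behind the host-darwin.c change above: the new buffer of 65536*16384 bytes is still exactly 1 GiB, and darwin_gt_pch_use_address rounds the PCH size up to a whole number of pages before unmapping the unused tail of the reservation. The round_up_to_page helper name is illustrative, not taken from GCC.

#include <assert.h>
#include <stdio.h>

/* Mirror of the rounding expression used in darwin_gt_pch_use_address:
   sz = (sz + pagesize - 1) / pagesize * pagesize;  */
static size_t
round_up_to_page (size_t sz, size_t pagesize)
{
  return (sz + pagesize - 1) / pagesize * pagesize;
}

int
main (void)
{
  /* 65536 * 16384 == 1 << 30, so using the Darwin20 page size (16384)
     as the alignment keeps the original 1 GiB reservation.  */
  assert ((size_t) 65536 * 16384 == (size_t) 1 << 30);

  /* A 10000-byte PCH occupies one 16384-byte page; everything past that
     page in the reserved buffer can be handed back with munmap.  */
  printf ("%zu\n", round_up_to_page (10000, 16384));  /* 16384 */
  printf ("%zu\n", round_up_to_page (16384, 16384));  /* 16384 */
  printf ("%zu\n", round_up_to_page (16385, 16384));  /* 32768 */
  return 0;
}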
diff --git a/gcc/config/t-darwin b/gcc/config/t-darwin index 4fa3208b8247f..d9d4c73ed2b8a 100644 --- a/gcc/config/t-darwin +++ b/gcc/config/t-darwin @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2018 Free Software Foundation, Inc. +# Copyright (C) 2002-2021 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -26,6 +26,9 @@ darwin-c.o: $(srcdir)/config/darwin-c.c $(COMPILE) $(PREPROCESSOR_DEFINES) $< $(POSTCOMPILE) +darwin-d.o: $(srcdir)/config/darwin-d.c + $(COMPILE) $< + $(POSTCOMPILE) darwin-f.o: $(srcdir)/config/darwin-f.c $(COMPILE) $< diff --git a/gcc/genconditions.c b/gcc/genconditions.c index 110e6c66951a5..a237df5012721 100644 --- a/gcc/genconditions.c +++ b/gcc/genconditions.c @@ -1,5 +1,5 @@ /* Process machine description and calculate constant conditions. - Copyright (C) 2001-2018 Free Software Foundation, Inc. + Copyright (C) 2001-2021 Free Software Foundation, Inc. This file is part of GCC. diff --git a/gcc/genconfig.c b/gcc/genconfig.c index c1bfde8d54bbb..df02bfe1b0da5 100644 --- a/gcc/genconfig.c +++ b/gcc/genconfig.c @@ -1,6 +1,6 @@ /* Generate from machine description: - some #define configuration flags. - Copyright (C) 1987-2018 Free Software Foundation, Inc. + Copyright (C) 1987-2021 Free Software Foundation, Inc. This file is part of GCC. diff --git a/gcc/testsuite/gcc.target/riscv/arch-1.c b/gcc/testsuite/gcc.target/riscv/arch-1.c new file mode 100644 index 0000000000000..945897723dd5c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-1.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=rv32i -march=rv32I -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-error ".'-march=rv32I': first ISA subset must be 'e', 'i' or 'g'" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-10.c b/gcc/testsuite/gcc.target/riscv/arch-10.c new file mode 100644 index 0000000000000..47dbda333c9aa --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-10.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32gf2 -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-error "Extension `f' appear more than one time." 
"" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-11.c b/gcc/testsuite/gcc.target/riscv/arch-11.c new file mode 100644 index 0000000000000..129d8f72804f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-11.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32g_zicsr2 -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-2.c b/gcc/testsuite/gcc.target/riscv/arch-2.c new file mode 100644 index 0000000000000..36b7850d7c645 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-2.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=rv32ixabc_xfoo -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-3.c b/gcc/testsuite/gcc.target/riscv/arch-3.c new file mode 100644 index 0000000000000..124699405c5c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-3.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=rv32isabc_xbar -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-4.c b/gcc/testsuite/gcc.target/riscv/arch-4.c new file mode 100644 index 0000000000000..6e55a7eaef52a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-4.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=rv32i2p3_m4p2 -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-5.c b/gcc/testsuite/gcc.target/riscv/arch-5.c new file mode 100644 index 0000000000000..b0a1bd445fed6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-5.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=rv32isabc_hghi_zfoo_xbar -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-6.c b/gcc/testsuite/gcc.target/riscv/arch-6.c new file mode 100644 index 0000000000000..b36dccbf46b9b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-6.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=rv32id -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-7.c b/gcc/testsuite/gcc.target/riscv/arch-7.c new file mode 100644 index 0000000000000..74ab248fa5777 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-7.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32i -march=rv32im_s -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-error ".'-march=rv32im_s': name of supervisor extension must be more than 1 letter" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-8.c b/gcc/testsuite/gcc.target/riscv/arch-8.c new file mode 100644 index 0000000000000..d7760fc576f3d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-8.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=rv32id_zicsr_zifence -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-9.c b/gcc/testsuite/gcc.target/riscv/arch-9.c new file mode 100644 index 0000000000000..74e64103563dd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-9.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32g2 -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-warning "version of `g` will be omitted, please specify version for individual extension." 
"" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-1.c b/gcc/testsuite/gcc.target/riscv/attribute-1.c new file mode 100644 index 0000000000000..7150f492b0706 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-1.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-10.c b/gcc/testsuite/gcc.target/riscv/attribute-10.c new file mode 100644 index 0000000000000..26fdd08b26dd8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-10.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32i -march=rv32im_sx_unexpectedstring -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-error "unexpected ISA string at end:" "" { target { "riscv*-*-*" } } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-11.c b/gcc/testsuite/gcc.target/riscv/attribute-11.c new file mode 100644 index 0000000000000..98bd8d4da42b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-11.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32id -mabi=ilp32 -misa-spec=2.2" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_f2p0_d2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-12.c b/gcc/testsuite/gcc.target/riscv/attribute-12.c new file mode 100644 index 0000000000000..44fccad3b29ec --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-12.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32ifd -mabi=ilp32 -misa-spec=2.2" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_f2p0_d2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-13.c b/gcc/testsuite/gcc.target/riscv/attribute-13.c new file mode 100644 index 0000000000000..1b8f93ceaaf78 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-13.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32if3d -mabi=ilp32 -misa-spec=2.2" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_f3p0_d2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-14.c b/gcc/testsuite/gcc.target/riscv/attribute-14.c new file mode 100644 index 0000000000000..2591c1f92f6e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-14.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32if -mabi=ilp32 -misa-spec=20190608" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p1_f2p2_zicsr2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-15.c b/gcc/testsuite/gcc.target/riscv/attribute-15.c new file mode 100644 index 0000000000000..9cae1a27a6fe8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-15.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32gc -mabi=ilp32 -misa-spec=2.2" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-16.c b/gcc/testsuite/gcc.target/riscv/attribute-16.c new file mode 100644 index 0000000000000..f090363b9793c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-16.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32gc -mabi=ilp32 -misa-spec=20190608" } */ +int foo() +{ +} +/* { 
dg-final { scan-assembler ".attribute arch, \"rv32i2p1_m2p0_a2p0_f2p2_d2p2_c2p0_zicsr2p0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-17.c b/gcc/testsuite/gcc.target/riscv/attribute-17.c new file mode 100644 index 0000000000000..19ef540b5b985 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-17.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32gc -mabi=ilp32 -misa-spec=20191213" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-18.c b/gcc/testsuite/gcc.target/riscv/attribute-18.c new file mode 100644 index 0000000000000..492360cf7c117 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-18.c @@ -0,0 +1,4 @@ +/* { dg-do compile } */ +/* { dg-options "-mriscv-attribute -march=rv64imafdcp -mabi=lp64d -misa-spec=2.2" } */ +int foo() {} +/* { dg-final { scan-assembler ".attribute arch, \"rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0_p\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-2.c b/gcc/testsuite/gcc.target/riscv/attribute-2.c new file mode 100644 index 0000000000000..3636a1a29f3ff --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mno-riscv-attribute" } */ +int foo() +{ +} +/* { dg-final { scan-assembler-not ".attribute arch" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-3.c b/gcc/testsuite/gcc.target/riscv/attribute-3.c new file mode 100644 index 0000000000000..735992df79114 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -mpreferred-stack-boundary=8" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute stack_align, 256" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-4.c b/gcc/testsuite/gcc.target/riscv/attribute-4.c new file mode 100644 index 0000000000000..404faada308f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-4.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -mstrict-align" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute unaligned_access, 0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-5.c b/gcc/testsuite/gcc.target/riscv/attribute-5.c new file mode 100644 index 0000000000000..de8909435b160 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-5.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -mno-strict-align" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute unaligned_access, 1" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-7.c b/gcc/testsuite/gcc.target/riscv/attribute-7.c new file mode 100644 index 0000000000000..3d033931b6f9e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-7.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32e1p9 -mabi=ilp32e" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32e1p9\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-8.c b/gcc/testsuite/gcc.target/riscv/attribute-8.c new file mode 100644 index 0000000000000..90f5a4022a0cb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-8.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32i2p0xabc_xv5 -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, 
\"rv32i2p0_xabc_xv5p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-9.c b/gcc/testsuite/gcc.target/riscv/attribute-9.c new file mode 100644 index 0000000000000..4598872f0a68e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-9.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32i2p0sabc_xbar -mabi=ilp32e" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_sabc_xbar\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/flattened-struct-abi-1.c b/gcc/testsuite/gcc.target/riscv/flattened-struct-abi-1.c new file mode 100644 index 0000000000000..f6a3c51b3fb0b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/flattened-struct-abi-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc -mabi=ilp32d" } */ +struct s1 { int : 0; float f; int i; int : 0; }; + +void dummy(float, int); + +void f(struct s1 s) { /* { dg-warning "flattened struct" } */ + dummy(s.f + 1.0, s.i + 1); +} diff --git a/gcc/testsuite/gcc.target/riscv/flattened-struct-abi-2.c b/gcc/testsuite/gcc.target/riscv/flattened-struct-abi-2.c new file mode 100644 index 0000000000000..760826a42f1ca --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/flattened-struct-abi-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc -mabi=ilp32d" } */ +struct s1 { int : 0; float f; float g; int : 0; }; + +void dummy(float, float); + +void f(struct s1 s) { /* { dg-warning "flattened struct" } */ + dummy(s.f + 1.0, s.g + 2.0); +} diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-1.c b/gcc/testsuite/gcc.target/riscv/interrupt-1.c new file mode 100644 index 0000000000000..666b29a49bb8c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-1.c @@ -0,0 +1,8 @@ +/* Verify the return instruction is mret. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt)) +foo (void) +{ +} +/* { dg-final { scan-assembler "mret" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-2.c b/gcc/testsuite/gcc.target/riscv/interrupt-2.c new file mode 100644 index 0000000000000..82e3fb24e8137 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-2.c @@ -0,0 +1,13 @@ +/* Verify that arg regs used as temporaries get saved. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt)) +foo2 (void) +{ + extern volatile int INTERRUPT_FLAG; + INTERRUPT_FLAG = 0; + + extern volatile int COUNTER; + COUNTER++; +} +/* { dg-final { scan-assembler-times "s\[wd\]\ta\[0-7\],\[0-9\]+\\(sp\\)" 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-3.c b/gcc/testsuite/gcc.target/riscv/interrupt-3.c new file mode 100644 index 0000000000000..3d1d44df45e6f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-3.c @@ -0,0 +1,9 @@ +/* Verify t0 is saved before use. */ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ +void __attribute__ ((interrupt)) +foo (void) +{ + char array[4096]; +} +/* { dg-final { scan-assembler "s\[wd\]\tt0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-4.c b/gcc/testsuite/gcc.target/riscv/interrupt-4.c new file mode 100644 index 0000000000000..658aa176e7794 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-4.c @@ -0,0 +1,18 @@ +/* Verify t0 is saved before use. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ +void __attribute__ ((interrupt)) +foo2 (void) +{ + char array[4096]; + extern volatile int INTERRUPT_FLAG; + INTERRUPT_FLAG = 0; + + extern volatile int COUNTER; +#ifdef __riscv_atomic + __atomic_fetch_add (&COUNTER, 1, __ATOMIC_RELAXED); +#else + COUNTER++; +#endif +} +/* { dg-final { scan-assembler "s\[wd\]\tt0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-5.c b/gcc/testsuite/gcc.target/riscv/interrupt-5.c new file mode 100644 index 0000000000000..324954eb1dd3a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-5.c @@ -0,0 +1,21 @@ +/* Verify proper errors are generated for invalid code. */ +int __attribute__ ((interrupt)) +sub0 (void) +{ /* { dg-error "function cannot return a value" } */ + return 10; +} + +void __attribute__ ((interrupt)) +sub1 (int i) +{ /* { dg-error "function cannot have arguments" } */ +} + +void __attribute__ ((interrupt, naked)) +sub2 (void) +{ /* { dg-error "are mutually exclusive" } */ +} + +void __attribute__ ((interrupt ("hypervisor"))) +sub3 (void) +{ /* { dg-warning "argument to" } */ +} diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-conflict-mode.c b/gcc/testsuite/gcc.target/riscv/interrupt-conflict-mode.c new file mode 100644 index 0000000000000..e9f145265c09a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-conflict-mode.c @@ -0,0 +1,10 @@ +/* Verify proper errors are generated for conflicted interrupt type. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt ("user"))) +foo(void); + +void __attribute__ ((interrupt ("machine"))) +foo (void) +{ /* { dg-error "function cannot have different interrupt type" } */ +} diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-debug.c b/gcc/testsuite/gcc.target/riscv/interrupt-debug.c new file mode 100644 index 0000000000000..a1b6dac8fbbf8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-debug.c @@ -0,0 +1,15 @@ +/* Verify that we can compile with debug info. */ +/* { dg-do compile } */ +/* { dg-options "-Og -g" } */ +extern int var1; +extern int var2; +extern void sub2 (void); + +void __attribute__ ((interrupt)) +sub (void) +{ + if (var1) + var2 = 0; + else + sub2 (); +} diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-mmode.c b/gcc/testsuite/gcc.target/riscv/interrupt-mmode.c new file mode 100644 index 0000000000000..fd7a7a17e1756 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-mmode.c @@ -0,0 +1,8 @@ +/* Verify the return instruction is mret. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt ("machine"))) +foo (void) +{ +} +/* { dg-final { scan-assembler "mret" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-smode.c b/gcc/testsuite/gcc.target/riscv/interrupt-smode.c new file mode 100644 index 0000000000000..2f696d30b0250 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-smode.c @@ -0,0 +1,8 @@ +/* Verify the return instruction is mret. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt ("supervisor"))) +foo (void) +{ +} +/* { dg-final { scan-assembler "sret" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-umode.c b/gcc/testsuite/gcc.target/riscv/interrupt-umode.c new file mode 100644 index 0000000000000..cd120e489ca0f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/interrupt-umode.c @@ -0,0 +1,8 @@ +/* Verify the return instruction is mret. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt ("user"))) +foo (void) +{ +} +/* { dg-final { scan-assembler "uret" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/li.c b/gcc/testsuite/gcc.target/riscv/li.c new file mode 100644 index 0000000000000..fa5c02caee894 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/li.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-options "-O1" } */ +#include +#define LOAD_IMM(var, val) \ + asm ("li %0, %1\n": "=r"(var): "i" (val)) + +#define CHECK_LI(type, val) \ + { \ + type var; \ + LOAD_IMM(var, val); \ + if (var != val) \ + abort(); \ + } + +#define CHECK_LI32(val) CHECK_LI(int, val) +#define CHECK_LI64(val) CHECK_LI(long long, val) + +int main() +{ + CHECK_LI32(0x8001); + CHECK_LI32(0x1f01); + CHECK_LI32(0x12345001); + CHECK_LI32(0xf2345001); +#if __riscv_xlen == 64 + CHECK_LI64(0x8001ll); + CHECK_LI64(0x1f01ll); + CHECK_LI64(0x12345001ll); + CHECK_LI64(0xf2345001ll); + CHECK_LI64(0xf12345001ll); + CHECK_LI64(0xff00ff00ff001f01ll); + CHECK_LI64(0x7ffffffff2345001ll); + CHECK_LI64(0x7f0f243ff2345001ll); + CHECK_LI64(0x1234567887654321ll); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/load-immediate.c b/gcc/testsuite/gcc.target/riscv/load-immediate.c new file mode 100644 index 0000000000000..f8fe7473c314c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/load-immediate.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O2" } */ + +/* Check that we don't have unnecessary load immediate instructions. */ +void +sub1 (int *a, long long *b) +{ + *a = 1; + *b = 1; +} + +void +sub2 (short *a, short *b) +{ + *a = -32768; + *b = 32767; +} + +void +sub3 (int *a, long long *b) +{ + *a = 10000; + *b = 10000; +} + +void +sub4 (int *a, short *b) +{ + *a = 1; + *b = 1; +} +/* { dg-final { scan-assembler-times "\tli\t" 4 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/losum-overflow.c b/gcc/testsuite/gcc.target/riscv/losum-overflow.c new file mode 100644 index 0000000000000..9c01c7feb545e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/losum-overflow.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc -mabi=ilp32 -O2 -fno-section-anchors" } */ + +/* Check for %lo overflow. Adding an offset larger than the alignment can + overflow if the data is allocated to an address mod 4KB that is between + 2KB-offset+1 and 2KB-1. 
*/ +typedef long long int int64_t; + +#pragma pack(push) +#pragma pack(1) +struct S0 { + signed f0 : 4; + const volatile int64_t f1; + volatile signed f2 : 1; + signed f3 : 31; + unsigned f4 : 8; + signed f5 : 20; + unsigned f6 : 5; +}; +#pragma pack(pop) + +struct S0 g_3030 = {0,-9L,-0,-22553,7,-841,1}; + +int64_t +sub (void) +{ + return g_3030.f1; +} +/* { dg-final { scan-assembler-not "%lo\\(g_3030\\+4\\)" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-1.c b/gcc/testsuite/gcc.target/riscv/mcpu-1.c new file mode 100644 index 0000000000000..6f6005c79eb04 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/mcpu-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-skip-if "-march given" { *-*-* } { "-march=*" } } */ +/* { dg-options "-mcpu=sifive-e20 -mabi=ilp32" } */ +/* sifive-e20 = rv32imc */ + +#if !((__riscv_xlen == 32) \ + && !defined(__riscv_32e) \ + && defined(__riscv_mul) \ + && !defined(__riscv_atomic) \ + && !defined(__riscv_flen) \ + && defined(__riscv_compressed)) +#error "unexpected arch" +#endif + +int main() +{ + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-2.c b/gcc/testsuite/gcc.target/riscv/mcpu-2.c new file mode 100644 index 0000000000000..2992f4e108364 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/mcpu-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-skip-if "-march given" { *-*-* } { "-march=*" } } */ +/* { dg-options "-mcpu=sifive-e34 -mabi=ilp32" } */ +/* sifive-e34 = rv32imafc */ + +#if !((__riscv_xlen == 32) \ + && !defined(__riscv_32e) \ + && defined(__riscv_mul) \ + && defined(__riscv_atomic) \ + && (__riscv_flen == 32) \ + && defined(__riscv_compressed)) +#error "unexpected arch" +#endif + +int main() +{ + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-3.c b/gcc/testsuite/gcc.target/riscv/mcpu-3.c new file mode 100644 index 0000000000000..97b3f8190faf2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/mcpu-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-skip-if "-march given" { *-*-* } { "-march=*" } } */ +/* { dg-options "-mcpu=sifive-s51 -mabi=lp64" } */ +/* sifive-s51 = rv64imac */ + +#if !((__riscv_xlen == 64) \ + && !defined(__riscv_32e) \ + && defined(__riscv_mul) \ + && defined(__riscv_atomic) \ + && !defined(__riscv_flen) \ + && defined(__riscv_compressed)) +#error "unexpected arch" +#endif + +int main() +{ + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-4.c b/gcc/testsuite/gcc.target/riscv/mcpu-4.c new file mode 100644 index 0000000000000..52c598754227b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/mcpu-4.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-skip-if "-march given" { *-*-* } { "-march=*" } } */ +/* { dg-options "-mcpu=sifive-u74 -mabi=lp64" } */ +/* sifive-u74 = rv64imafdc */ + +#if !((__riscv_xlen == 64) \ + && !defined(__riscv_32e) \ + && defined(__riscv_mul) \ + && defined(__riscv_atomic) \ + && (__riscv_flen == 64) \ + && defined(__riscv_compressed)) +#error "unexpected arch" +#endif + +int main() +{ + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-5.c b/gcc/testsuite/gcc.target/riscv/mcpu-5.c new file mode 100644 index 0000000000000..c4ea7b5e41a37 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/mcpu-5.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-skip-if "-march given" { *-*-* } { "-march=*" } } */ +/* Verify -march will override arch option from -mcpu. 
*/ +/* { dg-options "-mcpu=sifive-u74 -march=rv32ic -mabi=ilp32" } */ +/* sifive-s51 = rv64imafdc */ + +#if !((__riscv_xlen == 32) \ + && !defined(__riscv_32e) \ + && !defined(__riscv_mul) \ + && !defined(__riscv_atomic) \ + && !defined(__riscv_flen) \ + && defined(__riscv_compressed)) +#error "unexpected arch" +#endif + +int main() +{ + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-6.c b/gcc/testsuite/gcc.target/riscv/mcpu-6.c new file mode 100644 index 0000000000000..57e3345630c38 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/mcpu-6.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* Verify -mtune has higher priority than -mcpu for pipeline model . */ +/* { dg-options "-mcpu=sifive-u74 -mtune=rocket -fdump-rtl-sched2-details -O3 -march=rv32i -mabi=ilp32" } */ +/* { dg-final { scan-rtl-dump "simple_return\[ \]+:alu" "sched2" } } */ + +int main() +{ + return 0; +} + diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-7.c b/gcc/testsuite/gcc.target/riscv/mcpu-7.c new file mode 100644 index 0000000000000..fe3c04be4b6d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/mcpu-7.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* Verify -mtune has higher priority than -mcpu for pipeline model . */ +/* { dg-options "-mcpu=sifive-s21 -mtune=sifive-u74 -fdump-rtl-sched2-details -O3 -march=rv32i -mabi=ilp32" } */ +/* { dg-final { scan-rtl-dump "simple_return\[ \]+:sifive_7_B" "sched2" } } */ + +int main() +{ + return 0; +} + diff --git a/gcc/testsuite/gcc.target/riscv/pr93304.c b/gcc/testsuite/gcc.target/riscv/pr93304.c new file mode 100644 index 0000000000000..248f205e0d2d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr93304.c @@ -0,0 +1,18 @@ +/* Verify the regrename won't rename registers to register which never used + before. */ +/* { dg-do compile } */ +/* { dg-options "-O -frename-registers" } */ + +static unsigned _t = 0; + +void __attribute__ ((interrupt)) +foo (void) +{ + _t++; +} + +/* Register rename will try to use registers from the lower register + regradless of the REG_ALLOC_ORDER. + In theory, t2 should not used in such small program if regrename + not executed incorrectly, because t0-a2 should be enough. 
*/ +/* { dg-final { scan-assembler-not "t2" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr95252.c b/gcc/testsuite/gcc.target/riscv/pr95252.c new file mode 100644 index 0000000000000..0366c089f83dd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr95252.c @@ -0,0 +1,47 @@ +/* PR target/95252 */ +/* { dg-options "-O3 -funroll-loops -msave-restore" } */ +/* { dg-do run } */ + +int a[6], b = 1, d, e; +long long c; +static int f = 1; + +void +fn1 (int p1) +{ + b = (b >> 1) & (1 ^ a[(1 ^ p1) & 5]); +} + +void +fn2 () +{ + b = (b >> 1) & (1 ^ a[(b ^ 1) & 1]); + fn1 (c >> 1 & 5); + fn1 (c >> 2 & 5); + fn1 (c >> 4 & 5); + fn1 (c >> 8 & 5); +} + +int +main () +{ + int i, j; + for (; d;) + { + for (; e;) + fn2 (); + f = 0; + } + for (i = 0; i < 8; i++) + { + if (f) + i = 9; + for (j = 0; j < 7; j++) + fn2 (); + } + + if (b != 0) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/pr95683.c b/gcc/testsuite/gcc.target/riscv/pr95683.c new file mode 100644 index 0000000000000..00cfbdcf28266 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr95683.c @@ -0,0 +1,10 @@ +/* PR target/95683 */ +/* { dg-options "-Os" } */ +/* { dg-do compile } */ +void a() { + asm("" + : + : + : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "t4", "t5", "t6", "ra"); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr96759.c b/gcc/testsuite/gcc.target/riscv/pr96759.c new file mode 100644 index 0000000000000..621c39196fca3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr96759.c @@ -0,0 +1,13 @@ +/* { dg-options "-mno-strict-align" } */ +/* { dg-do compile } */ + +struct S { + int a; + double b; +}; +struct S GetNumbers(); +struct S g; + +void foo(){ + g = GetNumbers(); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr98777.c b/gcc/testsuite/gcc.target/riscv/pr98777.c new file mode 100644 index 0000000000000..ea2c2f9ca64e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr98777.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-fstrict-aliasing -O" } */ + +typedef struct { + _Complex e; + _Complex f; + _Complex g; + _Complex h; + _Complex i; + _Complex j; + _Complex k; + _Complex l; + _Complex m; + _Complex n; + _Complex o; + _Complex p; +} Scl16; + +Scl16 g1sScl16, g2sScl16, g3sScl16, g4sScl16, g5sScl16, g6sScl16, g7sScl16, + g8sScl16, g9sScl16, g10sScl16, g11sScl16, g12sScl16, g13sScl16, g14sScl16, + g15sScl16, g16sScl16; + +void testvaScl16(); + +void +testitScl16() { + testvaScl16(g10sScl16, g11sScl16, g12sScl16, g13sScl16, g14sScl16, g1sScl16, + g2sScl16, g3sScl16, g4sScl16, g5sScl16, g6sScl16, g7sScl16, + g8sScl16, g9sScl16, g10sScl16, g11sScl16, g12sScl16, g13sScl16, + g14sScl16, g15sScl16, g16sScl16); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr99702.c b/gcc/testsuite/gcc.target/riscv/pr99702.c new file mode 100644 index 0000000000000..a28724c0958b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr99702.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +char n; +void *i, *j; +void foo(void) { + __builtin_memcpy(i, j, n); +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-1.c b/gcc/testsuite/gcc.target/riscv/predef-1.c new file mode 100644 index 0000000000000..70f121f15fafb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-1.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32i -mabi=ilp32 -mcmodel=medlow" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if defined(__riscv_32e) 
+#error "__riscv_32e" +#endif + +#if defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if defined(__riscv_div) +#error "__riscv_div" +#endif +#if defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 32 +#error "__riscv_xlen" +#endif + +#if defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if !defined(__riscv_float_abi_soft) +#error "__riscv_float_abi_soft" +#endif +#if defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if !defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_medlow" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-10.c b/gcc/testsuite/gcc.target/riscv/predef-10.c new file mode 100644 index 0000000000000..7c447bfb08dbe --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-10.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32i2p0 -mabi=ilp32 -mcmodel=medlow -misa-spec=2.2" } */ + +int main () { + +#ifndef __riscv_arch_test +#error "__riscv_arch_test" +#endif + +#if __riscv_xlen != 32 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000)) +#error "__riscv_i" +#endif + +#if defined(__riscv_c) +#error "__riscv_c" +#endif + +#if defined(__riscv_e) +#error "__riscv_e" +#endif + +#if defined(__riscv_a) +#error "__riscv_a" +#endif + +#if defined(__riscv_m) +#error "__riscv_m" +#endif + +#if defined(__riscv_f) +#error "__riscv_f" +#endif + +#if defined(__riscv_d) +#error "__riscv_d" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-11.c b/gcc/testsuite/gcc.target/riscv/predef-11.c new file mode 100644 index 0000000000000..80f48113dfa68 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-11.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gc -mabi=lp64 -mcmodel=medlow -misa-spec=2.2" } */ + +int main () { + +#ifndef __riscv_arch_test +#error "__riscv_arch_test" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000)) +#error "__riscv_i" +#endif + +#if !defined(__riscv_c) || (__riscv_c != (2 * 1000 * 1000)) +#error "__riscv_c" +#endif + +#if defined(__riscv_e) +#error "__riscv_e" +#endif + +#if !defined(__riscv_a) || (__riscv_a != (2 * 1000 * 1000)) +#error "__riscv_a" +#endif + +#if !defined(__riscv_m) || (__riscv_m != (2 * 1000 * 1000)) +#error "__riscv_m" +#endif + +#if !defined(__riscv_f) || (__riscv_f != (2 * 1000 * 1000)) +#error "__riscv_f" +#endif + +#if !defined(__riscv_d) || (__riscv_d != (2 * 1000 * 1000)) +#error "__riscv_d" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-12.c b/gcc/testsuite/gcc.target/riscv/predef-12.c new file mode 100644 index 0000000000000..dd35dbde925b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-12.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gc -mabi=lp64 -mcmodel=medlow -misa-spec=20191213" } */ + +int main () { + +#ifndef __riscv_arch_test +#error "__riscv_arch_test" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_i) || (__riscv_i 
!= (2 * 1000 * 1000 + 1 * 1000)) +#error "__riscv_i" +#endif + +#if !defined(__riscv_c) || (__riscv_c != (2 * 1000 * 1000)) +#error "__riscv_c" +#endif + +#if defined(__riscv_e) +#error "__riscv_e" +#endif + +#if !defined(__riscv_a) || (__riscv_a != (2 * 1000 * 1000 + 1 * 1000)) +#error "__riscv_a" +#endif + +#if !defined(__riscv_m) || (__riscv_m != (2 * 1000 * 1000)) +#error "__riscv_m" +#endif + +#if !defined(__riscv_f) || (__riscv_f != (2 * 1000 * 1000 + 2 * 1000)) +#error "__riscv_f" +#endif + +#if !defined(__riscv_d) || (__riscv_d != (2 * 1000 * 1000 + 2 * 1000)) +#error "__riscv_d" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-13.c b/gcc/testsuite/gcc.target/riscv/predef-13.c new file mode 100644 index 0000000000000..95cf0012408ce --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-13.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32e -mabi=ilp32e -mcmodel=medlow -misa-spec=2.2" } */ + +int main () { + +#ifndef __riscv_arch_test +#error "__riscv_arch_test" +#endif + +#if __riscv_xlen != 32 +#error "__riscv_xlen" +#endif + +#if defined(__riscv_i) +#error "__riscv_i" +#endif + +#if defined(__riscv_c) +#error "__riscv_c" +#endif + +#if !defined(__riscv_e) || (__riscv_e != (1 * 1000 * 1000 + 9 * 1000)) +#error "__riscv_e" +#endif + +#if defined(__riscv_a) +#error "__riscv_a" +#endif + +#if defined(__riscv_m) +#error "__riscv_m" +#endif + +#if defined(__riscv_f) +#error "__riscv_f" +#endif + +#if defined(__riscv_d) +#error "__riscv_d" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-2.c b/gcc/testsuite/gcc.target/riscv/predef-2.c new file mode 100644 index 0000000000000..6f3c8c3864ddb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-2.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32imaf -mabi=ilp32f -mcmodel=medany" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if defined(__riscv_32e) +#error "__riscv_32e" +#endif + +#if !defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if !defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if !defined(__riscv_div) +#error "__riscv_div" +#endif +#if !defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 32 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if !defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if defined(__riscv_float_abi_soft) +#error "__riscv_float_abi_soft" +#endif +#if !defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if !defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_medlow" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-3.c b/gcc/testsuite/gcc.target/riscv/predef-3.c new file mode 100644 index 0000000000000..d7c9793b3d7cc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-3.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32imafdc -mabi=ilp32d -fpic" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if !defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if defined(__riscv_32e) +#error 
"__riscv_32e" +#endif + +#if !defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if !defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if !defined(__riscv_div) +#error "__riscv_div" +#endif +#if !defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 32 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if !defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if defined(__riscv_float_abi_soft) +#error "__riscv_float_abi_soft" +#endif +#if defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if !defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if !defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medany" +#endif +#if !defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_pic" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-4.c b/gcc/testsuite/gcc.target/riscv/predef-4.c new file mode 100644 index 0000000000000..822f61782c327 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-4.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64ia -mabi=lp64 -mcmodel=medlow" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if defined(__riscv_32e) +#error "__riscv_32e" +#endif + +#if !defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if defined(__riscv_div) +#error "__riscv_div" +#endif +#if defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if !defined(__riscv_float_abi_soft) +#error "__riscv_float_abi_soft" +#endif +#if defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if !defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_medlow" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-5.c b/gcc/testsuite/gcc.target/riscv/predef-5.c new file mode 100644 index 0000000000000..6649049099d80 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-5.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64imf -mabi=lp64f -mcmodel=medany" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if defined(__riscv_32e) +#error "__riscv_32e" +#endif + +#if defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if !defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if !defined(__riscv_div) +#error "__riscv_div" +#endif +#if !defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if !defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if defined(__riscv_float_abi_soft) +#error 
"__riscv_float_abi_soft" +#endif +#if !defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if !defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_medlow" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-6.c b/gcc/testsuite/gcc.target/riscv/predef-6.c new file mode 100644 index 0000000000000..7530f9598aeb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-6.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gc -mabi=lp64d -fpic" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if !defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if defined(__riscv_32e) +#error "__riscv_32e" +#endif + +#if !defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if !defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if !defined(__riscv_div) +#error "__riscv_div" +#endif +#if !defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if !defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if defined(__riscv_float_abi_soft) +#error "__riscv_float_abi_soft" +#endif +#if defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if !defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if !defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medany" +#endif +#if !defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_medpic" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-7.c b/gcc/testsuite/gcc.target/riscv/predef-7.c new file mode 100644 index 0000000000000..0358f325c5d83 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-7.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32em -mabi=ilp32e -mno-div -mcmodel=medlow" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if !defined(__riscv_32e) +#error "__riscv_32e" +#endif + +#if defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if !defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if defined(__riscv_div) +#error "__riscv_div" +#endif +#if defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 32 +#error "__riscv_xlen" +#endif + +#if defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if !defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if !defined(__riscv_float_abi_soft) +#error "__riscv_float_abi_soft" +#endif +#if defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if !defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_medlow" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-8.c b/gcc/testsuite/gcc.target/riscv/predef-8.c new file 
mode 100644 index 0000000000000..41cd9feab03f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-8.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32if -mabi=ilp32f -mno-fdiv -mcmodel=medany" } */ + +int main () { +#if !defined(__riscv) +#error "__riscv" +#endif + +#if defined(__riscv_compressed) +#error "__riscv_compressed" +#endif + +#if defined(__riscv_32e) +#error "__riscv_32e" +#endif + +#if defined(__riscv_atomic) +#error "__riscv_atomic" +#endif + +#if defined(__riscv_mul) +#error "__riscv_mul" +#endif +#if defined(__riscv_div) +#error "__riscv_div" +#endif +#if defined(__riscv_muldiv) +#error "__riscv_muldiv" +#endif + +#if __riscv_xlen != 32 +#error "__riscv_xlen" +#endif + +#if defined(__riscv_fdiv) +#error "__riscv_fdiv" +#endif +#if defined(__riscv_fsqrt) +#error "__riscv_fsqrt" +#endif + +#if defined(__riscv_abi_rve) +#error "__riscv_abi_rve" +#endif +#if defined(__riscv_float_abi_soft) +#error "__riscv_float_abi_soft" +#endif +#if !defined(__riscv_float_abi_single) +#error "__riscv_float_abi_single" +#endif +#if defined(__riscv_float_abi_double) +#error "__riscv_float_abi_double" +#endif + +#if defined(__riscv_cmodel_medlow) +#error "__riscv_cmodel_medlow" +#endif +#if !defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medlow" +#endif +#if defined(__riscv_cmodel_pic) +#error "__riscv_cmodel_medlow" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c b/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c new file mode 100644 index 0000000000000..401fb42112994 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ + +void *get_tp() +{ + return __builtin_thread_pointer (); +} +/* { dg-final { scan-assembler "mv\[ \t\]*\[at\]\[0-9\]+,tp" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/riscv.exp b/gcc/testsuite/gcc.target/riscv/riscv.exp index 6a141964d22a9..2b83bf9e5a239 100644 --- a/gcc/testsuite/gcc.target/riscv/riscv.exp +++ b/gcc/testsuite/gcc.target/riscv/riscv.exp @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Free Software Foundation, Inc. +# Copyright (C) 2017-2021 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-2.c b/gcc/testsuite/gcc.target/riscv/save-restore-2.c new file mode 100644 index 0000000000000..204bf67b66e5e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-2.c @@ -0,0 +1,22 @@ +/* { dg-options "-Os -msave-restore" } */ + +/* With -msave-restore in use it should not be possible to remove the calls + to the save and restore stubs in this case (in current GCC). */ + +extern void fn2 (); + +volatile int a = 0; + +int +fn1 () +{ + fn2 (); + + while (a) + ; + + return 0; +} + +/* { dg-final { scan-assembler "call\[ \t\]*t0,__riscv_save_0" } } */ +/* { dg-final { scan-assembler "tail\[ \t\]*__riscv_restore_0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-3.c b/gcc/testsuite/gcc.target/riscv/save-restore-3.c new file mode 100644 index 0000000000000..6bf9fb014d6b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-3.c @@ -0,0 +1,16 @@ +/* { dg-options "-Os -msave-restore" } */ + +/* With -msave-restore in use GCC should be able to remove the calls to the + save and restore stubs in this case, replacing them with a tail call to + foo. 
*/ + +extern int foo (); + +int bar () +{ + return foo (); +} + +/* { dg-final { scan-assembler-not "call\[ \t\]*t0,__riscv_save_0" } } */ +/* { dg-final { scan-assembler-not "tail\[ \t\]*__riscv_restore_0" } } */ +/* { dg-final { scan-assembler "tail\[ \t\]*foo" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-4.c b/gcc/testsuite/gcc.target/riscv/save-restore-4.c new file mode 100644 index 0000000000000..9a0313f2c427e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-4.c @@ -0,0 +1,19 @@ +/* { dg-options "-Os -msave-restore" } */ + +/* This test covers a case where we can't (currently) remove the calls to + the save/restore stubs. The cast of the return value from BAR requires + a zero extension between the call to BAR, and the return from FOO, this + currently prevents the removal of the save/restore calls. */ + +typedef unsigned long long u_64; +typedef unsigned int u_32; + +extern u_32 bar (u_32 arg); + +u_64 foo (u_32 arg) +{ + return (u_64) bar (arg); +} + +/* { dg-final { scan-assembler "call\[ \t\]*t0,__riscv_save_0" } } */ +/* { dg-final { scan-assembler "tail\[ \t\]*__riscv_restore_0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-5.c b/gcc/testsuite/gcc.target/riscv/save-restore-5.c new file mode 100644 index 0000000000000..fe0ffdcd50446 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-5.c @@ -0,0 +1,9 @@ +typedef int (*FPTR) (void); +FPTR a; + +int +func () +{ + int b = a (); + return b; +} diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-6.c b/gcc/testsuite/gcc.target/riscv/save-restore-6.c new file mode 100644 index 0000000000000..530865456a22b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-6.c @@ -0,0 +1,16 @@ +/* { dg-options "-Os -msave-restore" } */ + +/* With -msave-restore in use GCC should be able to remove the calls to the + save and restore stubs in this case, replacing them with a tail call to + other_func. */ + +extern void other_func (); + +void func () +{ + other_func (); +} + +/* { dg-final { scan-assembler-not "call\[ \t\]*t0,__riscv_save_0" } } */ +/* { dg-final { scan-assembler-not "tail\[ \t\]*__riscv_restore_0" } } */ +/* { dg-final { scan-assembler "tail\[ \t\]*other_func" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-7.c b/gcc/testsuite/gcc.target/riscv/save-restore-7.c new file mode 100644 index 0000000000000..06719c4e41383 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-7.c @@ -0,0 +1,30 @@ +/* { dg-options "-Os -msave-restore" } */ + +/* With -msave-restore in use it should not be possible to remove the calls + to the save and restore stubs in this case (in current GCC). */ + +enum + { + VAL_A, + VAL_B, + VAL_C, + VAL_D + } a; + +extern void other_1 (); +extern void other_2 (); + +void func () +{ + switch (a) + { + case VAL_B: + case VAL_C: + other_1 (); + case VAL_D: + other_2 (); + } +} + +/* { dg-final { scan-assembler "call\[ \t\]*t0,__riscv_save_0" } } */ +/* { dg-final { scan-assembler "tail\[ \t\]*__riscv_restore_0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-8.c b/gcc/testsuite/gcc.target/riscv/save-restore-8.c new file mode 100644 index 0000000000000..8880cd288eea8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-8.c @@ -0,0 +1,12 @@ +/* { dg-options "-Os -msave-restore" } */ + +/* As a leaf function this should never have the calls to the save and + restore stubs added, but lets check anyway. 
*/ + +int func () +{ + return 3; +} + +/* { dg-final { scan-assembler-not "call\[ \t\]*t0,__riscv_save_0" } } */ +/* { dg-final { scan-assembler-not "tail\[ \t\]*__riscv_restore_0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-9.c b/gcc/testsuite/gcc.target/riscv/save-restore-9.c new file mode 100644 index 0000000000000..2567daeb376be --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/save-restore-9.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msave-restore" } */ + +int +__attribute__((noinline,noclone)) +foo (int u) +{ + return u + 1; +} + +int +__attribute__((noinline,noclone)) +bar (int a, int b, int c, int d, int e, int f, int g, int h, int u) +{ + return foo (u); +} + +int main() +{ + if (bar (1, 2, 3, 4, 5, 6, 7, 8, 9) != 10) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-1.c b/gcc/testsuite/gcc.target/riscv/shift-shift-1.c new file mode 100644 index 0000000000000..a5343a31b140d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc -mabi=ilp32 -O" } */ + +/* Test for lshrsi3_zero_extend_3+1 pattern that uses p2m1_shift_operand. */ +unsigned int +sub1 (unsigned int i) +{ + return (i << 1) >> 1; +} + +unsigned int +sub2 (unsigned int i) +{ + return (i << 20) >> 20; +} +/* { dg-final { scan-assembler-times "slli" 2 } } */ +/* { dg-final { scan-assembler-times "srli" 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-2.c b/gcc/testsuite/gcc.target/riscv/shift-shift-2.c new file mode 100644 index 0000000000000..10a5bb728bec2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-2.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64 -O" } */ + +/* Test for lshrsi3_zero_extend_3+1 pattern that uses p2m1_shift_operand. */ +unsigned int +sub1 (unsigned int i) +{ + return (i << 1) >> 1; +} + +unsigned int +sub2 (unsigned int i) +{ + return (i << 20) >> 20; +} + +unsigned long +sub3 (unsigned long i) +{ + return (i << 1) >> 1; +} + +unsigned long +sub4 (unsigned long i) +{ + return (i << 52) >> 52; +} + +unsigned int +sub5 (unsigned int i) +{ + unsigned int j; + j = i >> 24; + j = j * (1 << 24); + j = i - j; + return j; +} +/* { dg-final { scan-assembler-times "slli" 5 } } */ +/* { dg-final { scan-assembler-times "srli" 5 } } */ +/* { dg-final { scan-assembler-times "slliw" 1 } } */ +/* { dg-final { scan-assembler-times "srliw" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-3.c b/gcc/testsuite/gcc.target/riscv/shift-shift-3.c new file mode 100644 index 0000000000000..c974e75b38a9b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64 -O" } */ + +/* Test for lshrsi3_zero_extend_3+2 pattern that uses + high_mask_shift_operand. 
*/ +unsigned long +sub1 (unsigned long i) +{ + return (i >> 32) << 32; +} + +unsigned long +sub2 (unsigned long i) +{ + return (i >> 63) << 63; +} +/* { dg-final { scan-assembler-times "slli" 2 } } */ +/* { dg-final { scan-assembler-times "srli" 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-4.c b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c new file mode 100644 index 0000000000000..72a45ee87ae63 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32i -mabi=ilp32 -O2" } */ + +/* One zero-extend shift can be eliminated by modifying the constant in the + greater than test. Started working after modifying the splitter + lshrsi3_zero_extend_3+1 to use a temporary reg for the first split dest. */ +int +sub (int i) +{ + i &= 0x7fffffff; + return i > 0x7f800000; +} +/* { dg-final { scan-assembler-not "srli" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-5.c b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c new file mode 100644 index 0000000000000..0ecab9723c9cf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O2" } */ + +/* Fails if lshrsi3_zero_extend_3+1 uses a temp reg which has no REG_DEST + note. */ +unsigned long +sub (long l) +{ + union u { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + struct s { int a : 19; unsigned int b : 13; int x; } s; +#else + struct s { int x; unsigned int b : 13; int a : 19; } s; +#endif + long l; + } u; + u.l = l; + return u.s.b; +} +/* { dg-final { scan-assembler "srliw" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-1.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-1.c new file mode 100644 index 0000000000000..958942a6f7f02 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-1.c @@ -0,0 +1,26 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32" } */ + +/* These stores cannot be compressed because x0 is not a compressed reg. + Therefore the shorten_memrefs pass should not attempt to rewrite them into a + compressible format. */ + +void +store1z (int *array) +{ + array[200] = 0; + array[201] = 0; + array[202] = 0; + array[203] = 0; +} + +void +store2z (long long *array) +{ + array[200] = 0; + array[201] = 0; + array[202] = 0; + array[203] = 0; +} + +/* { dg-final { scan-assembler-not "store1z:\n\taddi" } } */ +/* { dg-final { scan-assembler-not "store2z:\n\taddi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-2.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-2.c new file mode 100644 index 0000000000000..2c2f41548c618 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-2.c @@ -0,0 +1,51 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32" } */ + +/* shorten_memrefs should rewrite these load/stores into a compressible + format. 
*/
+
+void
+store1a (int *array, int a)
+{
+  array[200] = a;
+  array[201] = a;
+  array[202] = a;
+  array[203] = a;
+}
+
+void
+store2a (long long *array, long long a)
+{
+  array[200] = a;
+  array[201] = a;
+  array[202] = a;
+  array[203] = a;
+}
+
+int
+load1r (int *array)
+{
+  int a = 0;
+  a += array[200];
+  a += array[201];
+  a += array[202];
+  a += array[203];
+  return a;
+}
+
+long long
+load2r (long long *array)
+{
+  int a = 0;
+  a += array[200];
+  a += array[201];
+  a += array[202];
+  a += array[203];
+  return a;
+}
+
+/* { dg-final { scan-assembler "store1a:\n\taddi" } } */
+/* The sd insns in store2a are not rewritten because shorten_memrefs currently
+   only optimizes lw and sw.  */
+/* { dg-final { scan-assembler "store2a:\n\taddi" { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler "load1r:\n\taddi" } } */
+/* { dg-final { scan-assembler "load2r:\n\taddi" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-3.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-3.c
new file mode 100644
index 0000000000000..2001fe871ee1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-3.c
@@ -0,0 +1,39 @@
+/* { dg-options "-Os -march=rv32imc -mabi=ilp32" } */
+
+/* These loads cannot be compressed because only one compressed reg is
+   available (since args are passed in a0-a4, that leaves a5-a7 available, of
+   which only a5 is a compressed reg).  Therefore the shorten_memrefs pass should
+   not attempt to rewrite these loads into a compressible format.  It may not
+   be possible to avoid this because shorten_memrefs happens before reg alloc.
+*/
+
+extern int sub1 (int, int, int, int, int, int, int);
+
+int
+load1a (int a0, int a1, int a2, int a3, int a4, int *array)
+{
+  int a = 0;
+  a += array[200];
+  a += array[201];
+  a += array[202];
+  a += array[203];
+  return sub1 (a0, a1, a2, a3, a4, 0, a);
+}
+
+extern long long sub2 (long long, long long, long long, long long, long long,
+                       long long, long long);
+
+long long
+load2a (long long a0, long long a1, long long a2, long long a3, long long a4,
+        long long *array)
+{
+  int a = 0;
+  a += array[200];
+  a += array[201];
+  a += array[202];
+  a += array[203];
+  return sub2 (a0, a1, a2, a3, a4, 0, a);
+}
+
+/* { dg-final { scan-assembler-not "load1a:\n\taddi" { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not "load2a:\n.*addi\[ \t\]*\[at\]\[0-9\],\[at\]\[0-9\],\[0-9\]*" { xfail riscv*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-4.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-4.c
new file mode 100644
index 0000000000000..cd4784913e473
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-4.c
@@ -0,0 +1,26 @@
+/* { dg-options "-Os -march=rv64imc -mabi=lp64" } */
+
+/* These stores cannot be compressed because x0 is not a compressed reg.
+   Therefore the shorten_memrefs pass should not attempt to rewrite them into a
+   compressible format.
*/
+
+void
+store1z (int *array)
+{
+  array[200] = 0;
+  array[201] = 0;
+  array[202] = 0;
+  array[203] = 0;
+}
+
+void
+store2z (long long *array)
+{
+  array[200] = 0;
+  array[201] = 0;
+  array[202] = 0;
+  array[203] = 0;
+}
+
+/* { dg-final { scan-assembler-not "store1z:\n\taddi" } } */
+/* { dg-final { scan-assembler-not "store2z:\n\taddi" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-5.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-5.c
new file mode 100644
index 0000000000000..80b3897e4da17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-5.c
@@ -0,0 +1,53 @@
+/* { dg-options "-Os -march=rv64imc -mabi=lp64" } */
+
+/* shorten_memrefs should rewrite these load/stores into a compressible
+   format.  */
+
+void
+store1a (int *array, int a)
+{
+  array[200] = a;
+  array[201] = a;
+  array[202] = a;
+  array[203] = a;
+}
+
+void
+store2a (long long *array, long long a)
+{
+  array[200] = a;
+  array[201] = a;
+  array[202] = a;
+  array[203] = a;
+}
+
+int
+load1r (int *array)
+{
+  int a = 0;
+  a += array[200];
+  a += array[201];
+  a += array[202];
+  a += array[203];
+  return a;
+}
+
+long long
+load2r (long long *array)
+{
+  int a = 0;
+  a += array[200];
+  a += array[201];
+  a += array[202];
+  a += array[203];
+  return a;
+}
+
+/* { dg-final { scan-assembler "store1a:\n\taddi" } } */
+/* The sd insns in store2a are not rewritten because shorten_memrefs currently
+   only optimizes lw and sw.  */
+/* { dg-final { scan-assembler "store2a:\n\taddi" { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler "load1r:\n\taddi" } } */
+/* The ld insns in load2r are not rewritten because shorten_memrefs currently
+   only optimizes lw and sw.  */
+/* { dg-final { scan-assembler "load2r:\n\taddi" { xfail riscv*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-6.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-6.c
new file mode 100644
index 0000000000000..3403c7044df05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-6.c
@@ -0,0 +1,39 @@
+/* { dg-options "-Os -march=rv64imc -mabi=lp64" } */
+
+/* These loads cannot be compressed because only one compressed reg is
+   available (since args are passed in a0-a4, that leaves a5-a7 available, of
+   which only a5 is a compressed reg).  Therefore the shorten_memrefs pass should
+   not attempt to rewrite these loads into a compressible format.  It may not
+   be possible to avoid this because shorten_memrefs happens before reg alloc.
+*/ + +extern int sub1 (int, int, int, int, int, int, int); + +int +load1a (int a0, int a1, int a2, int a3, int a4, int *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return sub1 (a0, a1, a2, a3, a4, 0, a); +} + +extern long long sub2 (long long, long long, long long, long long, long long, + long long, long long); + +long long +load2a (long long a0, long long a1, long long a2, long long a3, long long a4, + long long *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return sub2 (a0, a1, a2, a3, a4, 0, a); +} + +/* { dg-final { scan-assembler-not "load1a:\n\taddi" { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-not "load2a:\n.*addi\[ \t\]*\[at\]\[0-9\],\[at\]\[0-9\],\[0-9\]*" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-7.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-7.c new file mode 100644 index 0000000000000..a5833fd356d8a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-7.c @@ -0,0 +1,46 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32 -mno-shorten-memrefs" } */ + +/* Check that these load/stores do not get rewritten into a compressible format + when shorten_memrefs is disabled. */ + +void +store1a (int *array, int a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +void +store2a (long long *array, long long a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +int +load1r (int *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +long long +load2r (long long *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +/* { dg-final { scan-assembler-not "addi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-8.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-8.c new file mode 100644 index 0000000000000..a9128caeea9d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-8.c @@ -0,0 +1,27 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32" } */ + +/* shorten_memrefs should use a correct base address*/ + +void +store (char *p, int k) +{ + *(int *)(p + 17) = k; + *(int *)(p + 21) = k; + *(int *)(p + 25) = k; + *(int *)(p + 29) = k; +} + +int +load (char *p) +{ + int a = 0; + a += *(int *)(p + 17); + a += *(int *)(p + 21); + a += *(int *)(p + 25); + a += *(int *)(p + 29); + return a; +} + +/* { dg-final { scan-assembler "store:\n\taddi\ta\[0-7\],a\[0-7\],1" } } */ +/* { dg-final { scan-assembler "load:\n\taddi\ta\[0-7\],a\[0-7\],1" } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/switch-qi.c b/gcc/testsuite/gcc.target/riscv/switch-qi.c new file mode 100644 index 0000000000000..973d09aaaf110 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/switch-qi.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { riscv64*-*-* } } } */ +/* { dg-options "-march=rv64gc -mabi=lp64 -O2" } */ + +/* Test for riscv_extend_comparands patch. 
*/ +extern void asdf(int); +void foo(signed char x) { + switch (x) { + case 0: asdf(10); break; + case 1: asdf(11); break; + case 2: asdf(12); break; + case 3: asdf(13); break; + case 4: asdf(14); break; + } +} +/* { dg-final { scan-assembler-not "andi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/switch-si.c b/gcc/testsuite/gcc.target/riscv/switch-si.c new file mode 100644 index 0000000000000..de4d68f4d0e96 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/switch-si.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* Test for do_tablejump patch. */ +extern void asdf(int); +void foo(int x) { + switch (x) { + case 0: asdf(10); break; + case 1: asdf(11); break; + case 2: asdf(12); break; + case 3: asdf(13); break; + case 4: asdf(14); break; + } +} +/* { dg-final { scan-assembler-not "srli" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/weak-1.c b/gcc/testsuite/gcc.target/riscv/weak-1.c new file mode 100644 index 0000000000000..0f20501f7c532 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/weak-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmodel=medany -mexplicit-relocs -O" } */ + +/* Verify that the branch doesn't get optimized away. */ +extern int weak_func(void) __attribute__ ((weak)); + +int +sub (void) +{ + if (weak_func) + return weak_func (); + return 0; +} +/* { dg-final { scan-assembler "b\(ne|eq\)" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-5.c b/gcc/testsuite/gcc.target/riscv/zero-extend-5.c new file mode 100644 index 0000000000000..1a135b8c097f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zero-extend-5.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { riscv64*-*-* } } } */ +/* { dg-options "-march=rv64gc -mabi=lp64 -O2" } */ +int +sub (unsigned int i, unsigned int j, unsigned int k, int *array) +{ + return array[i] + array[j] + array[k]; +} +/* { dg-final { scan-assembler-times "slli" 3 } } */
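
Illustrative aside, not part of the patch: the shift-shift-*.c tests above (and, less directly, zero-extend-5.c) rely on standard unsigned-shift identities, namely that a left/right shift pair by N masks off high or low bits and that zero-extending a 32-bit value is equivalent to a shift pair by 32. The small host-runnable C sketch below simply asserts those identities with made-up values; it makes no claim about the assembly GCC actually emits, and the mask/operand descriptions in the comments are only how the predicate names in the tests appear to read.

/* Illustrative sketch only (not part of the patch): check the unsigned
   shift identities the shift-shift-*.c tests depend on.  */

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t i = 0xdeadbeefu;
  uint64_t l = 0x0123456789abcdefull;

  /* (i << N) >> N keeps the low (32 - N) bits, i.e. it acts as an AND with
     the kind of power-of-two-minus-one mask that p2m1_shift_operand in
     shift-shift-1.c appears to name.  */
  assert (((i << 20) >> 20) == (i & 0xfffu));

  /* (l >> N) << N clears the low N bits, i.e. it acts as an AND with a
     high mask (the high_mask_shift_operand case in shift-shift-3.c).  */
  assert (((l >> 32) << 32) == (l & 0xffffffff00000000ull));

  /* Zero-extending a 32-bit value into 64 bits equals a left/right shift
     pair by 32, consistent with the RV64 tests counting slli/srli rather
     than scanning for a separate extension instruction.  */
  assert ((uint64_t) i == (((uint64_t) i << 32) >> 32));

  return 0;
}

Compiled for the host with any C99 compiler, this should run to completion without tripping an assertion.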