Commit 61650023 authored by Huacai Chen's avatar Huacai Chen
Browse files

LoongArch: Add vector extensions support



Add LoongArch's vector extensions support, which including 128bit LSX
(i.e., Loongson SIMD eXtension) and 256bit LASX (i.e., Loongson Advanced
SIMD eXtension).

Linux kernel doesn't use vector itself, it only handle exceptions and
context save/restore. So it only needs a subset of these instructions:

* Vector load/store:   vld vst vldx vstx xvld xvst xvldx xvstx
* 8bit-elements move:  vpickve2gr.b xvpickve2gr.b vinsgr2vr.b xvinsgr2vr.b
* 16bit-elements move: vpickve2gr.h xvpickve2gr.h vinsgr2vr.h xvinsgr2vr.h
* 32bit-elements move: vpickve2gr.w xvpickve2gr.w vinsgr2vr.w xvinsgr2vr.w
* 64bit-elements move: vpickve2gr.d xvpickve2gr.d vinsgr2vr.d xvinsgr2vr.d
* Elements permute:    vpermi.w vpermi.d xvpermi.w xvpermi.d xvpermi.q

Introduce AS_HAS_LSX_EXTENSION and AS_HAS_LASX_EXTENSION to avoid non-
vector toolchains complains unsupported instructions.

Signed-off-by: default avatarHuacai Chen <chenhuacai@loongson.cn>
parent aa5e65dc
Loading
Loading
Loading
Loading
+43 −8
Original line number Diff line number Diff line
@@ -164,14 +164,6 @@ config 32BIT
config 64BIT
	def_bool y

config CPU_HAS_FPU
	bool
	default y

config CPU_HAS_PREFETCH
	bool
	default y

config GENERIC_BUG
	def_bool y
	depends on BUG
@@ -247,6 +239,12 @@ config AS_HAS_EXPLICIT_RELOCS
config AS_HAS_FCSR_CLASS
	def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0)

config AS_HAS_LSX_EXTENSION
	def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0)

config AS_HAS_LASX_EXTENSION
	def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)

menu "Kernel type and options"

source "kernel/Kconfig.hz"
@@ -487,6 +485,43 @@ config ARCH_STRICT_ALIGN
	  to run kernel only on systems with h/w unaligned access support in
	  order to optimise for performance.

config CPU_HAS_FPU
	bool
	default y

config CPU_HAS_LSX
	bool "Support for the Loongson SIMD Extension"
	depends on AS_HAS_LSX_EXTENSION
	help
	  Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers
	  and a set of SIMD instructions to operate on them. When this option
	  is enabled the kernel will support allocating & switching LSX
	  vector register contexts. If you know that your kernel will only be
	  running on CPUs which do not support LSX or that your userland will
	  not be making use of it then you may wish to say N here to reduce
	  the size & complexity of your kernel.

	  If unsure, say Y.

config CPU_HAS_LASX
	bool "Support for the Loongson Advanced SIMD Extension"
	depends on CPU_HAS_LSX
	depends on AS_HAS_LASX_EXTENSION
	help
	  Loongson Advanced SIMD Extension (LASX) introduces 256 bit wide vector
	  registers and a set of SIMD instructions to operate on them. When this
	  option is enabled the kernel will support allocating & switching LASX
	  vector register contexts. If you know that your kernel will only be
	  running on CPUs which do not support LASX or that your userland will
	  not be making use of it then you may wish to say N here to reduce
	  the size & complexity of your kernel.

	  If unsure, say Y.

config CPU_HAS_PREFETCH
	bool
	default y

config KEXEC
	bool "Kexec system call"
	select KEXEC_CORE
+393 −0
Original line number Diff line number Diff line
@@ -270,6 +270,399 @@
	fld.d	$f31, \tmp, THREAD_FPR31 - THREAD_FPR0
	.endm

	.macro	lsx_save_data thread tmp
	li.w	\tmp, THREAD_FPR0
	PTR_ADD \tmp, \thread, \tmp
	vst	$vr0, \tmp, THREAD_FPR0  - THREAD_FPR0
	vst	$vr1, \tmp, THREAD_FPR1  - THREAD_FPR0
	vst	$vr2, \tmp, THREAD_FPR2  - THREAD_FPR0
	vst	$vr3, \tmp, THREAD_FPR3  - THREAD_FPR0
	vst	$vr4, \tmp, THREAD_FPR4  - THREAD_FPR0
	vst	$vr5, \tmp, THREAD_FPR5  - THREAD_FPR0
	vst	$vr6, \tmp, THREAD_FPR6  - THREAD_FPR0
	vst	$vr7, \tmp, THREAD_FPR7  - THREAD_FPR0
	vst	$vr8, \tmp, THREAD_FPR8  - THREAD_FPR0
	vst	$vr9, \tmp, THREAD_FPR9  - THREAD_FPR0
	vst	$vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
	vst	$vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
	vst	$vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
	vst	$vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
	vst	$vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
	vst	$vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
	vst	$vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
	vst	$vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
	vst	$vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
	vst	$vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
	vst	$vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
	vst	$vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
	vst	$vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
	vst	$vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
	vst	$vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
	vst	$vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
	vst	$vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
	vst	$vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
	vst	$vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
	vst	$vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
	vst	$vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
	vst	$vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
	.endm

	.macro	lsx_restore_data thread tmp
	li.w	\tmp, THREAD_FPR0
	PTR_ADD	\tmp, \thread, \tmp
	vld	$vr0, \tmp, THREAD_FPR0  - THREAD_FPR0
	vld	$vr1, \tmp, THREAD_FPR1  - THREAD_FPR0
	vld	$vr2, \tmp, THREAD_FPR2  - THREAD_FPR0
	vld	$vr3, \tmp, THREAD_FPR3  - THREAD_FPR0
	vld	$vr4, \tmp, THREAD_FPR4  - THREAD_FPR0
	vld	$vr5, \tmp, THREAD_FPR5  - THREAD_FPR0
	vld	$vr6, \tmp, THREAD_FPR6  - THREAD_FPR0
	vld	$vr7, \tmp, THREAD_FPR7  - THREAD_FPR0
	vld	$vr8, \tmp, THREAD_FPR8  - THREAD_FPR0
	vld	$vr9, \tmp, THREAD_FPR9  - THREAD_FPR0
	vld	$vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
	vld	$vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
	vld	$vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
	vld	$vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
	vld	$vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
	vld	$vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
	vld	$vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
	vld	$vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
	vld	$vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
	vld	$vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
	vld	$vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
	vld	$vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
	vld	$vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
	vld	$vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
	vld	$vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
	vld	$vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
	vld	$vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
	vld	$vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
	vld	$vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
	vld	$vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
	vld	$vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
	vld	$vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
	.endm

	.macro	lsx_save_all	thread tmp0 tmp1
	fpu_save_cc		\thread, \tmp0, \tmp1
	fpu_save_csr		\thread, \tmp0
	lsx_save_data		\thread, \tmp0
	.endm

	.macro	lsx_restore_all	thread tmp0 tmp1
	lsx_restore_data	\thread, \tmp0
	fpu_restore_cc		\thread, \tmp0, \tmp1
	fpu_restore_csr		\thread, \tmp0
	.endm

	.macro	lsx_save_upper vd base tmp off
	vpickve2gr.d	\tmp, \vd, 1
	st.d		\tmp, \base, (\off+8)
	.endm

	.macro	lsx_save_all_upper thread base tmp
	li.w		\tmp, THREAD_FPR0
	PTR_ADD		\base, \thread, \tmp
	lsx_save_upper	$vr0,  \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
	lsx_save_upper	$vr1,  \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
	lsx_save_upper	$vr2,  \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
	lsx_save_upper	$vr3,  \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
	lsx_save_upper	$vr4,  \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
	lsx_save_upper	$vr5,  \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
	lsx_save_upper	$vr6,  \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
	lsx_save_upper	$vr7,  \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
	lsx_save_upper	$vr8,  \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
	lsx_save_upper	$vr9,  \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
	lsx_save_upper	$vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
	lsx_save_upper	$vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
	lsx_save_upper	$vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
	lsx_save_upper	$vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
	lsx_save_upper	$vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
	lsx_save_upper	$vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
	lsx_save_upper	$vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
	lsx_save_upper	$vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
	lsx_save_upper	$vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
	lsx_save_upper	$vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
	lsx_save_upper	$vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
	lsx_save_upper	$vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
	lsx_save_upper	$vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
	lsx_save_upper	$vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
	lsx_save_upper	$vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
	lsx_save_upper	$vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
	lsx_save_upper	$vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
	lsx_save_upper	$vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
	lsx_save_upper	$vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
	lsx_save_upper	$vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
	lsx_save_upper	$vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
	lsx_save_upper	$vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
	.endm

	.macro	lsx_restore_upper vd base tmp off
	ld.d		\tmp, \base, (\off+8)
	vinsgr2vr.d	\vd,  \tmp, 1
	.endm

	.macro	lsx_restore_all_upper thread base tmp
	li.w		  \tmp, THREAD_FPR0
	PTR_ADD		  \base, \thread, \tmp
	lsx_restore_upper $vr0,  \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
	lsx_restore_upper $vr1,  \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
	lsx_restore_upper $vr2,  \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
	lsx_restore_upper $vr3,  \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
	lsx_restore_upper $vr4,  \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
	lsx_restore_upper $vr5,  \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
	lsx_restore_upper $vr6,  \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
	lsx_restore_upper $vr7,  \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
	lsx_restore_upper $vr8,  \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
	lsx_restore_upper $vr9,  \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
	lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
	lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
	lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
	lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
	lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
	lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
	lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
	lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
	lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
	lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
	lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
	lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
	lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
	lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
	lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
	lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
	lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
	lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
	lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
	lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
	lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
	lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
	.endm

	.macro	lsx_init_upper vd tmp
	vinsgr2vr.d	\vd, \tmp, 1
	.endm

	.macro	lsx_init_all_upper tmp
	not		\tmp, zero
	lsx_init_upper	$vr0 \tmp
	lsx_init_upper	$vr1 \tmp
	lsx_init_upper	$vr2 \tmp
	lsx_init_upper	$vr3 \tmp
	lsx_init_upper	$vr4 \tmp
	lsx_init_upper	$vr5 \tmp
	lsx_init_upper	$vr6 \tmp
	lsx_init_upper	$vr7 \tmp
	lsx_init_upper	$vr8 \tmp
	lsx_init_upper	$vr9 \tmp
	lsx_init_upper	$vr10 \tmp
	lsx_init_upper	$vr11 \tmp
	lsx_init_upper	$vr12 \tmp
	lsx_init_upper	$vr13 \tmp
	lsx_init_upper	$vr14 \tmp
	lsx_init_upper	$vr15 \tmp
	lsx_init_upper	$vr16 \tmp
	lsx_init_upper	$vr17 \tmp
	lsx_init_upper	$vr18 \tmp
	lsx_init_upper	$vr19 \tmp
	lsx_init_upper	$vr20 \tmp
	lsx_init_upper	$vr21 \tmp
	lsx_init_upper	$vr22 \tmp
	lsx_init_upper	$vr23 \tmp
	lsx_init_upper	$vr24 \tmp
	lsx_init_upper	$vr25 \tmp
	lsx_init_upper	$vr26 \tmp
	lsx_init_upper	$vr27 \tmp
	lsx_init_upper	$vr28 \tmp
	lsx_init_upper	$vr29 \tmp
	lsx_init_upper	$vr30 \tmp
	lsx_init_upper	$vr31 \tmp
	.endm

	.macro	lasx_save_data thread tmp
	li.w	\tmp, THREAD_FPR0
	PTR_ADD	\tmp, \thread, \tmp
	xvst	$xr0, \tmp, THREAD_FPR0  - THREAD_FPR0
	xvst	$xr1, \tmp, THREAD_FPR1  - THREAD_FPR0
	xvst	$xr2, \tmp, THREAD_FPR2  - THREAD_FPR0
	xvst	$xr3, \tmp, THREAD_FPR3  - THREAD_FPR0
	xvst	$xr4, \tmp, THREAD_FPR4  - THREAD_FPR0
	xvst	$xr5, \tmp, THREAD_FPR5  - THREAD_FPR0
	xvst	$xr6, \tmp, THREAD_FPR6  - THREAD_FPR0
	xvst	$xr7, \tmp, THREAD_FPR7  - THREAD_FPR0
	xvst	$xr8, \tmp, THREAD_FPR8  - THREAD_FPR0
	xvst	$xr9, \tmp, THREAD_FPR9  - THREAD_FPR0
	xvst	$xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
	xvst	$xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
	xvst	$xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
	xvst	$xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
	xvst	$xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
	xvst	$xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
	xvst	$xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
	xvst	$xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
	xvst	$xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
	xvst	$xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
	xvst	$xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
	xvst	$xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
	xvst	$xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
	xvst	$xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
	xvst	$xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
	xvst	$xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
	xvst	$xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
	xvst	$xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
	xvst	$xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
	xvst	$xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
	xvst	$xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
	xvst	$xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
	.endm

	.macro	lasx_restore_data thread tmp
	li.w	\tmp, THREAD_FPR0
	PTR_ADD	\tmp, \thread, \tmp
	xvld	$xr0, \tmp, THREAD_FPR0  - THREAD_FPR0
	xvld	$xr1, \tmp, THREAD_FPR1  - THREAD_FPR0
	xvld	$xr2, \tmp, THREAD_FPR2  - THREAD_FPR0
	xvld	$xr3, \tmp, THREAD_FPR3  - THREAD_FPR0
	xvld	$xr4, \tmp, THREAD_FPR4  - THREAD_FPR0
	xvld	$xr5, \tmp, THREAD_FPR5  - THREAD_FPR0
	xvld	$xr6, \tmp, THREAD_FPR6  - THREAD_FPR0
	xvld	$xr7, \tmp, THREAD_FPR7  - THREAD_FPR0
	xvld	$xr8, \tmp, THREAD_FPR8  - THREAD_FPR0
	xvld	$xr9, \tmp, THREAD_FPR9  - THREAD_FPR0
	xvld	$xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
	xvld	$xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
	xvld	$xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
	xvld	$xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
	xvld	$xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
	xvld	$xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
	xvld	$xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
	xvld	$xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
	xvld	$xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
	xvld	$xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
	xvld	$xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
	xvld	$xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
	xvld	$xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
	xvld	$xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
	xvld	$xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
	xvld	$xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
	xvld	$xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
	xvld	$xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
	xvld	$xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
	xvld	$xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
	xvld	$xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
	xvld	$xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
	.endm

	.macro	lasx_save_all	thread tmp0 tmp1
	fpu_save_cc		\thread, \tmp0, \tmp1
	fpu_save_csr		\thread, \tmp0
	lasx_save_data		\thread, \tmp0
	.endm

	.macro	lasx_restore_all thread tmp0 tmp1
	lasx_restore_data	\thread, \tmp0
	fpu_restore_cc		\thread, \tmp0, \tmp1
	fpu_restore_csr		\thread, \tmp0
	.endm

	.macro	lasx_save_upper xd base tmp off
	/* Nothing */
	.endm

	.macro	lasx_save_all_upper thread base tmp
	/* Nothing */
	.endm

	.macro	lasx_restore_upper xd base tmp0 tmp1 off
	vld		\tmp0, \base, (\off+16)
	xvpermi.q 	\xd,   \tmp1, 0x2
	.endm

	.macro	lasx_restore_all_upper thread base tmp
	li.w		\tmp, THREAD_FPR0
	PTR_ADD		\base, \thread, \tmp
	/* Save $vr31 ($xr31 lower bits) with xvpickve2gr */
	xvpickve2gr.d	$r17, $xr31, 0
	xvpickve2gr.d	$r18, $xr31, 1
	lasx_restore_upper $xr0, \base, $vr31, $xr31, (THREAD_FPR0-THREAD_FPR0)
	lasx_restore_upper $xr1, \base, $vr31, $xr31, (THREAD_FPR1-THREAD_FPR0)
	lasx_restore_upper $xr2, \base, $vr31, $xr31, (THREAD_FPR2-THREAD_FPR0)
	lasx_restore_upper $xr3, \base, $vr31, $xr31, (THREAD_FPR3-THREAD_FPR0)
	lasx_restore_upper $xr4, \base, $vr31, $xr31, (THREAD_FPR4-THREAD_FPR0)
	lasx_restore_upper $xr5, \base, $vr31, $xr31, (THREAD_FPR5-THREAD_FPR0)
	lasx_restore_upper $xr6, \base, $vr31, $xr31, (THREAD_FPR6-THREAD_FPR0)
	lasx_restore_upper $xr7, \base, $vr31, $xr31, (THREAD_FPR7-THREAD_FPR0)
	lasx_restore_upper $xr8, \base, $vr31, $xr31, (THREAD_FPR8-THREAD_FPR0)
	lasx_restore_upper $xr9, \base, $vr31, $xr31, (THREAD_FPR9-THREAD_FPR0)
	lasx_restore_upper $xr10, \base, $vr31, $xr31, (THREAD_FPR10-THREAD_FPR0)
	lasx_restore_upper $xr11, \base, $vr31, $xr31, (THREAD_FPR11-THREAD_FPR0)
	lasx_restore_upper $xr12, \base, $vr31, $xr31, (THREAD_FPR12-THREAD_FPR0)
	lasx_restore_upper $xr13, \base, $vr31, $xr31, (THREAD_FPR13-THREAD_FPR0)
	lasx_restore_upper $xr14, \base, $vr31, $xr31, (THREAD_FPR14-THREAD_FPR0)
	lasx_restore_upper $xr15, \base, $vr31, $xr31, (THREAD_FPR15-THREAD_FPR0)
	lasx_restore_upper $xr16, \base, $vr31, $xr31, (THREAD_FPR16-THREAD_FPR0)
	lasx_restore_upper $xr17, \base, $vr31, $xr31, (THREAD_FPR17-THREAD_FPR0)
	lasx_restore_upper $xr18, \base, $vr31, $xr31, (THREAD_FPR18-THREAD_FPR0)
	lasx_restore_upper $xr19, \base, $vr31, $xr31, (THREAD_FPR19-THREAD_FPR0)
	lasx_restore_upper $xr20, \base, $vr31, $xr31, (THREAD_FPR20-THREAD_FPR0)
	lasx_restore_upper $xr21, \base, $vr31, $xr31, (THREAD_FPR21-THREAD_FPR0)
	lasx_restore_upper $xr22, \base, $vr31, $xr31, (THREAD_FPR22-THREAD_FPR0)
	lasx_restore_upper $xr23, \base, $vr31, $xr31, (THREAD_FPR23-THREAD_FPR0)
	lasx_restore_upper $xr24, \base, $vr31, $xr31, (THREAD_FPR24-THREAD_FPR0)
	lasx_restore_upper $xr25, \base, $vr31, $xr31, (THREAD_FPR25-THREAD_FPR0)
	lasx_restore_upper $xr26, \base, $vr31, $xr31, (THREAD_FPR26-THREAD_FPR0)
	lasx_restore_upper $xr27, \base, $vr31, $xr31, (THREAD_FPR27-THREAD_FPR0)
	lasx_restore_upper $xr28, \base, $vr31, $xr31, (THREAD_FPR28-THREAD_FPR0)
	lasx_restore_upper $xr29, \base, $vr31, $xr31, (THREAD_FPR29-THREAD_FPR0)
	lasx_restore_upper $xr30, \base, $vr31, $xr31, (THREAD_FPR30-THREAD_FPR0)
	lasx_restore_upper $xr31, \base, $vr31, $xr31, (THREAD_FPR31-THREAD_FPR0)
	/* Restore $vr31 ($xr31 lower bits) with xvinsgr2vr */
	xvinsgr2vr.d	$xr31, $r17, 0
	xvinsgr2vr.d	$xr31, $r18, 1
	.endm

	.macro	lasx_init_upper xd tmp
	xvinsgr2vr.d	\xd, \tmp, 2
	xvinsgr2vr.d	\xd, \tmp, 3
	.endm

	.macro	lasx_init_all_upper tmp
	not		\tmp, zero
	lasx_init_upper	$xr0 \tmp
	lasx_init_upper	$xr1 \tmp
	lasx_init_upper	$xr2 \tmp
	lasx_init_upper	$xr3 \tmp
	lasx_init_upper	$xr4 \tmp
	lasx_init_upper	$xr5 \tmp
	lasx_init_upper	$xr6 \tmp
	lasx_init_upper	$xr7 \tmp
	lasx_init_upper	$xr8 \tmp
	lasx_init_upper	$xr9 \tmp
	lasx_init_upper	$xr10 \tmp
	lasx_init_upper	$xr11 \tmp
	lasx_init_upper	$xr12 \tmp
	lasx_init_upper	$xr13 \tmp
	lasx_init_upper	$xr14 \tmp
	lasx_init_upper	$xr15 \tmp
	lasx_init_upper	$xr16 \tmp
	lasx_init_upper	$xr17 \tmp
	lasx_init_upper	$xr18 \tmp
	lasx_init_upper	$xr19 \tmp
	lasx_init_upper	$xr20 \tmp
	lasx_init_upper	$xr21 \tmp
	lasx_init_upper	$xr22 \tmp
	lasx_init_upper	$xr23 \tmp
	lasx_init_upper	$xr24 \tmp
	lasx_init_upper	$xr25 \tmp
	lasx_init_upper	$xr26 \tmp
	lasx_init_upper	$xr27 \tmp
	lasx_init_upper	$xr28 \tmp
	lasx_init_upper	$xr29 \tmp
	lasx_init_upper	$xr30 \tmp
	lasx_init_upper	$xr31 \tmp
	.endm

.macro not dst src
	nor	\dst, \src, zero
.endm
+182 −3
Original line number Diff line number Diff line
@@ -28,6 +28,26 @@ extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);

extern void _save_lsx(struct loongarch_fpu *fpu);
extern void _restore_lsx(struct loongarch_fpu *fpu);
extern void _init_lsx_upper(void);
extern void _restore_lsx_upper(struct loongarch_fpu *fpu);

extern void _save_lasx(struct loongarch_fpu *fpu);
extern void _restore_lasx(struct loongarch_fpu *fpu);
extern void _init_lasx_upper(void);
extern void _restore_lasx_upper(struct loongarch_fpu *fpu);

static inline void enable_lsx(void);
static inline void disable_lsx(void);
static inline void save_lsx(struct task_struct *t);
static inline void restore_lsx(struct task_struct *t);

static inline void enable_lasx(void);
static inline void disable_lasx(void);
static inline void save_lasx(struct task_struct *t);
static inline void restore_lasx(struct task_struct *t);

/*
 * Mask the FCSR Cause bits according to the Enable bits, observing
 * that Unimplemented is always enabled.
@@ -44,6 +64,29 @@ static inline int is_fp_enabled(void)
		1 : 0;
}

static inline int is_lsx_enabled(void)
{
	if (!cpu_has_lsx)
		return 0;

	return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ?
		1 : 0;
}

static inline int is_lasx_enabled(void)
{
	if (!cpu_has_lasx)
		return 0;

	return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ?
		1 : 0;
}

static inline int is_simd_enabled(void)
{
	return is_lsx_enabled() | is_lasx_enabled();
}

#define enable_fpu()		set_csr_euen(CSR_EUEN_FPEN)

#define disable_fpu()		clear_csr_euen(CSR_EUEN_FPEN)
@@ -81,9 +124,22 @@ static inline void own_fpu(int restore)
static inline void lose_fpu_inatomic(int save, struct task_struct *tsk)
{
	if (is_fpu_owner()) {
		if (!is_simd_enabled()) {
			if (save)
				_save_fp(&tsk->thread.fpu);
			disable_fpu();
		} else {
			if (save) {
				if (!is_lasx_enabled())
					save_lsx(tsk);
				else
					save_lasx(tsk);
			}
			disable_fpu();
			disable_lsx();
			disable_lasx();
			clear_tsk_thread_flag(tsk, TIF_USEDSIMD);
		}
		clear_tsk_thread_flag(tsk, TIF_USEDFPU);
	}
	KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
@@ -129,4 +185,127 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
	return tsk->thread.fpu.fpr;
}

static inline int is_simd_owner(void)
{
	return test_thread_flag(TIF_USEDSIMD);
}

#ifdef CONFIG_CPU_HAS_LSX

static inline void enable_lsx(void)
{
	if (cpu_has_lsx)
		csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}

static inline void disable_lsx(void)
{
	if (cpu_has_lsx)
		csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}

static inline void save_lsx(struct task_struct *t)
{
	if (cpu_has_lsx)
		_save_lsx(&t->thread.fpu);
}

static inline void restore_lsx(struct task_struct *t)
{
	if (cpu_has_lsx)
		_restore_lsx(&t->thread.fpu);
}

static inline void init_lsx_upper(void)
{
	/*
	 * Check cpu_has_lsx only if it's a constant. This will allow the
	 * compiler to optimise out code for CPUs without LSX without adding
	 * an extra redundant check for CPUs with LSX.
	 */
	if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
		return;

	_init_lsx_upper();
}

static inline void restore_lsx_upper(struct task_struct *t)
{
	if (cpu_has_lsx)
		_restore_lsx_upper(&t->thread.fpu);
}

#else
static inline void enable_lsx(void) {}
static inline void disable_lsx(void) {}
static inline void save_lsx(struct task_struct *t) {}
static inline void restore_lsx(struct task_struct *t) {}
static inline void init_lsx_upper(void) {}
static inline void restore_lsx_upper(struct task_struct *t) {}
#endif

#ifdef CONFIG_CPU_HAS_LASX

static inline void enable_lasx(void)
{

	if (cpu_has_lasx)
		csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}

static inline void disable_lasx(void)
{
	if (cpu_has_lasx)
		csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}

static inline void save_lasx(struct task_struct *t)
{
	if (cpu_has_lasx)
		_save_lasx(&t->thread.fpu);
}

static inline void restore_lasx(struct task_struct *t)
{
	if (cpu_has_lasx)
		_restore_lasx(&t->thread.fpu);
}

static inline void init_lasx_upper(void)
{
	if (cpu_has_lasx)
		_init_lasx_upper();
}

static inline void restore_lasx_upper(struct task_struct *t)
{
	if (cpu_has_lasx)
		_restore_lasx_upper(&t->thread.fpu);
}

#else
static inline void enable_lasx(void) {}
static inline void disable_lasx(void) {}
static inline void save_lasx(struct task_struct *t) {}
static inline void restore_lasx(struct task_struct *t) {}
static inline void init_lasx_upper(void) {}
static inline void restore_lasx_upper(struct task_struct *t) {}
#endif

static inline int thread_lsx_context_live(void)
{
	if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
		return 0;

	return test_thread_flag(TIF_LSX_CTX_LIVE);
}

static inline int thread_lasx_context_live(void)
{
	if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx)
		return 0;

	return test_thread_flag(TIF_LASX_CTX_LIVE);
}

#endif /* _ASM_FPU_H */
+13 −3
Original line number Diff line number Diff line
@@ -46,6 +46,16 @@ struct user_fp_state {
	uint32_t fcsr;
};

struct user_lsx_state {
	/* 32 registers, 128 bits width per register. */
	uint64_t vregs[32*2];
};

struct user_lasx_state {
	/* 32 registers, 256 bits width per register. */
	uint64_t vregs[32*4];
};

struct user_watch_state {
	uint64_t dbg_info;
	struct {
+18 −0
Original line number Diff line number Diff line
@@ -41,4 +41,22 @@ struct fpu_context {
	__u32	fcsr;
};

/* LSX context */
#define LSX_CTX_MAGIC		0x53580001
#define LSX_CTX_ALIGN		16
struct lsx_context {
	__u64	regs[2*32];
	__u64	fcc;
	__u32	fcsr;
};

/* LASX context */
#define LASX_CTX_MAGIC		0x41535801
#define LASX_CTX_ALIGN		32
struct lasx_context {
	__u64	regs[4*32];
	__u64	fcc;
	__u32	fcsr;
};

#endif /* _UAPI_ASM_SIGCONTEXT_H */
Loading