M7350v1_en_gpl

This commit is contained in:
T
2024-09-09 08:52:07 +00:00
commit f9cc65cfda
65988 changed files with 26357421 additions and 0 deletions
+18
View File
@@ -0,0 +1,18 @@
obj-$(CONFIG_KVM) += kvm/
# Xen paravirtualization support
obj-$(CONFIG_XEN) += xen/
# lguest paravirtualization support
obj-$(CONFIG_LGUEST_GUEST) += lguest/
obj-y += kernel/
obj-y += mm/
obj-y += crypto/
obj-y += vdso/
obj-$(CONFIG_IA32_EMULATION) += ia32/
obj-y += platform/
obj-y += net/
File diff suppressed because it is too large Load Diff
+506
View File
@@ -0,0 +1,506 @@
# Put options for CPU selection and the optimizations that depend on it here
choice
prompt "Processor family"
default M686 if X86_32
default GENERIC_CPU if X86_64
config M386
bool "386"
depends on X86_32 && !UML
---help---
This is the processor type of your CPU. This information is used for
optimizing purposes. In order to compile a kernel that can run on
all x86 CPU types (albeit not optimally fast), you can specify
"386" here.
The kernel will not necessarily run on earlier architectures than
the one you have chosen, e.g. a Pentium optimized kernel will run on
a PPro, but not necessarily on an i486.
Here are the settings recommended for greatest speed:
- "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
486DLC/DLC2, and UMC 486SX-S. Only "386" kernels will run on a 386
class machine.
- "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
- "586" for generic Pentium CPUs lacking the TSC
(time stamp counter) register.
- "Pentium-Classic" for the Intel Pentium.
- "Pentium-MMX" for the Intel Pentium MMX.
- "Pentium-Pro" for the Intel Pentium Pro.
- "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
- "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
- "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
- "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
- "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
- "Crusoe" for the Transmeta Crusoe series.
- "Efficeon" for the Transmeta Efficeon series.
- "Winchip-C6" for original IDT Winchip.
- "Winchip-2" for IDT Winchips with 3dNow! capabilities.
- "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
- "Geode GX/LX" For AMD Geode GX and LX processors.
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
- "VIA C7" for VIA C7.
If you don't know what to do, choose "386".
config M486
bool "486"
depends on X86_32
---help---
Select this for a 486 series processor, either Intel or one of the
compatible processors from AMD, Cyrix, or IBM. Includes DX,
DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
U5S.
config M586
bool "586/K5/5x86/6x86/6x86MX"
depends on X86_32
---help---
Select this for a 586 or 686 series processor such as the AMD K5,
the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
assume the RDTSC (Read Time Stamp Counter) instruction.
config M586TSC
bool "Pentium-Classic"
depends on X86_32
---help---
Select this for a Pentium Classic processor with the RDTSC (Read
Time Stamp Counter) instruction for benchmarking.
config M586MMX
bool "Pentium-MMX"
depends on X86_32
---help---
Select this for a Pentium with the MMX graphics/multimedia
extended instructions.
config M686
bool "Pentium-Pro"
depends on X86_32
---help---
Select this for Intel Pentium Pro chips. This enables the use of
Pentium Pro extended instructions, and disables the init-time guard
against the f00f bug found in earlier Pentiums.
config MPENTIUMII
bool "Pentium-II/Celeron(pre-Coppermine)"
depends on X86_32
---help---
Select this for Intel chips based on the Pentium-II and
pre-Coppermine Celeron core. This option enables an unaligned
copy optimization, compiles the kernel with optimization flags
tailored for the chip, and applies any applicable Pentium Pro
optimizations.
config MPENTIUMIII
bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
depends on X86_32
---help---
Select this for Intel chips based on the Pentium-III and
Celeron-Coppermine core. This option enables use of some
extended prefetch instructions in addition to the Pentium II
extensions.
config MPENTIUMM
bool "Pentium M"
depends on X86_32
---help---
Select this for Intel Pentium M (not Pentium-4 M)
notebook chips.
config MPENTIUM4
bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
depends on X86_32
---help---
Select this for Intel Pentium 4 chips. This includes the
Pentium 4, Pentium D, P4-based Celeron and Xeon, and
Pentium-4 M (not Pentium M) chips. This option enables compile
flags optimized for the chip, uses the correct cache line size, and
applies any applicable optimizations.
CPUIDs: F[0-6][1-A] (in /proc/cpuinfo show = cpu family : 15 )
Select this for:
Pentiums (Pentium 4, Pentium D, Celeron, Celeron D) corename:
-Willamette
-Northwood
-Mobile Pentium 4
-Mobile Pentium 4 M
-Extreme Edition (Gallatin)
-Prescott
-Prescott 2M
-Cedar Mill
-Presler
-Smithfield
Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename:
-Foster
-Prestonia
-Gallatin
-Nocona
-Irwindale
-Cranford
-Potomac
-Paxville
-Dempsey
config MK6
bool "K6/K6-II/K6-III"
depends on X86_32
---help---
Select this for an AMD K6-family processor. Enables use of
some extended instructions, and passes appropriate optimization
flags to GCC.
config MK7
bool "Athlon/Duron/K7"
depends on X86_32
---help---
Select this for an AMD Athlon K7-family processor. Enables use of
some extended instructions, and passes appropriate optimization
flags to GCC.
config MK8
bool "Opteron/Athlon64/Hammer/K8"
---help---
Select this for an AMD Opteron or Athlon64 Hammer-family processor.
Enables use of some extended instructions, and passes appropriate
optimization flags to GCC.
config MCRUSOE
bool "Crusoe"
depends on X86_32
---help---
Select this for a Transmeta Crusoe processor. Treats the processor
like a 586 with TSC, and sets some GCC optimization flags (like a
Pentium Pro with no alignment requirements).
config MEFFICEON
bool "Efficeon"
depends on X86_32
---help---
Select this for a Transmeta Efficeon processor.
config MWINCHIPC6
bool "Winchip-C6"
depends on X86_32
---help---
Select this for an IDT Winchip C6 chip. Linux and GCC
treat this chip as a 586TSC with some extended instructions
and alignment requirements.
config MWINCHIP3D
bool "Winchip-2/Winchip-2A/Winchip-3"
depends on X86_32
---help---
Select this for an IDT Winchip-2, 2A or 3. Linux and GCC
treat this chip as a 586TSC with some extended instructions
and alignment requirements. Also enable out of order memory
stores for this CPU, which can increase performance of some
operations.
config MELAN
bool "AMD Elan"
depends on X86_32
---help---
Select this for an AMD Elan processor.
Do not use this option for K6/Athlon/Opteron processors!
config MGEODEGX1
bool "GeodeGX1"
depends on X86_32
---help---
Select this for a Geode GX1 (Cyrix MediaGX) chip.
config MGEODE_LX
bool "Geode GX/LX"
depends on X86_32
---help---
Select this for AMD Geode GX and LX processors.
config MCYRIXIII
bool "CyrixIII/VIA-C3"
depends on X86_32
---help---
Select this for a Cyrix III or C3 chip. Presently Linux and GCC
treat this chip as a generic 586. Whilst the CPU is 686 class,
it lacks the cmov extension which gcc assumes is present when
generating 686 code.
Note that Nehemiah (Model 9) and above will not boot with this
kernel due to them lacking the 3DNow! instructions used in earlier
incarnations of the CPU.
config MVIAC3_2
bool "VIA C3-2 (Nehemiah)"
depends on X86_32
---help---
Select this for a VIA C3 "Nehemiah". Selecting this enables usage
of SSE and tells gcc to treat the CPU as a 686.
Note, this kernel will not boot on older (pre model 9) C3s.
config MVIAC7
bool "VIA C7"
depends on X86_32
---help---
Select this for a VIA C7. Selecting this uses the correct cache
shift and tells gcc to treat the CPU as a 686.
config MPSC
bool "Intel P4 / older Netburst based Xeon"
depends on X86_64
---help---
Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
Xeon CPUs with Intel 64bit which is compatible with x86-64.
Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
Netburst core and shouldn't use this option. You can distinguish them
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
config MCORE2
bool "Core 2/newer Xeon"
---help---
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
53xx) CPUs. You can distinguish newer from older Xeons by the CPU
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
(not a typo)
config MATOM
bool "Intel Atom"
---help---
Select this for the Intel Atom platform. Intel Atom CPUs have an
in-order pipelining architecture and thus can benefit from
accordingly optimized code. Use a recent GCC with specific Atom
support in order to fully benefit from selecting this option.
config GENERIC_CPU
bool "Generic-x86-64"
depends on X86_64
---help---
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
endchoice
config X86_GENERIC
bool "Generic x86 support"
depends on X86_32
---help---
Instead of just including optimizations for the selected
x86 variant (e.g. PII, Crusoe or Athlon), include some more
generic optimizations as well. This will make the kernel
perform better on x86 CPUs other than that selected.
This is really intended for distributors who need more
generic optimizations.
#
# Define implied options from the CPU selection here
config X86_INTERNODE_CACHE_SHIFT
int
default "12" if X86_VSMP
default X86_L1_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
default "4" if MELAN || M486 || M386 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
config X86_XADD
def_bool y
depends on X86_64 || !M386
config X86_PPRO_FENCE
bool "PentiumPro memory ordering errata workaround"
depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
---help---
Old PentiumPro multiprocessor systems had errata that could cause
memory operations to violate the x86 ordering standard in rare cases.
Enabling this option will attempt to work around some (but not all)
occurrences of this problem, at the cost of much heavier spinlock and
memory barrier operations.
If unsure, say n here. Even distro kernels should think twice before
enabling this: there are few systems, and an unlikely bug.
config X86_F00F_BUG
def_bool y
depends on M586MMX || M586TSC || M586 || M486 || M386
config X86_INVD_BUG
def_bool y
depends on M486 || M386
config X86_WP_WORKS_OK
def_bool y
depends on !M386
config X86_INVLPG
def_bool y
depends on X86_32 && !M386
config X86_BSWAP
def_bool y
depends on X86_32 && !M386
config X86_POPAD_OK
def_bool y
depends on X86_32 && !M386
config X86_ALIGNMENT_16
def_bool y
depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
config X86_INTEL_USERCOPY
def_bool y
depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
config X86_USE_PPRO_CHECKSUM
def_bool y
depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
config X86_USE_3DNOW
def_bool y
depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
config X86_OOSTORE
def_bool y
depends on (MWINCHIP3D || MWINCHIPC6) && MTRR
#
# P6_NOPs are a relatively minor optimization that require a family >=
# 6 processor, except that it is broken on certain VIA chips.
# Furthermore, AMD chips prefer a totally different sequence of NOPs
# (which work on all CPUs). In addition, it looks like Virtual PC
# does not understand them.
#
# As a result, disallow these if we're not compiling for X86_64 (these
# NOPs do work on all x86-64 capable chips); the list of processors in
# the right-hand clause are the cores that benefit from this optimization.
#
config X86_P6_NOP
def_bool y
depends on X86_64
depends on (MCORE2 || MPENTIUM4 || MPSC)
config X86_TSC
def_bool y
depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
config X86_CMPXCHG64
def_bool y
depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
# this should be set for all -march=.. options where the compiler
# generates cmov.
config X86_CMOV
def_bool y
depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
default "6" if X86_32 && X86_P6_NOP
default "5" if X86_32 && X86_CMPXCHG64
default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
default "3"
config X86_DEBUGCTLMSR
def_bool y
depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EXPERT
---help---
This lets you choose what x86 vendor support code your kernel
will include.
config CPU_SUP_INTEL
default y
bool "Support Intel processors" if PROCESSOR_SELECT
---help---
This enables detection, tunings and quirks for Intel processors.
You need this enabled if you want your kernel to run on an
Intel CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on an Intel
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors.
You need this enabled if you want your kernel to run on a
Cyrix CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a Cyrix
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_AMD
default y
bool "Support AMD processors" if PROCESSOR_SELECT
---help---
This enables detection, tunings and quirks for AMD processors.
You need this enabled if you want your kernel to run on an
AMD CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on an AMD
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_CENTAUR
default y
bool "Support Centaur processors" if PROCESSOR_SELECT
---help---
This enables detection, tunings and quirks for Centaur processors.
You need this enabled if you want your kernel to run on a
Centaur CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a Centaur
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_TRANSMETA_32
default y
bool "Support Transmeta processors" if PROCESSOR_SELECT
depends on !64BIT
---help---
This enables detection, tunings and quirks for Transmeta processors.
You need this enabled if you want your kernel to run on a
Transmeta CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a Transmeta
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
depends on M386 || M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors.
You need this enabled if you want your kernel to run on a
UMC CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a UMC
CPU might render the kernel unbootable.
If unsure, say N.
+302
View File
@@ -0,0 +1,302 @@
menu "Kernel hacking"
config TRACE_IRQFLAGS_SUPPORT
def_bool y
source "lib/Kconfig.debug"
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
---help---
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel. Note that with PAT support
enabled, even in this case there are restrictions on /dev/mem
use due to the cache aliasing requirements.
If this option is switched on, the /dev/mem file only allows
userspace access to PCI space and the BIOS code and data regions.
This is sufficient for dosemu and X and all common users of
/dev/mem.
If in doubt, say Y.
config X86_VERBOSE_BOOTUP
bool "Enable verbose x86 bootup info messages"
default y
---help---
Enables the informational output from the decompression stage
(e.g. bzImage) of the boot. If you disable this you will still
see errors. Disable this if you want silent bootup.
config EARLY_PRINTK
bool "Early printk" if EXPERT
default y
---help---
Write kernel log output directly into the VGA buffer or to a serial
port.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized. For normal operation
it is not recommended because it looks ugly and doesn't cooperate
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash.
config EARLY_PRINTK_INTEL_MID
bool "Early printk for Intel MID platform support"
depends on EARLY_PRINTK && X86_INTEL_MID
config EARLY_PRINTK_DBGP
bool "Early printk via EHCI debug port"
depends on EARLY_PRINTK && PCI
---help---
Write kernel log output directly into the EHCI debug port.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized. For normal operation
it is not recommended because it looks ugly and doesn't cooperate
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash. You need a USB debug device.
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
---help---
Say Y here if you want to check the overflows of kernel, IRQ
and exception stacks. This option will cause messages of the
stacks in detail when free stack space drops below a certain
limit.
If in doubt, say "N".
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
---help---
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
who are working in architecture specific areas of the kernel.
It is probably not a good idea to enable this feature in a production
kernel.
If in doubt, say "N"
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
default y
depends on DEBUG_KERNEL
---help---
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
data. This is recommended so that we can catch kernel bugs sooner.
If in doubt, say "Y".
config DEBUG_RODATA_TEST
bool "Testcase for the DEBUG_RODATA feature"
depends on DEBUG_RODATA
default y
---help---
This option enables a testcase for the DEBUG_RODATA
feature as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
config DEBUG_SET_MODULE_RONX
bool "Set loadable kernel module data as NX and text as RO"
depends on MODULES
---help---
This option helps catch unintended modifications to loadable
kernel module's text and read-only data. It also prevents execution
of module data. Such protection may interfere with run-time code
patching and dynamic kernel tracing - and they might also protect
against certain classes of kernel exploits.
If in doubt, say "N".
config DEBUG_NX_TEST
tristate "Testcase for the NX non-executable stack feature"
depends on DEBUG_KERNEL && m
---help---
This option enables a testcase for the CPU NX capability
and the software setup of this feature.
If in doubt, say "N"
config DOUBLEFAULT
default y
bool "Enable doublefault exception handler" if EXPERT
depends on X86_32
---help---
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
option saves about 4k and might cause you much additional grey
hair.
config IOMMU_DEBUG
bool "Enable IOMMU debugging"
depends on GART_IOMMU && DEBUG_KERNEL
depends on X86_64
---help---
Force the IOMMU to on even when you have less than 4GB of
memory and add debugging code. On overflow always panic. And
allow to enable IOMMU leak tracing. Can be disabled at boot
time with iommu=noforce. This will also enable scatter gather
list merging. Currently not recommended for production
code. When you use it make sure you have a big enough
IOMMU/AGP aperture. Most of the options enabled by this can
be set more finegrained using the iommu= command line
options. See Documentation/x86/x86_64/boot-options.txt for more
details.
config IOMMU_STRESS
bool "Enable IOMMU stress-test mode"
---help---
This option disables various optimizations in IOMMU related
code to do real stress testing of the IOMMU code. This option
will cause a performance drop and should only be enabled for
testing.
config IOMMU_LEAK
bool "IOMMU leak tracing"
depends on IOMMU_DEBUG && DMA_API_DEBUG
---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config HAVE_MMIOTRACE_SUPPORT
def_bool y
config X86_DECODER_SELFTEST
bool "x86 instruction decoder selftest"
depends on DEBUG_KERNEL && KPROBES
---help---
Perform x86 instruction decoder selftests at build time.
This option is useful for checking the sanity of x86 instruction
decoder code.
If unsure, say "N".
#
# IO delay types:
#
config IO_DELAY_TYPE_0X80
int
default "0"
config IO_DELAY_TYPE_0XED
int
default "1"
config IO_DELAY_TYPE_UDELAY
int
default "2"
config IO_DELAY_TYPE_NONE
int
default "3"
choice
prompt "IO delay type"
default IO_DELAY_0X80
config IO_DELAY_0X80
bool "port 0x80 based port-IO delay [recommended]"
---help---
This is the traditional Linux IO delay used for in/out_p.
It is the most tested hence safest selection here.
config IO_DELAY_0XED
bool "port 0xed based port-IO delay"
---help---
Use port 0xed as the IO delay. This frees up port 0x80 which is
often used as a hardware-debug port.
config IO_DELAY_UDELAY
bool "udelay based port-IO delay"
---help---
Use udelay(2) as the IO delay method. This provides the delay
while not having any side-effect on the IO port space.
config IO_DELAY_NONE
bool "no port-IO delay"
---help---
No port-IO delay. Will break on old boxes that require port-IO
delay for certain operations. Should work on most new machines.
endchoice
# DEFAULT_IO_DELAY_TYPE is defined four times, once per entry of the
# "IO delay type" choice. Each definition is guarded by an "if" on one of
# the choice symbols and defaults to the matching IO_DELAY_TYPE_* constant
# (0x80 -> 0, 0xed -> 1, udelay -> 2, none -> 3).
if IO_DELAY_0X80
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_0X80
endif
if IO_DELAY_0XED
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_0XED
endif
if IO_DELAY_UDELAY
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_UDELAY
endif
if IO_DELAY_NONE
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_NONE
endif
config DEBUG_BOOT_PARAMS
bool "Debug boot parameters"
depends on DEBUG_KERNEL
depends on DEBUG_FS
---help---
This option will cause struct boot_params to be exported via debugfs.
config CPA_DEBUG
bool "CPA self-test code"
depends on DEBUG_KERNEL
---help---
Do change_page_attr() self-tests every 30 seconds.
config OPTIMIZE_INLINING
bool "Allow gcc to uninline functions marked 'inline'"
---help---
This option determines if the kernel forces gcc to inline the functions
developers have marked 'inline'. Doing so takes away freedom from gcc to
do what it thinks is best, which is desirable for the gcc 3.x series of
compilers. The gcc 4.x series have a rewritten inlining algorithm and
enabling this option will generate a smaller kernel there. Hopefully
this algorithm is so good that allowing gcc 4.x and above to make the
decision will become the default in the future. Until then this option
is there to test gcc for this.
If unsure, say N.
config DEBUG_STRICT_USER_COPY_CHECKS
bool "Strict copy size checks"
depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
---help---
Enabling this option turns a certain set of sanity checks for user
copy operations into compile time failures.
The copy_from_user() etc checks are there to help test if there
are sufficient security checks on the length argument of
the copy operation, by having gcc prove that the argument is
within bounds.
If unsure, or if you run an older (pre 4.4) gcc, say N.
config DEBUG_NMI_SELFTEST
bool "NMI Selftest"
depends on DEBUG_KERNEL && X86_LOCAL_APIC
---help---
Enabling this option turns on a quick NMI selftest to verify
that the NMI behaves correctly.
This might help diagnose strange hangs that rely on NMI to
function properly.
If unsure, say N.
endmenu
+223
View File
@@ -0,0 +1,223 @@
# Unified Makefile for i386 and x86_64
# select defconfig based on actual architecture
ifeq ($(ARCH),x86)
KBUILD_DEFCONFIG := i386_defconfig
else
KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif
# BITS is used as extension for files which are available in a 32 bit
# and a 64 bit version to simplify shared Makefiles.
# e.g.: obj-y += foo_$(BITS).o
export BITS
ifeq ($(CONFIG_X86_32),y)
BITS := 32
UTS_MACHINE := i386
CHECKFLAGS += -D__i386__
biarch := $(call cc-option,-m32)
KBUILD_AFLAGS += $(biarch)
KBUILD_CFLAGS += $(biarch)
ifdef CONFIG_RELOCATABLE
LDFLAGS_vmlinux := --emit-relocs
endif
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
# prevent gcc from keeping the stack 16 byte aligned
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stack slots:
KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
$(call cc-option,-fno-unit-at-a-time))
# CPU-specific tuning. Anything which can be shared with UML should go here.
include $(srctree)/arch/x86/Makefile_32.cpu
KBUILD_CFLAGS += $(cflags-y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
else
BITS := 64
UTS_MACHINE := x86_64
CHECKFLAGS += -D__x86_64__ -m64
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
cflags-$(CONFIG_MCORE2) += \
$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-red-zone
KBUILD_CFLAGS += -mcmodel=kernel
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
# this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default
KBUILD_CFLAGS += -maccumulate-outgoing-args
endif
# Stack protector support: run the helper script for the current $(BITS),
# passing it $(CC), $(KBUILD_CPPFLAGS) and $(biarch). -fstack-protector is
# added to KBUILD_CFLAGS only when the script prints "y"; otherwise a
# warning is emitted and the flag is left out.
# NOTE(review): $(biarch) is only assigned in the CONFIG_X86_32 branch of
# this file, so it expands to nothing on 64-bit builds.
ifdef CONFIG_CC_STACKPROTECTOR
cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
stackp-y := -fstack-protector
KBUILD_CFLAGS += $(stackp-y)
else
$(warning stack protector enabled but no compiler support)
endif
endif
# x32 ABI support: probe the toolchain before enabling CONFIG_X86_X32_ABI.
# The probe assembles a one-line source ("1: .quad 1b") with $(CC),
# converts the object to elf32-x86-64 with $(OBJCOPY), and links it with
# $(LD) -m elf32_x86_64. x32_ld_ok is compared against "y" below; try-run
# is defined outside this file and is presumed to yield its second
# argument (y) on success and its third (n) on failure.
# When the probe passes, CONFIG_X86_X32_ABI is set and passed to both the
# assembler and compiler as a -D define; otherwise only a warning is issued.
# CONFIG_X86_X32_ABI is exported in either case.
ifdef CONFIG_X86_X32
x32_ld_ok := $(call try-run,\
/bin/echo -e '1: .quad 1b' | \
$(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \
$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
ifeq ($(x32_ld_ok),y)
CONFIG_X86_X32_ABI := y
KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI
KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI
else
$(warning CONFIG_X86_X32 enabled but no binutils support)
endif
endif
export CONFIG_X86_X32_ABI
# Don't unroll struct assignments with kmemcheck enabled
ifeq ($(CONFIG_KMEMCHECK),y)
KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
endif
# Stackpointer is addressed different for 32 bit and 64 bit x86
sp-$(CONFIG_X86_32) := esp
sp-$(CONFIG_X86_64) := rsp
# does binutils support CFI?
cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
# is .cfi_signal_frame supported too?
cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
LDFLAGS := -m elf_$(UTS_MACHINE)
# Speed up the build
KBUILD_CFLAGS += -pipe
# Workaround for a gcc prerelease that unfortunately was shipped in a SUSE release
KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# prevent gcc from generating any FP code by mistake
KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
archscripts:
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
###
# Syscall table generation
archheaders:
$(Q)$(MAKE) $(build)=arch/x86/syscalls all
###
# Kernel objects
head-y := arch/x86/kernel/head_$(BITS).o
head-y += arch/x86/kernel/head$(BITS).o
head-y += arch/x86/kernel/head.o
head-y += arch/x86/kernel/init_task.o
libs-y += arch/x86/lib/
# See arch/x86/Kbuild for content of core part of the kernel
core-y += arch/x86/
# drivers-y are linked after core-y
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
drivers-$(CONFIG_PCI) += arch/x86/pci/
# must be linked after kernel/
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/x86/power/
drivers-$(CONFIG_FB) += arch/x86/video/
####
# boot loader support. Several targets are kept for legacy purposes
boot := arch/x86/boot
BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
PHONY += bzImage $(BOOT_TARGETS)
# Default kernel to build
all: bzImage
# KBUILD_IMAGE specify target image being built
KBUILD_IMAGE := $(boot)/bzImage
# bzImage depends on vmlinux. Steps, in order:
#  1. if CONFIG_X86_DECODER_SELFTEST=y, run the "posttest" target in
#     arch/x86/tools;
#  2. build $(KBUILD_IMAGE) in $(boot);
#  3. create $(objtree)/arch/$(UTS_MACHINE)/boot and place a relative
#     symlink named after the target ($@) there, pointing at
#     ../../x86/boot/bzImage.
bzImage: vmlinux
ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
$(Q)$(MAKE) $(build)=arch/x86/tools posttest
endif
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
PHONY += install
install:
$(Q)$(MAKE) $(build)=$(boot) $@
PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/x86/vdso $@
archclean:
$(Q)rm -rf $(objtree)/arch/i386
$(Q)rm -rf $(objtree)/arch/x86_64
$(Q)$(MAKE) $(clean)=$(boot)
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
echo ' (your) ~/bin/$(INSTALLKERNEL) or'
echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
echo ' bzdisk/fdimage*/isoimage also accept:'
echo ' FDARGS="..." arguments for the booted kernel'
echo ' FDINITRD=file initrd for the booted kernel'
endef
+60
View File
@@ -0,0 +1,60 @@
core-y += arch/x86/crypto/
ifeq ($(CONFIG_X86_32),y)
START := 0x8048000
LDFLAGS += -m elf_i386
ELF_ARCH := i386
ELF_FORMAT := elf32-i386
CHECKFLAGS += -D__i386__
KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS += $(call cc-option,-m32)
LINK-y += $(call cc-option,-m32)
export LDFLAGS
LDS_EXTRA := -Ui386
export LDS_EXTRA
# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
include $(srctree)/arch/x86/Makefile_32.cpu
# prevent gcc from keeping the stack 16 byte aligned. Taken from i386.
cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
# Prevent sprintf in nfsd from being converted to strcpy and resulting in
# an unresolved reference.
cflags-y += -ffreestanding
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers; it makes gcc use
# a lot more stack due to the lack of sharing of stack slots. Also, gcc
# 4.3.0 needs -funit-at-a-time for extern inline functions.
KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
echo $(call cc-option,-fno-unit-at-a-time); \
else echo $(call cc-option,-funit-at-a-time); fi ;)
KBUILD_CFLAGS += $(cflags-y)
else
START := 0x60000000
KBUILD_CFLAGS += -fno-builtin -m64
CHECKFLAGS += -m64 -D__x86_64__
KBUILD_AFLAGS += -m64
LDFLAGS += -m elf_x86_64
KBUILD_CPPFLAGS += -m64
ELF_ARCH := i386:x86-64
ELF_FORMAT := elf64-x86-64
# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
LINK-y += -m64
# Do unit-at-a-time unconditionally on x86_64, following the host
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
endif
+71
View File
@@ -0,0 +1,71 @@
# CPU tuning section - shared with UML.
# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
#-mtune exists since gcc 3.4
HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
ifeq ($(HAS_MTUNE),y)
tune = $(call cc-option,-mtune=$(1),$(2))
else
tune = $(call cc-option,-mcpu=$(1),$(2))
endif
align := $(cc-option-align)
cflags-$(CONFIG_M386) += -march=i386
cflags-$(CONFIG_M486) += -march=i486
cflags-$(CONFIG_M586) += -march=i586
cflags-$(CONFIG_M586TSC) += -march=i586
cflags-$(CONFIG_M586MMX) += -march=pentium-mmx
cflags-$(CONFIG_M686) += -march=i686
cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2)
cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3)
cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3)
cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4)
cflags-$(CONFIG_MK6) += -march=k6
# Please note that patches that add -march=athlon-xp and friends are pointless.
# They make zero difference whatsoever to performance at this time.
cflags-$(CONFIG_MK7) += -march=athlon
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
cflags-$(CONFIG_MVIAC7) += -march=i686
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
# AMD Elan support
cflags-$(CONFIG_MELAN) += -march=i486
# Geode GX1 support
cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
# add at the end to overwrite eventual tuning options from earlier
# cpu entries
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
# Work around the pentium-mmx code generator madness of gcc4.4.x which
# does stack alignment by generating horrible code _before_ the mcount
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
# tracer assumptions. For i686, generic, core2 this is set by the
# compiler anyway
ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif
# Work around to a bug with asm goto with first implementations of it
# in gcc causing gcc to mess up the push and pop of the stack in some
# uses of asm goto.
ifeq ($(CONFIG_JUMP_LABEL), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif
cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
ifneq ($(CONFIG_X86_P6_NOP),y)
cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,)
endif
+194
View File
@@ -0,0 +1,194 @@
#
# arch/x86/boot/Makefile
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 1994 by Linus Torvalds
# Changed by many, many contributors over the years.
#
# If you want to preset the SVGA mode, uncomment the next line and
# set SVGA_MODE to whatever number you want.
# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
# The number is the same as you would ordinarily press at bootup.
SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed
setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o
setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y += video-mode.o version.o
setup-$(CONFIG_X86_APM_BOOT) += apm.o
# The link order of the video-*.o modules can matter. In particular,
# video-vga.o *must* be listed first, followed by video-vesa.o.
# Hardware-specific drivers should follow in the order they should be
# probed, and video-bios.o should typically be last.
setup-y += video-vga.o
setup-y += video-vesa.o
setup-y += video-bios.o
targets += $(setup-y)
hostprogs-y := mkcpustr tools/build
HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \
-D__EXPORTED_HEADERS__
$(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
cmd_cpustr = $(obj)/mkcpustr > $@
targets += cpustr.h
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
$(call if_changed,cpustr)
# ---------------------------------------------------------------------------
# How to compile the 16-bit code. Note we always compile for -march=i386,
# that way we can complain to the user if the CPU is insufficient.
KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
-DDISABLE_BRANCH_PROFILING \
-Wall -Wstrict-prototypes \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/code16gcc.h \
-fno-strict-aliasing -fomit-frame-pointer \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-toplevel-reorder,\
$(call cc-option, -fno-unit-at-a-time)) \
$(call cc-option, -fno-stack-protector) \
$(call cc-option, -mpreferred-stack-boundary=2)
KBUILD_CFLAGS += $(call cc-option, -m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin > $@
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
targets += voffset.h
$(obj)/voffset.h: vmlinux FORCE
$(call if_changed,voffset)
sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
targets += zoffset.h
$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
$(call if_changed,zoffset)
AFLAGS_header.o += -I$(obj)
$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
LDFLAGS_setup.elf := -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_setup.bin := -O binary
$(obj)/setup.bin: $(obj)/setup.elf FORCE
$(call if_changed,objcopy)
$(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
# Set this if you want to pass append arguments to the
# bzdisk/fdimage/isoimage kernel
FDARGS =
# Set this if you want an initrd included with the
# bzdisk/fdimage/isoimage kernel
FDINITRD =
image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
$(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
# Build a bootable floppy directly on the drive.
# This requires write access to /dev/fd0.
#
# All mtools invocations must use the generated $(obj)/mtools.conf: the
# file is produced from $(src)/mtools.conf.in into $(obj), so it does not
# exist under $(src) when building with a separate output directory.
bzdisk: $(obj)/bzImage $(obj)/mtools.conf
	MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
	syslinux /dev/fd0 ; sync
	echo '$(image_cmdline)' | \
		MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg
	if [ -f '$(FDINITRD)' ] ; then \
		MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
	fi
	MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync
# These require being root or having syslinux 2.02 or higher installed
fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
syslinux $(obj)/fdimage ; sync
echo '$(image_cmdline)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync
fdimage288: $(obj)/bzImage $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
syslinux $(obj)/fdimage ; sync
echo '$(image_cmdline)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync
isoimage: $(obj)/bzImage
-rm -rf $(obj)/isoimage
mkdir $(obj)/isoimage
for i in lib lib64 share end ; do \
if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
break ; \
fi ; \
if [ $$i = end ] ; then exit 1 ; fi ; \
done
cp $(obj)/bzImage $(obj)/isoimage/linux
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
fi
mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
-no-emul-boot -boot-load-size 4 -boot-info-table \
$(obj)/isoimage
isohybrid $(obj)/image.iso 2>/dev/null || true
rm -rf $(obj)/isoimage
bzlilo: $(obj)/bzImage
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
cp System.map $(INSTALL_PATH)/
if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
install:
sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"
+165
View File
@@ -0,0 +1,165 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Enable A20 gate (return -1 on failure)
*/
#include "boot.h"
#define MAX_8042_LOOPS 100000
#define MAX_8042_FF 32
/*
 * Poll the keyboard controller status port (0x64) until its buffers are
 * empty, discarding any data byte that is waiting on port 0x60.
 *
 * Returns 0 once the buffers are empty.  Returns -1 if the status port
 * reads 0xff MAX_8042_FF times (taken to mean no controller is present)
 * or if MAX_8042_LOOPS polls pass without the buffers emptying.
 */
static int empty_8042(void)
{
	int ff_budget = MAX_8042_FF;
	int tries;

	for (tries = MAX_8042_LOOPS; tries != 0; tries--) {
		u8 st;

		io_delay();
		st = inb(0x64);

		/* FF is a plausible, but very unlikely status */
		if (st == 0xff && --ff_budget == 0)
			return -1;	/* Assume no KBC present */

		if (st & 1) {
			/* Read and discard input data, then poll again */
			io_delay();
			(void)inb(0x60);
			continue;
		}

		if ((st & 2) == 0)
			return 0;	/* Buffers empty, finished! */
	}

	return -1;
}
/*
 * Probe whether the A20 line is enabled; returns nonzero if it is.
 *
 * The probe location is the int $0x80 vector, which should be safe to
 * scribble on briefly.  A changing counter is written there through %fs
 * (segment 0x0000) and the location 1 MiB higher is read through %gs
 * (segment 0xffff, offset + 0x10).  If the two reads ever differ, the
 * addresses do not alias and A20 is on.  The original vector contents
 * are restored before returning.
 *
 * loops is the maximum number of write/read attempts.
 */
#define A20_TEST_ADDR (4*0x80)
#define A20_TEST_SHORT 32
#define A20_TEST_LONG 2097152 /* 2^21 */
static int a20_test(int loops)
{
	int differs = 0;
	int original, probe;

	set_fs(0x0000);
	set_gs(0xffff);

	original = rdfs32(A20_TEST_ADDR);
	probe = original;

	for (; loops != 0; loops--) {
		probe++;
		wrfs32(probe, A20_TEST_ADDR);
		io_delay();	/* Serialize and make delay constant */
		differs = rdgs32(A20_TEST_ADDR + 0x10) ^ probe;
		if (differs != 0)
			break;
	}

	wrfs32(original, A20_TEST_ADDR);
	return differs;
}
/*
 * Quick test to see if A20 is already enabled: at most A20_TEST_SHORT
 * probe iterations.  Returns nonzero if A20 is enabled.
 */
static int a20_test_short(void)
{
return a20_test(A20_TEST_SHORT);
}
/*
 * Longer test that actually waits for A20 to come on line (up to
 * A20_TEST_LONG probe iterations); this is useful when dealing with the
 * KBC or other slow external circuitry.  Returns nonzero if A20 is enabled.
 */
static int a20_test_long(void)
{
return a20_test(A20_TEST_LONG);
}
/*
 * Ask the BIOS to enable A20 (INT 0x15, AX=0x2401).  The BIOS result is
 * not inspected (the output pointer is NULL); the caller re-tests A20.
 */
static void enable_a20_bios(void)
{
struct biosregs ireg;
initregs(&ireg);
ireg.ax = 0x2401;
intcall(0x15, &ireg, NULL);
}
/*
 * Enable A20 through the keyboard controller.  Each port write is
 * preceded and followed by empty_8042(); the return values of those
 * calls are ignored here.
 */
static void enable_a20_kbc(void)
{
empty_8042();
outb(0xd1, 0x64); /* Command write */
empty_8042();
outb(0xdf, 0x60); /* A20 on */
empty_8042();
outb(0xff, 0x64); /* Null command, but UHCI wants it */
empty_8042();
}
/*
 * Enable A20 through the "fast A20 gate" on configuration port A (0x92):
 * read-modify-write that sets bit 1 (enable A20) and clears bit 0 so the
 * write does not reset the machine.
 */
static void enable_a20_fast(void)
{
	u8 cfg = inb(0x92);		/* Configuration port A */

	cfg = (cfg | 0x02) & ~0x01;	/* A20 on, reset bit off */
	outb(cfg, 0x92);
}
/*
 * Actual routine to enable A20; returns 0 on success, -1 on failure.
 *
 * Each attempt tries the methods in this order and stops as soon as
 * A20 tests as enabled:
 *   1. nothing (A20 may already be on, e.g. legacy-free systems)
 *   2. the BIOS (INT 0x15, AX=0x2401)
 *   3. the keyboard controller, if it responded to empty_8042()
 *   4. the "fast A20 gate"
 */
#define A20_ENABLE_LOOPS 255 /* Number of times to try */
int enable_a20(void)
{
	int attempt;

	for (attempt = A20_ENABLE_LOOPS; attempt != 0; attempt--) {
		int kbc_status;

		/* Already enabled? */
		if (a20_test_short())
			return 0;

		/* BIOS method */
		enable_a20_bios();
		if (a20_test_short())
			return 0;

		/* Drain the KBC; the BIOS call may also just have been slow */
		kbc_status = empty_8042();
		if (a20_test_short())
			return 0;	/* BIOS worked, but with delayed reaction */

		/* Keyboard controller method, only if a KBC answered */
		if (kbc_status == 0) {
			enable_a20_kbc();
			if (a20_test_long())
				return 0;
		}

		/* Last resort: fast A20 gate */
		enable_a20_fast();
		if (a20_test_long())
			return 0;
	}

	return -1;
}
+75
View File
@@ -0,0 +1,75 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* Original APM BIOS checking by Stephen Rothwell, May 1994
* (sfr@canb.auug.org.au)
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Get APM BIOS information
*/
#include "boot.h"
/*
 * Probe for an APM BIOS through INT 0x15, AH=0x53 and, if one with
 * 32-bit support is present, connect its 32-bit interface and record
 * the results in boot_params.apm_bios_info.
 *
 * Returns 0 on success, or -1 if no APM BIOS is found, 32-bit mode is
 * not supported, or the 32-bit connect fails.  Note that the segment and
 * length fields of apm_bios_info are written before the carry flag of
 * the connect call is checked, so they may hold stale values on failure.
 */
int query_apm_bios(void)
{
struct biosregs ireg, oreg;
/* APM BIOS installation check */
/* AL is left as initregs() set it (presumably 0 -- confirm in regs.c) */
initregs(&ireg);
ireg.ah = 0x53;
intcall(0x15, &ireg, &oreg);
if (oreg.flags & X86_EFLAGS_CF)
return -1; /* No APM BIOS */
if (oreg.bx != 0x504d) /* "PM" signature */
return -1;
if (!(oreg.cx & 0x02)) /* 32 bits supported? */
return -1;
/* Disconnect first, just in case */
ireg.al = 0x04;
intcall(0x15, &ireg, NULL);
/* 32-bit connect */
ireg.al = 0x03;
intcall(0x15, &ireg, &oreg);
/* Record what the connect call returned, register by register */
boot_params.apm_bios_info.cseg = oreg.ax;
boot_params.apm_bios_info.offset = oreg.ebx;
boot_params.apm_bios_info.cseg_16 = oreg.cx;
boot_params.apm_bios_info.dseg = oreg.dx;
boot_params.apm_bios_info.cseg_len = oreg.si;
boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
boot_params.apm_bios_info.dseg_len = oreg.di;
if (oreg.flags & X86_EFLAGS_CF)
return -1;
/* Redo the installation check as the 32-bit connect;
some BIOSes return different flags this way... */
ireg.al = 0x00;
intcall(0x15, &ireg, &oreg);
if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
/* Failure with 32-bit connect, try to disconnect and ignore */
ireg.al = 0x04;
intcall(0x15, &ireg, NULL);
return -1;
}
boot_params.apm_bios_info.version = oreg.ax;
boot_params.apm_bios_info.flags = oreg.cx;
return 0;
}
+82
View File
@@ -0,0 +1,82 @@
/* -----------------------------------------------------------------------
*
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2 or (at your
* option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
* touching registers they shouldn't be.
*/
/*
 * void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg)
 *
 * Register arguments as used below: %al = interrupt number, %dx = input
 * register block, %cx = output register block (may be 0/NULL).
 *
 * Both register blocks are 44 bytes (11 dwords) laid out in the order
 * that "popal; popw %gs; popw %fs; popw %es; popw %ds; popfl" consumes.
 */
.code16gcc
.text
.globl intcall
.type intcall, @function
intcall:
/* Self-modify the INT instruction. Ugly, but works. */
/* Only rewrite the operand byte at 3: when the vector actually changes */
cmpb %al, 3f
je 1f
movb %al, 3f
jmp 1f /* Synchronize pipeline */
1:
/* Save state */
/* 4 (eflags) + 2 (fs) + 2 (gs) + 32 (pushal) = 40 bytes of caller state */
pushfl
pushw %fs
pushw %gs
pushal
/* Copy input state to stack frame */
/* Reserve 44 bytes and copy 11 dwords from (%si) = ireg to (%di) = frame */
subw $44, %sp
movw %dx, %si
movw %sp, %di
movw $11, %cx
rep; movsd
/* Pop full state from the stack */
popal
popw %gs
popw %fs
popw %es
popw %ds
popfl
/* Actual INT */
.byte 0xcd /* INT opcode */
3: .byte 0
/* Operand byte of the INT instruction; patched at entry */
/* Push full state to the stack */
/* Same 44-byte layout as the input frame, now holding the BIOS results */
pushfl
pushw %ds
pushw %es
pushw %fs
pushw %gs
pushal
/* Re-establish C environment invariants */
cld
movzwl %sp, %esp
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
/* Copy output state from stack frame */
/* 68 = 44-byte output frame + offset 24 of %ecx within the saved pushal */
movw 68(%esp), %di /* Original %cx == 3rd argument */
andw %di, %di
jz 4f
/* oreg is non-NULL: copy the 11 result dwords out to it */
movw %sp, %si
movw $11, %cx
rep; movsd
4: addw $44, %sp
/* Restore state and return */
popal
popw %gs
popw %fs
popfl
retl
.size intcall, .-intcall
+43
View File
@@ -0,0 +1,43 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Very simple bitops for the boot code.
*/
#ifndef BOOT_BITOPS_H
#define BOOT_BITOPS_H
#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
/*
 * Test bit nr of the bitmap at addr, treated as an array of 32-bit
 * words.  Returns 1 if the bit is set, 0 otherwise.  Plain C version,
 * selected by test_bit() when nr is a compile-time constant.
 */
static inline int constant_test_bit(int nr, const void *addr)
{
	const u32 *words = (const u32 *)addr;
	u32 word = words[nr >> 5];	/* word holding the bit */

	return (word >> (nr & 31)) & 1;
}
/*
 * Test bit nr of the bitmap at addr using the btl instruction; the
 * carry flag it produces is captured with setc.  Returns 1 if the bit
 * is set, 0 otherwise.  Selected by test_bit() for a run-time nr.
 */
static inline int variable_test_bit(int nr, const void *addr)
{
u8 v;
const u32 *p = (const u32 *)addr;
asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
return v;
}
/*
 * test_bit(nr, addr): dispatch to the C version when nr is a
 * compile-time constant, otherwise to the btl-based version.
 */
#define test_bit(nr,addr) \
(__builtin_constant_p(nr) ? \
constant_test_bit((nr),(addr)) : \
variable_test_bit((nr),(addr)))
/*
 * Set bit nr in the bitmap at addr using the btsl instruction.  No lock
 * prefix is used.
 */
static inline void set_bit(int nr, void *addr)
{
asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
}
#endif /* BOOT_BITOPS_H */
+369
View File
@@ -0,0 +1,369 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Header file for the real-mode kernel code
*/
#ifndef BOOT_BOOT_H
#define BOOT_BOOT_H
#define STACK_SIZE 512 /* Minimum number of bytes for stack */
#ifndef __ASSEMBLY__
#include <stdarg.h>
#include <linux/types.h>
#include <linux/edd.h>
#include <asm/boot.h>
#include <asm/setup.h>
#include "bitops.h"
#include <asm/cpufeature.h>
#include <asm/processor-flags.h>
#include "ctype.h"
/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
extern struct setup_header hdr;
extern struct boot_params boot_params;
#define cpu_relax() asm volatile("rep; nop")
/*
 * Basic port I/O.
 *
 * out{b,w,l}(v, port) write an 8/16/32-bit value v to I/O port "port";
 * in{b,w,l}(port) read and return an 8/16/32-bit value from it.
 * Note the argument order: value first, port second.
 */
/* Write byte v to port. */
static inline void outb(u8 v, u16 port)
{
asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
}
/* Read a byte from port. */
static inline u8 inb(u16 port)
{
u8 v;
asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
return v;
}
/* Write 16-bit word v to port. */
static inline void outw(u16 v, u16 port)
{
asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
}
/* Read a 16-bit word from port. */
static inline u16 inw(u16 port)
{
u16 v;
asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
return v;
}
/* Write 32-bit value v to port. */
static inline void outl(u32 v, u16 port)
{
asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
}
/* Read a 32-bit value from port. */
static inline u32 inl(u16 port)
{
u32 v;
asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
return v;
}
/*
 * Short I/O delay: performs a dummy byte write to port 0x80.  The value
 * written is whatever %al currently holds; it is not significant.
 */
static inline void io_delay(void)
{
const u16 DELAY_PORT = 0x80;
asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
}
/*
 * These functions are used to reference data in other segments.
 *
 * ds(), fs() and gs() return the current value of the corresponding
 * segment register; set_fs() and set_gs() load a new segment value.
 */
/* Return the current %ds segment value. */
static inline u16 ds(void)
{
u16 seg;
asm("movw %%ds,%0" : "=rm" (seg));
return seg;
}
/* Load %fs with seg; subsequent rdfs*()/wrfs*() calls use this segment. */
static inline void set_fs(u16 seg)
{
asm volatile("movw %0,%%fs" : : "rm" (seg));
}
/* Return the current %fs segment value. */
static inline u16 fs(void)
{
u16 seg;
asm volatile("movw %%fs,%0" : "=rm" (seg));
return seg;
}
/* Load %gs with seg; subsequent rdgs*()/wrgs*() calls use this segment. */
static inline void set_gs(u16 seg)
{
asm volatile("movw %0,%%gs" : : "rm" (seg));
}
/* Return the current %gs segment value. */
static inline u16 gs(void)
{
u16 seg;
asm volatile("movw %%gs,%0" : "=rm" (seg));
return seg;
}
/*
 * Offset within a segment, as used by the far-memory accessors below.
 *
 * rd{fs,gs}{8,16,32}(addr) read an 8/16/32-bit value from offset addr
 * in the segment currently loaded in %fs or %gs; wr{fs,gs}{8,16,32}(v,
 * addr) write v there.  Callers select the segment beforehand with
 * set_fs()/set_gs().
 */
typedef unsigned int addr_t;
/* Read a byte from %fs:addr. */
static inline u8 rdfs8(addr_t addr)
{
u8 v;
asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
return v;
}
/* Read a 16-bit word from %fs:addr. */
static inline u16 rdfs16(addr_t addr)
{
u16 v;
asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
return v;
}
/* Read a 32-bit value from %fs:addr. */
static inline u32 rdfs32(addr_t addr)
{
u32 v;
asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
return v;
}
/* Write byte v to %fs:addr. */
static inline void wrfs8(u8 v, addr_t addr)
{
asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
}
/* Write 16-bit word v to %fs:addr. */
static inline void wrfs16(u16 v, addr_t addr)
{
asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
}
/* Write 32-bit value v to %fs:addr. */
static inline void wrfs32(u32 v, addr_t addr)
{
asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
}
/* Read a byte from %gs:addr. */
static inline u8 rdgs8(addr_t addr)
{
u8 v;
asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
return v;
}
/* Read a 16-bit word from %gs:addr. */
static inline u16 rdgs16(addr_t addr)
{
u16 v;
asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
return v;
}
/* Read a 32-bit value from %gs:addr. */
static inline u32 rdgs32(addr_t addr)
{
u32 v;
asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
return v;
}
/* Write byte v to %gs:addr. */
static inline void wrgs8(u8 v, addr_t addr)
{
asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
}
/* Write 16-bit word v to %gs:addr. */
static inline void wrgs16(u16 v, addr_t addr)
{
asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
}
/* Write 32-bit value v to %gs:addr. */
static inline void wrgs32(u32 v, addr_t addr)
{
asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
}
/*
 * Byte-wise comparisons built on "repe cmpsb".
 *
 * Note: these only return true/false, not a signed return value!
 * The result is 0 if the len bytes are equal and 1 if they differ
 * (setnz on the final comparison).
 *
 * NOTE(review): with len == 0 no comparison is executed, so the result
 * depends on the flags on entry -- confirm callers never pass 0.
 */
/* Compare len bytes at s1 and s2, both in the default data segment. */
static inline int memcmp(const void *s1, const void *s2, size_t len)
{
u8 diff;
asm("repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
/* As memcmp(), but s2 is an offset read through an %fs segment override. */
static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
{
u8 diff;
asm volatile("fs; repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
/* As memcmp(), but s2 is an offset read through a %gs segment override. */
static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
{
u8 diff;
asm volatile("gs; repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
/* Heap -- available for dynamic lists. */
extern char _end[];
extern char *HEAP;
extern char *heap_end;
#define RESET_HEAP() ((void *)( HEAP = _end ))
/*
 * Carve n objects of s bytes each out of the boot heap, aligned to a
 * bytes, and return a pointer to the first one.
 *
 * The heap pointer is first rounded up to a multiple of a using mask
 * arithmetic, which is only correct when a is a power of two (GET_HEAP
 * passes __alignof__(type)).  No bounds check is made here; callers are
 * expected to consult heap_free() first.
 */
static inline char *__get_heap(size_t s, size_t a, size_t n)
{
	size_t mask = a - 1;
	char *block;

	block = (char *)(((size_t)HEAP + mask) & ~mask);
	HEAP = block + s * n;
	return block;
}
#define GET_HEAP(type, n) \
((type *)__get_heap(sizeof(type),__alignof__(type),(n)))
/*
 * Return true if at least n bytes remain between the current heap
 * pointer and heap_end.  The comparison is done on signed ints.
 */
static inline bool heap_free(size_t n)
{
	int room = (int)(heap_end - HEAP);

	return room >= (int)n;
}
/* copy.S */
void copy_to_fs(addr_t dst, void *src, size_t len);
void *copy_from_fs(void *dst, addr_t src, size_t len);
void copy_to_gs(addr_t dst, void *src, size_t len);
void *copy_from_gs(void *dst, addr_t src, size_t len);
void *memcpy(void *dst, void *src, size_t len);
void *memset(void *dst, int c, size_t len);
#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
#define memset(d,c,l) __builtin_memset(d,c,l)
/* a20.c */
int enable_a20(void);
/* apm.c */
int query_apm_bios(void);
/* bioscall.c */
/*
 * Register block passed to and returned from intcall().
 *
 * The same 44 bytes are exposed three ways through an anonymous union:
 * as 32-bit registers, as 16-bit halves, and as individual bytes (the
 * byte view covers only the eight general-purpose registers).
 * Members with a leading underscore (_esp, _fsgs, _dses, ...) are named
 * as placeholders; prefer the narrower views for the segment registers.
 */
struct biosregs {
union {
/* 32-bit view */
struct {
u32 edi;
u32 esi;
u32 ebp;
u32 _esp;
u32 ebx;
u32 edx;
u32 ecx;
u32 eax;
u32 _fsgs;
u32 _dses;
u32 eflags;
};
/* 16-bit view: low half first, then the high half (h-prefixed) */
struct {
u16 di, hdi;
u16 si, hsi;
u16 bp, hbp;
u16 _sp, _hsp;
u16 bx, hbx;
u16 dx, hdx;
u16 cx, hcx;
u16 ax, hax;
u16 gs, fs;
u16 es, ds;
u16 flags, hflags;
};
/* 8-bit view: low byte, high byte, then bytes 2 and 3 of each register */
struct {
u8 dil, dih, edi2, edi3;
u8 sil, sih, esi2, esi3;
u8 bpl, bph, ebp2, ebp3;
u8 _spl, _sph, _esp2, _esp3;
u8 bl, bh, ebx2, ebx3;
u8 dl, dh, edx2, edx3;
u8 cl, ch, ecx2, ecx3;
u8 al, ah, eax2, eax3;
};
};
};
void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
/* cmdline.c */
int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize);
int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option);
/*
 * Look up "option=argument" on the command line referenced by
 * boot_params.hdr.cmd_line_ptr; see __cmdline_find_option() for the
 * return value and buffer handling.
 */
static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize);
}
/*
 * Look up a boolean option on the command line referenced by
 * boot_params.hdr.cmd_line_ptr; see __cmdline_find_option_bool().
 */
static inline int cmdline_find_option_bool(const char *option)
{
return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option);
}
/* cpu.c, cpucheck.c */
struct cpu_features {
int level; /* Family, or 64 for x86-64 */
int model;
u32 flags[NCAPINTS];
};
extern struct cpu_features cpu;
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int validate_cpu(void);
/* early_serial_console.c */
extern int early_serial_base;
void console_init(void);
/* edd.c */
void query_edd(void);
/* header.S */
void __attribute__((noreturn)) die(void);
/* mca.c */
int query_mca(void);
/* memory.c */
int detect_memory(void);
/* pm.c */
void __attribute__((noreturn)) go_to_protected_mode(void);
/* pmjump.S */
void __attribute__((noreturn))
protected_mode_jump(u32 entrypoint, u32 bootparams);
/* printf.c */
int sprintf(char *buf, const char *fmt, ...);
int vsprintf(char *buf, const char *fmt, va_list args);
int printf(const char *fmt, ...);
/* regs.c */
void initregs(struct biosregs *regs);
/* string.c */
int strcmp(const char *str1, const char *str2);
int strncmp(const char *cs, const char *ct, size_t count);
size_t strnlen(const char *s, size_t maxlen);
unsigned int atou(const char *s);
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
/* tty.c */
void puts(const char *);
void putchar(int);
int getchar(void);
void kbd_flush(void);
int getchar_timeout(void);
/* video.c */
void set_video(void);
/* video-mode.c */
int set_mode(u16 mode);
int mode_defined(u16 mode);
void probe_cards(int unsafe);
/* video-vesa.c */
void vesa_store_edid(void);
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
+158
View File
@@ -0,0 +1,158 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Simple command-line parser for early boot.
*/
#include "boot.h"
/*
 * Whitespace test for the command-line parser: every character code up
 * to and including ' ' counts as a separator (note that this includes
 * NUL).  Returns 1 for a separator, 0 otherwise.
 */
static inline int myisspace(u8 c)
{
	return (c > ' ') ? 0 : 1;	/* Close enough approximation */
}
/*
 * Find a non-boolean option, that is, "option=argument", on the command
 * line at linear address cmdline_ptr.  If the option is repeated, the
 * last instance on the command line wins.
 *
 * The argument is copied into buffer, truncated to bufsize-1 characters
 * and NUL-terminated (when bufsize is nonzero).
 *
 * Returns the full length of the argument (even if it was truncated to
 * fit in the buffer), or -1 if the option was not found or the command
 * line is absent or above 1 MiB.
 */
int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize)
{
	enum {
		st_wordstart,	/* Start of word/after whitespace */
		st_wordcmp,	/* Comparing this word */
		st_wordskip,	/* Miscompare, skip */
		st_bufcpy	/* Copying this to buffer */
	} state = st_wordstart;
	const char *want = NULL;
	char *out = buffer;
	int arglen = -1;
	addr_t pos;
	char ch;

	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
		return -1;	/* No command line, or inaccessible */

	/* Address the command line as segment:offset through %fs */
	pos = cmdline_ptr & 0xf;
	set_fs(cmdline_ptr >> 4);

	for (;;) {
		if (pos >= 0x10000)
			break;
		ch = rdfs8(pos++);
		if (!ch)
			break;

		if (state == st_wordstart) {
			if (myisspace(ch))
				continue;
			/* First character of a word: start comparing it */
			state = st_wordcmp;
			want = option;
		}

		if (state == st_wordcmp) {
			if (ch == '=' && !*want) {
				/* Whole option name matched; restart the copy */
				arglen = 0;
				out = buffer;
				state = st_bufcpy;
			} else if (myisspace(ch)) {
				state = st_wordstart;
			} else if (ch != *want++) {
				state = st_wordskip;
			}
		} else if (state == st_wordskip) {
			if (myisspace(ch))
				state = st_wordstart;
		} else {
			/* st_bufcpy: copy the argument, counting every character */
			if (myisspace(ch)) {
				state = st_wordstart;
			} else {
				if (arglen < bufsize-1)
					*out++ = ch;
				arglen++;
			}
		}
	}

	if (bufsize)
		*out = '\0';

	return arglen;
}
/*
 * Find a boolean option (like quiet,noapic,nosmp....) on the command
 * line at linear address cmdline_ptr.  The option must match a whole
 * whitespace-delimited word.
 *
 * Returns the position of that option (starts counting with 1), or 0 if
 * it was not found.
 *
 * NOTE(review): returns -1, not 0, when cmdline_ptr is zero or at/above
 * 1 MiB, so a caller that only tests the result for truth treats a
 * missing command line as "option present" -- confirm against callers.
 */
int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
{
	enum {
		st_wordstart,	/* Start of word/after whitespace */
		st_wordcmp,	/* Comparing this word */
		st_wordskip,	/* Miscompare, skip */
	} state = st_wordstart;
	const char *want = NULL;
	int index = 0, word_index = 0;
	addr_t off;
	char ch;

	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
		return -1;	/* No command line, or inaccessible */

	/* Address the command line as segment:offset through %fs */
	off = cmdline_ptr & 0xf;
	set_fs(cmdline_ptr >> 4);

	for (;;) {
		if (off >= 0x10000)
			break;
		ch = rdfs8(off++);
		index++;

		if (state == st_wordstart) {
			if (!ch)
				return 0;
			if (myisspace(ch))
				continue;
			/* First character of a word: remember where it began */
			state = st_wordcmp;
			want = option;
			word_index = index;
		}

		if (state == st_wordcmp) {
			if (!*want) {
				/* Option exhausted: a match only if the word ends here */
				if (!ch || myisspace(ch))
					return word_index;
				state = st_wordskip;
			} else if (!ch) {
				return 0;
			} else if (ch != *want++) {
				state = st_wordskip;
			}
		} else {
			/* st_wordskip: discard the rest of a non-matching word */
			if (!ch)
				return 0;
			if (myisspace(ch))
				state = st_wordstart;
		}
	}

	return 0;	/* Buffer overrun */
}
+15
View File
@@ -0,0 +1,15 @@
/*
* code16gcc.h
*
* This file is -include'd when compiling 16-bit C code.
* Note: this asm() needs to be emitted before gcc emits any code.
* Depending on gcc version, this requires -fno-unit-at-a-time or
* -fno-toplevel-reorder.
*
* Hopefully gcc will eventually have a real -m16 option so we can
* drop this hack long term.
*/
#ifndef __ASSEMBLY__
asm(".code16gcc");
#endif
+76
View File
@@ -0,0 +1,76 @@
#
# linux/arch/x86/boot/compressed/Makefile
#
# create a compressed vmlinux image from the original vmlinux
#
# Everything this Makefile may generate.
targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
# The flags are assigned with ":=" (not appended to), so the decompressor is
# built with its own self-contained flag set: position independent (-fPIC),
# with branch profiling disabled.
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
cflags-$(CONFIG_X86_64) := -mcmodel=small
KBUILD_CFLAGS += $(cflags-y)
# These two options are only added when the compiler accepts them.
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
# Assembly files get the same flags plus __ASSEMBLY__.
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
LDFLAGS := -m elf_$(UTS_MACHINE)
# "-T" is followed by the first prerequisite of the link, i.e. vmlinux.lds,
# which is why the linker script is listed first in VMLINUX_OBJS below.
LDFLAGS_vmlinux := -T
# mkpiggy is a host tool; it needs tools/include for <tools/le_byteshift.h>.
hostprogs-y := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
$(obj)/piggy.o
# The EFI stub objects are only linked in when CONFIG_EFI_STUB=y.
ifeq ($(CONFIG_EFI_STUB), y)
VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
endif
$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
$(call if_changed,ld)
@:
# Strip the main kernel image (prerequisite "vmlinux" without $(obj)/) of its
# .comment section and symbols before compressing it.
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
targets += vmlinux.bin.all vmlinux.relocs
CMD_RELOCS = arch/x86/tools/relocs
quiet_cmd_relocs = RELOCS $@
# The relocs tool is run twice: once writing its output to the target, and once
# more with --abs-relocs (that second run's output is not redirected).
cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
$(obj)/vmlinux.relocs: vmlinux FORCE
$(call if_changed,relocs)
# Input of the compressors: vmlinux.bin, followed by vmlinux.relocs when
# CONFIG_X86_NEED_RELOCS is set.
vmlinux.bin.all-y := $(obj)/vmlinux.bin
vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,gzip)
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
$(call if_changed,bzip2)
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzma)
$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,xzkern)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzo)
# suffix-y ends up holding the file suffix of the configured compressor.
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
quiet_cmd_mkpiggy = MKPIGGY $@
# If mkpiggy fails, remove the partially written output and fail the rule.
cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
targets += piggy.S
$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
$(call if_changed,mkpiggy)
+21
View File
@@ -0,0 +1,21 @@
#include "misc.h"
/* Linear base address added to every offset read through rdfs8(). */
static unsigned long fs;

/*
 * Record the base for subsequent rdfs8() calls.  @seg is a real-mode style
 * segment value; the stored base is that value multiplied by 16.
 */
static inline void set_fs(unsigned long seg)
{
	unsigned long base = seg << 4;	/* shift it back */

	fs = base;
}

typedef unsigned long addr_t;

/* Read one byte located @addr bytes past the base recorded by set_fs(). */
static inline char rdfs8(addr_t addr)
{
	const char *byte = (const char *)(fs + addr);

	return *byte;
}
#include "../cmdline.c"
/*
 * Look up @option on the kernel command line referenced by the boot
 * parameters (real_mode->hdr.cmd_line_ptr) and pass @buffer/@bufsize through
 * to __cmdline_find_option() from the included ../cmdline.c.  The return
 * value is whatever that helper returns.
 */
int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
}
/*
 * Test for the boolean @option on the kernel command line referenced by the
 * boot parameters (real_mode->hdr.cmd_line_ptr).  Forwards to
 * __cmdline_find_option_bool() from the included ../cmdline.c and returns its
 * result unchanged; callers in misc.c treat a non-zero result as "present".
 */
int cmdline_find_option_bool(const char *option)
{
return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
}
@@ -0,0 +1,5 @@
#include "misc.h"
int early_serial_base;
#include "../early_serial_console.c"
File diff suppressed because it is too large Load Diff
+61
View File
@@ -0,0 +1,61 @@
#ifndef BOOT_COMPRESSED_EBOOT_H
#define BOOT_COMPRESSED_EBOOT_H
/*
 * Bit values used when filling in segment descriptors.
 * NOTE(review): presumably these are applied to the individual descriptor
 * bit-fields (type, d, g, s) rather than to a raw descriptor word, which
 * would explain why several of them share the value (1 << 0) -- confirm
 * against the users in eboot.c.
 */
#define SEG_TYPE_DATA (0 << 3)
#define SEG_TYPE_READ_WRITE (1 << 1)
#define SEG_TYPE_CODE (1 << 3)
#define SEG_TYPE_EXEC_READ (1 << 1)
#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
#define SEG_OP_SIZE_32BIT (1 << 0)
#define SEG_GRANULARITY_4KB (1 << 0)
#define DESC_TYPE_CODE_DATA (1 << 0)
/* EFI_PAGE_SHIFT is not defined in this header; it must come from elsewhere. */
#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
/* 1 MiB */
#define EFI_READ_CHUNK_SIZE (1024 * 1024)
/*
 * Pixel format identifiers, stored in
 * efi_graphics_output_mode_info.pixel_format.
 * NOTE(review): the numbering looks like the UEFI Graphics Output Protocol
 * pixel-format enumeration -- confirm against the UEFI specification.
 */
#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
#define PIXEL_BIT_MASK 2
#define PIXEL_BLT_ONLY 3
#define PIXEL_FORMAT_MAX 4
/*
 * Per-channel bit masks describing a pixel; embedded in
 * struct efi_graphics_output_mode_info as pixel_information.
 * NOTE(review): presumably only meaningful when pixel_format is
 * PIXEL_BIT_MASK -- confirm against the UEFI specification.
 */
struct efi_pixel_bitmask {
u32 red_mask;
u32 green_mask;
u32 blue_mask;
u32 reserved_mask;
};
/*
 * Description of one graphics mode.  Declared __packed, so the layout is
 * exactly the field sequence below with no padding.
 */
struct efi_graphics_output_mode_info {
u32 version;
u32 horizontal_resolution;
u32 vertical_resolution;
int pixel_format; /* one of the PIXEL_* values defined above */
struct efi_pixel_bitmask pixel_information;
u32 pixels_per_scan_line;
} __packed;
/*
 * Current-mode block referenced by efi_graphics_output_protocol.mode.
 * Declared __packed.  The "unsigned long" members are native word sized, so
 * the layout differs between 32-bit and 64-bit builds.
 */
struct efi_graphics_output_protocol_mode {
u32 max_mode;
u32 mode;
unsigned long info; /* presumably the address of a mode_info structure -- confirm */
unsigned long size_of_info;
u64 frame_buffer_base;
unsigned long frame_buffer_size;
} __packed;
/*
 * Graphics output protocol interface: three entries for the protocol's
 * services followed by a pointer to the current-mode block.  Note that
 * query_mode is declared as a pointer while set_mode and blt are declared as
 * unsigned long; all three are native word sized.
 */
struct efi_graphics_output_protocol {
void *query_mode;
unsigned long set_mode;
unsigned long blt;
struct efi_graphics_output_protocol_mode *mode;
};
/*
 * UGA draw protocol interface: three opaque service entry points.
 * NOTE(review): presumably the older alternative to the graphics output
 * protocol -- confirm against the UEFI/EFI specification.
 */
struct efi_uga_draw_protocol {
void *get_mode;
void *set_mode;
void *blt;
};
#endif /* BOOT_COMPRESSED_EBOOT_H */
@@ -0,0 +1,86 @@
/*
* EFI call stub for IA32.
*
* This stub allows us to make EFI calls in physical mode with interrupts
* turned off. Note that this implementation is different from the one in
* arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
* mode at this point.
*/
#include <linux/linkage.h>
#include <asm/page_types.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
* All the callers of this function assure that all the parameters are 4-bytes.
*/
/*
* In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
* So we'd better save all of them at the beginning of this function and restore
* at the end no matter how many we use, because we can not assure EFI runtime
* service functions will comply with gcc calling convention, too.
*/
.text
/*
* efi_call_phys(function, arg1, ..., argN)
*
* Calls "function" with arg1..argN already in place on the stack.  The
* caller's return address and the function pointer are popped off and parked
* in the two .data words at the end of this file, so this routine is not
* reentrant.  The callee's return value in %eax is passed through untouched.
*
* NOTE(review): the file header says all callee-saved registers should be
* saved here, but no register is saved or restored in this routine -- confirm
* that this is intended.
*/
ENTRY(efi_call_phys)
/*
* 0. The function can only be called in Linux kernel. So CS has been
* set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
* the values of these registers are the same. And, the corresponding
* GDT entries are identical. So I will do nothing about segment reg
* and GDT, but change GDT base register in prolog and epilog.
*
* NOTE(review): no GDT base change is visible in this routine; the sentence
* above may be left over from the platform/efi variant -- confirm.
*/
/*
* 1. Because we haven't been relocated by this point we need to
* use relative addressing.
*
* call/pop yields the run-time address of label 1; subtracting its link-time
* address leaves the load offset in %edx.
*/
call 1f
1: popl %edx
subl $1b, %edx
/*
* 2. Now on the top of stack is the return
* address in the caller of efi_call_phys(), then parameter 1,
* parameter 2, ..., param n. To make things easy, we save the return
* address of efi_call_phys in a global variable.
*/
popl %ecx
movl %ecx, saved_return_addr(%edx)
/* get the function pointer into ECX*/
popl %ecx
movl %ecx, efi_rt_function_ptr(%edx)
/*
* 3. Call the physical function.
*
* After the two pops above, the remaining stack contents are exactly the
* arguments for the callee.
*/
call *%ecx
/*
* 4. Balance the stack. And because EAX contain the return value,
* we'd better not clobber it. We need to calculate our address
* again because %ecx and %edx are not preserved across EFI function
* calls.
*/
call 1f
1: popl %edx
subl $1b, %edx
/* Put the function pointer back so the caller sees the stack it set up. */
movl efi_rt_function_ptr(%edx), %ecx
pushl %ecx
/*
* 5. Push the saved return address onto the stack and return.
*/
movl saved_return_addr(%edx), %ecx
pushl %ecx
ret
ENDPROC(efi_call_phys)
.previous
/* Scratch storage used by efi_call_phys() across the call. */
.data
saved_return_addr:
.long 0
efi_rt_function_ptr:
.long 0
@@ -0,0 +1 @@
#include "../../platform/efi/efi_stub_64.S"
+225
View File
@@ -0,0 +1,225 @@
/*
* linux/boot/head.S
*
* Copyright (C) 1991, 1992, 1993 Linus Torvalds
*/
/*
* head.S contains the 32-bit startup code.
*
* NOTE!!! Startup happens at absolute address 0x00001000, which is also where
* the page directory will exist. The startup code will be overwritten by
* the page directory. [According to comments etc elsewhere on a compressed
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
*
* Page 0 is deliberately kept safe, since System Management Mode code in
* laptops may need to access the BIOS data stored there. This is also
* useful for future device drivers that either access the BIOS via VM86
* mode.
*/
/*
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
.text
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
__HEAD
/*
* 32-bit entry point of the compressed kernel.
*
* On entry %esi is used as the base for all BP_* offsets (the boot parameter
* block).  The code works out where it is running, picks the address the
* image must be moved to for safe in-place decompression (%ebx), copies
* itself there and continues at "relocated".
*/
ENTRY(startup_32)
#ifdef CONFIG_EFI_STUB
/* Non-EFI entry: skip the EFI stub entry that sits at offset 0x10. */
jmp preferred_addr
.balign 0x10
/*
* We don't need the return address, so set up the stack so
* efi_main() can find its arguments.
*/
add $0x4, %esp
call efi_main
/*
* A zero return value means failure.  The movl does not change the flags, so
* the jne still tests the cmpl result; %esi then holds efi_main()'s return
* value, which is used as the BP_* base from here on.
*/
cmpl $0, %eax
movl %eax, %esi
jne 2f
1:
/* EFI init failed, so hang. */
hlt
jmp 1b
2:
/*
* Compute the run-time address of preferred_addr: load offset (call/pop minus
* link address), minus the pref_address field, plus the code32_start field.
*/
call 3f
3:
popl %eax
subl $3b, %eax
subl BP_pref_address(%esi), %eax
add BP_code32_start(%esi), %eax
leal preferred_addr(%eax), %eax
jmp *%eax
preferred_addr:
#endif
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi)
jnz 1f
cli
movl $__BOOT_DS, %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %fs
movl %eax, %gs
movl %eax, %ss
1:
/*
* Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) are used as the stack
* for this calculation. Only 4 bytes are needed.
*/
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
subl $1b, %ebp
/*
* %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*/
#ifdef CONFIG_RELOCATABLE
/* %ebx = %ebp rounded up to the kernel_alignment field (align - 1 mask). */
movl %ebp, %ebx
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
notl %eax
andl %eax, %ebx
#else
movl $LOAD_PHYSICAL_ADDR, %ebx
#endif
/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
/* Set up the stack */
leal boot_stack_end(%ebx), %esp
/* Zero EFLAGS */
pushl $0
popfl
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*
* The copy runs backwards (std), starting at the last dword before _bss, and
* moves (_bss - startup_32) bytes.  %esi is saved around the copy because
* movsl uses it as the source pointer.
*/
pushl %esi
leal (_bss-4)(%ebp), %esi
leal (_bss-4)(%ebx), %edi
movl $(_bss - startup_32), %ecx
shrl $2, %ecx
std
rep movsl
cld
popl %esi
/*
* Jump to the relocated address.
*/
leal relocated(%ebx), %eax
jmp *%eax
ENDPROC(startup_32)
.text
/*
* Runs from the relocated copy of the image.  On entry %ebx is the base
* address of that copy and %esi is the real mode pointer.  Clears .bss, fixes
* up the GOT, calls decompress_kernel(), applies the kernel's relocations
* when CONFIG_RELOCATABLE is set, and jumps to the decompressed kernel.
*/
relocated:
/*
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
leal _bss(%ebx), %edi
leal _ebss(%ebx), %ecx
subl %edi, %ecx
shrl $2, %ecx
rep stosl
/*
* Adjust our own GOT
*
* Every 4-byte entry in [_got, _egot) gets the relocation base in %ebx added.
*/
leal _got(%ebx), %edx
leal _egot(%ebx), %ecx
1:
cmpl %ecx, %edx
jae 2f
addl %ebx, (%edx)
addl $4, %edx
jmp 1b
2:
/*
* Do the decompression, and jump to the new kernel..
*
* %ebp = %ebx - z_extract_offset, the address the kernel is decompressed to.
*/
leal z_extract_offset_negative(%ebx), %ebp
/* push arguments for decompress_kernel: */
pushl %ebp /* output address */
pushl $z_input_len /* input_len */
leal input_data(%ebx), %eax
pushl %eax /* input_data */
leal boot_heap(%ebx), %eax
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
call decompress_kernel
/* Drop the five 4-byte arguments. */
addl $20, %esp
/*
* Use #ifdef here, like the CONFIG_RELOCATABLE test in startup_32, so that an
* undefined symbol is never evaluated in a preprocessor expression.
*/
#ifdef CONFIG_RELOCATABLE
/*
* Find the address of the relocations.
*/
leal z_output_len(%ebp), %edi
/*
* Calculate the delta between where vmlinux was compiled to run
* and where it was actually loaded.
*/
movl %ebp, %ebx
subl $LOAD_PHYSICAL_ADDR, %ebx
jz 2f /* Nothing to be done if loaded at compiled addr. */
/*
* Process relocations.
*
* The table is walked downwards from the end of the decompressed image; a
* zero entry terminates it.  Each entry is an address to which the delta in
* %ebx is added after converting it with -__PAGE_OFFSET.
*/
1: subl $4, %edi
movl (%edi), %ecx
testl %ecx, %ecx
jz 2f
addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
jmp 1b
2:
#endif
/*
* Jump to the decompressed kernel.
*/
xorl %ebx, %ebx
jmp *%ebp
/*
* Stack and heap for uncompression
*/
/*
* boot_heap is handed to decompress_kernel() as its heap area; the stack
* pointer is initialised to boot_stack_end.
*/
.bss
.balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
+371
View File
@@ -0,0 +1,371 @@
/*
* linux/boot/head.S
*
* Copyright (C) 1991, 1992, 1993 Linus Torvalds
*/
/*
* head.S contains the 32-bit startup code.
*
* NOTE!!! Startup happens at absolute address 0x00001000, which is also where
* the page directory will exist. The startup code will be overwritten by
* the page directory. [According to comments etc elsewhere on a compressed
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
*
* Page 0 is deliberately kept safe, since System Management Mode code in
* laptops may need to access the BIOS data stored there. This is also
* useful for future device drivers that either access the BIOS via VM86
* mode.
*/
/*
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
.code32
.text
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/pgtable_types.h>
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
__HEAD
.code32
/*
* 32-bit entry point of the 64-bit compressed kernel.
*
* %esi is used as the base for the BP_* offsets.  The code verifies that the
* CPU supports long mode, builds an identity-mapped boot page table, enables
* PAE, long mode and paging, and far-returns into startup_64.
*/
ENTRY(startup_32)
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi)
jnz 1f
cli
movl $(__KERNEL_DS), %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
1:
/*
* Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) are used as the stack
* for this calculation. Only 4 bytes are needed.
*/
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
subl $1b, %ebp
/* setup a stack and make sure cpu supports long mode. */
movl $boot_stack_end, %eax
addl %ebp, %eax
movl %eax, %esp
/* A non-zero result from verify_cpu means the CPU cannot run this kernel. */
call verify_cpu
testl %eax, %eax
jnz no_longmode
/*
* Compute the delta between where we were compiled to run at
* and where the code will actually run at.
*
* %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*/
#ifdef CONFIG_RELOCATABLE
/* %ebx = %ebp rounded up to the kernel_alignment field (align - 1 mask). */
movl %ebp, %ebx
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
notl %eax
andl %eax, %ebx
#else
movl $LOAD_PHYSICAL_ADDR, %ebx
#endif
/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
/*
* Prepare for entering 64 bit mode
*/
/* Load new GDT with the 64bit segments using 32bit descriptor */
/* Patch the run-time address of the table into its own base field first. */
leal gdt(%ebp), %eax
movl %eax, gdt+2(%ebp)
lgdt gdt(%ebp)
/* Enable PAE mode */
movl $(X86_CR4_PAE), %eax
movl %eax, %cr4
/*
* Build early 4G boot pagetable
*
* Six 4K pages are used: one level-4 table, one level-3 table and four
* level-2 tables.
*/
/* Initialize Page tables to 0 */
leal pgtable(%ebx), %edi
xorl %eax, %eax
movl $((4096*6)/4), %ecx
rep stosl
/* Build Level 4 */
/* One entry: address of the level-3 table (pgtable + 0x1000) with flags 0x007. */
leal pgtable + 0(%ebx), %edi
leal 0x1007 (%edi), %eax
movl %eax, 0(%edi)
/* Build Level 3 */
/* Four entries, each pointing at the next 4K level-2 table, flags 0x007. */
leal pgtable + 0x1000(%ebx), %edi
leal 0x1007(%edi), %eax
movl $4, %ecx
1: movl %eax, 0x00(%edi)
addl $0x00001000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/* Build Level 2 */
/*
* 2048 entries with flags 0x183, each 2MB (0x00200000) further on than the
* previous one and starting at physical address 0: 2048 * 2MB = 4GB.
*/
leal pgtable + 0x2000(%ebx), %edi
movl $0x00000183, %eax
movl $2048, %ecx
1: movl %eax, 0(%edi)
addl $0x00200000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/* Enable the boot page tables */
leal pgtable(%ebx), %eax
movl %eax, %cr3
/* Enable Long mode in EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
wrmsr
/*
* Setup for the jump to 64bit mode
*
* When the jump is performed we will be in long mode but
* in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
* (and in turn EFER.LMA = 1). To jump into 64bit mode we use
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
* We place all of the values on our mini stack so lret can be
* used to perform that far jump.
*/
pushl $__KERNEL_CS
leal startup_64(%ebp), %eax
pushl %eax
/* Enter paged protected Mode, activating Long Mode */
movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
movl %eax, %cr0
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
ENDPROC(startup_32)
/* Reached from startup_32 when verify_cpu returns non-zero; never returns. */
no_longmode:
/* This isn't an x86-64 CPU so hang */
1:
hlt
jmp 1b
#include "../../kernel/verify_cpu.S"
/*
* Be careful here startup_64 needs to be at a predictable
* address so I can export it in an ELF header. Bootloaders
* should look at the ELF header to find this address, as
* it may change in the future.
*/
.code64
.org 0x200
/*
* 64-bit entry point, placed at offset 0x200 by the .org directive.  %rsi is
* used as the base for the BP_* offsets.
*/
ENTRY(startup_64)
/*
* We come here either from startup_32 or directly from a
* 64bit bootloader. If we come here from a bootloader we depend on
* an identity mapped page table being provided that maps our
* entire text+data+bss and hopefully all of memory.
*/
#ifdef CONFIG_EFI_STUB
/*
* The entry point for the PE/COFF executable is 0x210, so only
* legacy boot loaders will execute this jmp.
*/
jmp preferred_addr
.org 0x210
/* Pass %rcx and %rdx on as the first two arguments of efi_main(). */
mov %rcx, %rdi
mov %rdx, %rsi
call efi_main
/* A zero return value means failure; otherwise it becomes the BP_* base. */
movq %rax,%rsi
cmpq $0,%rax
jne 2f
1:
/* EFI init failed, so hang. */
hlt
jmp 1b
2:
/*
* Compute the run-time address of preferred_addr: load offset (call/pop minus
* link address), minus the pref_address field, plus the code32_start field.
*
* NOTE(review): the add below uses 32-bit operands (%esi as the address and
* %eax as the destination), which clears the upper half of %rax -- confirm
* that the image and the boot parameters are always below 4GB on this path.
*/
call 3f
3:
popq %rax
subq $3b, %rax
subq BP_pref_address(%rsi), %rax
add BP_code32_start(%esi), %eax
leaq preferred_addr(%rax), %rax
jmp *%rax
preferred_addr:
#endif
/* Setup data segments. */
xorl %eax, %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
movl %eax, %fs
movl %eax, %gs
lldt %ax
/* Selector 0x20 is the "TS descriptor" entry of the gdt table in this file. */
movl $0x20, %eax
ltr %ax
/*
* Compute the decompressed kernel start address. It is where
* we were loaded at aligned to a 2M boundary. %rbp contains the
* decompressed kernel start address.
*
* If it is a relocatable kernel then decompress and run the kernel
* from load address aligned to 2MB addr, otherwise decompress and
* run the kernel from LOAD_PHYSICAL_ADDR
*
* We cannot rely on the calculation done in 32-bit mode, since we
* may have been invoked via the 64-bit entry point.
*/
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
/* %rbp = address of startup_32 rounded up to the kernel_alignment field. */
leaq startup_32(%rip) /* - $startup_32 */, %rbp
movl BP_kernel_alignment(%rsi), %eax
decl %eax
addq %rax, %rbp
notq %rax
andq %rax, %rbp
#else
movq $LOAD_PHYSICAL_ADDR, %rbp
#endif
/* Target address to relocate to for decompression */
leaq z_extract_offset(%rbp), %rbx
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
/* Zero EFLAGS */
pushq $0
popfq
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*
* The copy runs backwards (std), starting at the last quadword before _bss.
* %rsi is saved around the copy because movsq uses it as the source pointer.
*/
pushq %rsi
leaq (_bss-8)(%rip), %rsi
leaq (_bss-8)(%rbx), %rdi
movq $_bss /* - $startup_32 */, %rcx
shrq $3, %rcx
std
rep movsq
cld
popq %rsi
/*
* Jump to the relocated address.
*/
leaq relocated(%rbx), %rax
jmp *%rax
.text
/*
* Runs from the relocated copy of the image.  On entry %rbx is the base of
* that copy, %rbp the address the kernel is decompressed to and %rsi the real
* mode pointer.
*/
relocated:
/*
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
leaq _bss(%rip), %rdi
leaq _ebss(%rip), %rcx
subq %rdi, %rcx
shrq $3, %rcx
rep stosq
/*
* Adjust our own GOT
*
* Every 8-byte entry in [_got, _egot) gets the relocation base in %rbx added.
*/
leaq _got(%rip), %rdx
leaq _egot(%rip), %rcx
1:
cmpq %rcx, %rdx
jae 2f
addq %rbx, (%rdx)
addq $8, %rdx
jmp 1b
2:
/*
* Do the decompression, and jump to the new kernel..
*
* The five arguments of decompress_kernel() are passed in
* %rdi, %rsi, %rdx, %rcx and %r8.
*/
pushq %rsi /* Save the real mode argument */
movq %rsi, %rdi /* real mode address */
leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
leaq input_data(%rip), %rdx /* input_data */
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
call decompress_kernel
popq %rsi
/*
* Jump to the decompressed kernel.
*/
jmp *%rbp
/*
* Boot GDT.  The first eight bytes (limit word, base long, padding word) are
* the operand loaded by lgdt in startup_32, which patches the run-time
* address into the base field at gdt+2.  The descriptors follow at offsets
* 0x08, 0x10, 0x18 and 0x20; startup_64 loads the task register with
* selector 0x20.
*/
.data
gdt:
.word gdt_end - gdt
.long gdt
.word 0
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
gdt_end:
/*
* Stack and heap for uncompression
*/
/*
* boot_heap is handed to decompress_kernel() as its heap area; the stack
* pointer is initialised to boot_stack_end.
*/
.bss
.balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
/*
* Space for page tables (not in .bss so not zeroed)
*
* Six 4K pages; startup_32 zeroes and fills them in before loading %cr3.
*/
.section ".pgtable","a",@nobits
.balign 4096
pgtable:
.fill 6*4096, 1, 0
+379
View File
@@ -0,0 +1,379 @@
/*
* misc.c
*
* This is a collection of several routines from gzip-1.0.3
* adapted for Linux.
*
* malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
* puts by Nick Holloway 1993, better puts by Martin Mares 1995
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
#include "misc.h"
/* WARNING!!
* This code is compiled with -fPIC and it is relocated dynamically
* at run time, but no relocation processing is performed.
* This means that it is not safe to place pointers in static structures.
*/
/*
* Getting to provable safe in place decompression is hard.
* Worst case behaviours need to be analyzed.
* Background information:
*
* The file layout is:
* magic[2]
* method[1]
* flags[1]
* timestamp[4]
* extraflags[1]
* os[1]
* compressed data blocks[N]
* crc[4] orig_len[4]
*
* resulting in 18 bytes of non compressed data overhead.
*
* Files divided into blocks
* 1 bit (last block flag)
* 2 bits (block type)
*
* 1 block occurs every 32K -1 bytes or when 50% compression
* has been achieved. The smallest block type encoding is always used.
*
* stored:
* 32 bits length in bytes.
*
* fixed:
* magic fixed tree.
* symbols.
*
* dynamic:
* dynamic tree encoding.
* symbols.
*
*
* The buffer for decompression in place is the length of the
* uncompressed data, plus a small amount extra to keep the algorithm safe.
* The compressed data is placed at the end of the buffer. The output
* pointer is placed at the start of the buffer and the input pointer
* is placed where the compressed data starts. Problems will occur
* when the output pointer overruns the input pointer.
*
* The output pointer can only overrun the input pointer if the input
* pointer is moving faster than the output pointer. A condition only
* triggered by data whose compressed form is larger than the uncompressed
* form.
*
* The worst case at the block level is a growth of the compressed data
* of 5 bytes per 32767 bytes.
*
* The worst case internal to a compressed block is very hard to figure.
* The worst case can at least be bounded by having one bit that represents
* 32764 bytes and then all of the rest of the bytes representing the very
* very last byte.
*
* All of which is enough to compute an amount of extra data that is required
* to be safe. To avoid problems at the block level allocating 5 extra bytes
* per 32767 bytes of data is sufficient. To avoid problems internal to a
* block adding an extra 32767 bytes (the worst case uncompressed block size)
* is sufficient, to ensure that in the worst case the decompressed data for
* block will stop the byte before the compressed data for a block begins.
* To avoid problems with the compressed data's meta information an extra 18
* bytes are needed. Leading to the formula:
*
* extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
*
* Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
* Adding 32768 instead of 32767 just makes for round numbers.
* Adding the decompressor_size is necessary as it must live after all
* of the data as well. Last I measured the decompressor is about 14K.
* 10K of actual data and 4K of bss.
*
*/
/*
* gzip declarations
*/
#define STATIC static
#undef memset
#undef memcpy
#define memzero(s, n) memset((s), 0, (n))
static void error(char *m);
/*
* This is set up by the setup-routine at boot-time
*/
struct boot_params *real_mode; /* Pointer to real-mode data */
static int quiet;
static int debug;
void *memset(void *s, int c, size_t n);
void *memcpy(void *dest, const void *src, size_t n);
#ifdef CONFIG_X86_64
#define memptr long
#else
#define memptr unsigned
#endif
static memptr free_mem_ptr;
static memptr free_mem_end_ptr;
static char *vidmem;
static int vidport;
static int lines, cols;
#ifdef CONFIG_KERNEL_GZIP
#include "../../../../lib/decompress_inflate.c"
#endif
#ifdef CONFIG_KERNEL_BZIP2
#include "../../../../lib/decompress_bunzip2.c"
#endif
#ifdef CONFIG_KERNEL_LZMA
#include "../../../../lib/decompress_unlzma.c"
#endif
#ifdef CONFIG_KERNEL_XZ
#include "../../../../lib/decompress_unxz.c"
#endif
#ifdef CONFIG_KERNEL_LZO
#include "../../../../lib/decompress_unlzo.c"
#endif
/*
 * Scroll the text screen up by one row: move rows 1..lines-1 up one row and
 * blank the character cells of the last row.  Each cell is two bytes; only
 * the first byte of each cell in the last row is overwritten.
 */
static void scroll(void)
{
	int row_bytes = cols * 2;
	int last_row = (lines - 1) * row_bytes;
	int screen_end = last_row + row_bytes;
	int off;

	memcpy(vidmem, vidmem + row_bytes, last_row);

	off = last_row;
	while (off < screen_end) {
		vidmem[off] = ' ';
		off += 2;
	}
}
#define XMTRDY 0x20
#define TXR 0 /* Transmit register (WRITE) */
#define LSR 5 /* Line Status */
/*
 * Write one character to the early serial port.  The line status register is
 * polled until XMTRDY is set or the retry budget runs out; the character is
 * written either way.
 */
static void serial_putchar(int ch)
{
	unsigned timeout = 0xffff;

	for (;;) {
		if ((inb(early_serial_base + LSR) & XMTRDY) != 0)
			break;
		if (!--timeout)
			break;
		cpu_relax();
	}

	outb(ch, early_serial_base + TXR);
}
/*
 * Print the NUL-terminated string @s on the early serial port (when one is
 * configured) and on the text-mode screen, then update the stored cursor
 * position and the hardware cursor.
 *
 * Unless CONFIG_X86_VERBOSE_BOOTUP is defined, nothing is printed when
 * @error is zero.
 */
void __putstr(int error, const char *s)
{
	int x, y, pos;
	char c;

#ifndef CONFIG_X86_VERBOSE_BOOTUP
	if (!error)
		return;
#endif

	if (early_serial_base) {
		const char *p;

		/* A newline goes out as CR followed by LF. */
		for (p = s; *p; p++) {
			if (*p == '\n')
				serial_putchar('\r');
			serial_putchar(*p);
		}
	}

	/* No usable text screen: video mode 0 and no geometry recorded. */
	if (real_mode->screen_info.orig_video_mode == 0 &&
	    lines == 0 && cols == 0)
		return;

	x = real_mode->screen_info.orig_x;
	y = real_mode->screen_info.orig_y;

	while ((c = *s++) != '\0') {
		int next_row;

		if (c == '\n') {
			next_row = 1;
		} else {
			vidmem[(x + cols * y) * 2] = c;
			next_row = (++x >= cols);
		}

		if (!next_row)
			continue;

		/* Start a new row, scrolling when already on the last one. */
		x = 0;
		if (++y >= lines) {
			scroll();
			y--;
		}
	}

	real_mode->screen_info.orig_x = x;
	real_mode->screen_info.orig_y = y;

	/* Update cursor position: pos is a byte offset, the cursor is in cells. */
	pos = (x + cols * y) * 2;
	outb(14, vidport);
	outb(0xff & (pos >> 9), vidport+1);
	outb(15, vidport);
	outb(0xff & (pos >> 1), vidport+1);
}
/*
 * Fill the first @n bytes at @s with the byte value @c and return @s.
 *
 * The index has the same type as @n, so the comparison is not mixed
 * signed/unsigned and the counter cannot overflow for large @n.
 */
void *memset(void *s, int c, size_t n)
{
	size_t i;
	char *ss = s;

	for (i = 0; i < n; i++)
		ss[i] = c;
	return s;
}
/*
 * memcpy() for the decompressor: copy @n bytes from @src to @dest and return
 * @dest.  Whole words are copied first with "rep movs", then the remaining
 * n % wordsize bytes with "rep movsb".
 *
 * d0/d1/d2 are dummy outputs: they tell the compiler that the count,
 * destination and source registers are modified by the asm.  The "memory"
 * clobber covers the bytes written through @dest.
 *
 * NOTE(review): the copy ascends only if the direction flag is clear on
 * entry -- presumably guaranteed by the calling convention; confirm.
 */
#ifdef CONFIG_X86_32
void *memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
asm volatile(
"rep ; movsl\n\t" /* n / 4 dwords (count preloaded via "0") */
"movl %4,%%ecx\n\t" /* reload the count with n % 4 */
"rep ; movsb\n\t" /* trailing bytes */
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
: "memory");
return dest;
}
#else
void *memcpy(void *dest, const void *src, size_t n)
{
long d0, d1, d2;
asm volatile(
"rep ; movsq\n\t" /* n / 8 quadwords (count preloaded via "0") */
"movq %4,%%rcx\n\t" /* reload the count with n % 8 */
"rep ; movsb\n\t" /* trailing bytes */
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
: "memory");
return dest;
}
#endif
/*
 * Print the fatal message @x (always shown: it is emitted with the error
 * flag set) followed by a halt notice, then halt the CPU forever.  This
 * function does not return.
 */
static void error(char *x)
{
	__putstr(1, "\n\n");
	__putstr(1, x);
	__putstr(1, "\n\n -- System halted");

	for (;;)
		asm("hlt");
}
/*
 * Interpret the decompressed image at @output as an ELF file and copy every
 * PT_LOAD segment to its destination.
 *
 * With CONFIG_RELOCATABLE the destination is @output plus the segment's
 * distance from LOAD_PHYSICAL_ADDR; otherwise it is the segment's p_paddr.
 * All other program header types are ignored.
 */
static void parse_elf(void *output)
{
#ifdef CONFIG_X86_64
	Elf64_Ehdr ehdr;
	Elf64_Phdr *phdrs, *phdr;
#else
	Elf32_Ehdr ehdr;
	Elf32_Phdr *phdrs, *phdr;
#endif
	void *dest;

	memcpy(&ehdr, output, sizeof(ehdr));
	if (!(ehdr.e_ident[EI_MAG0] == ELFMAG0 &&
	      ehdr.e_ident[EI_MAG1] == ELFMAG1 &&
	      ehdr.e_ident[EI_MAG2] == ELFMAG2 &&
	      ehdr.e_ident[EI_MAG3] == ELFMAG3)) {
		error("Kernel is not a valid ELF file");
		return;
	}

	if (!quiet)
		putstr("Parsing ELF... ");

	/* Work on a private copy of the program header table. */
	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
	if (!phdrs)
		error("Failed to allocate space for phdrs");

	memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);

	for (phdr = phdrs; phdr < phdrs + ehdr.e_phnum; phdr++) {
		if (phdr->p_type != PT_LOAD)
			continue;	/* Ignore other PT_* */

#ifdef CONFIG_RELOCATABLE
		dest = output + (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
#else
		dest = (void *)(phdr->p_paddr);
#endif
		memcpy(dest, output + phdr->p_offset, phdr->p_filesz);
	}

	free(phdrs);
}
/*
 * C entry point of the decompressor, called from head_32.S / head_64.S.
 *
 * @rmode:      real mode (boot parameter) pointer, stored in real_mode
 * @heap:       start of the BOOT_HEAP_SIZE byte heap used by malloc()
 * @input_data: compressed kernel image
 * @input_len:  length of the compressed image in bytes
 * @output:     address the kernel is decompressed to
 *
 * Sets up console output, validates the destination, decompresses the image
 * and lets parse_elf() place its loadable segments.  Any failure ends in
 * error(), which does not return.
 */
asmlinkage void decompress_kernel(void *rmode, memptr heap,
				  unsigned char *input_data,
				  unsigned long input_len,
				  unsigned char *output)
{
	int mono;

	real_mode = rmode;

	if (cmdline_find_option_bool("quiet"))
		quiet = 1;
	if (cmdline_find_option_bool("debug"))
		debug = 1;

	/* Video mode 7 uses the 0xb0000 buffer and port 0x3b4. */
	mono = (real_mode->screen_info.orig_video_mode == 7);
	vidmem = (char *)(mono ? 0xb0000UL : 0xb8000UL);
	vidport = mono ? 0x3b4 : 0x3d4;

	lines = real_mode->screen_info.orig_video_lines;
	cols = real_mode->screen_info.orig_video_cols;

	console_init();
	if (debug)
		putstr("early console in decompress_kernel\n");

	free_mem_ptr = heap;	/* Heap */
	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
		error("Destination address inappropriately aligned");

#ifdef CONFIG_X86_64
	if (heap > 0x3fffffffffffUL)
		error("Destination address too large");
#else
	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
		error("Destination address too large");
#endif

#ifndef CONFIG_RELOCATABLE
	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
		error("Wrong destination address");
#endif

	if (!quiet)
		putstr("\nDecompressing Linux... ");

	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
	parse_elf(output);

	if (!quiet)
		putstr("done.\nBooting the kernel.\n");
}
+39
View File
@@ -0,0 +1,39 @@
#ifndef BOOT_COMPRESSED_MISC_H
#define BOOT_COMPRESSED_MISC_H
/*
* we have to be careful, because no indirections are allowed here, and
* paravirt_ops is a kind of one. As it will only run in baremetal anyway,
* we just keep it from happening
*/
#undef CONFIG_PARAVIRT
#ifdef CONFIG_X86_32
#define _ASM_X86_DESC_H 1
#endif
#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/elf.h>
#include <linux/io.h>
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
#define BOOT_BOOT_H
#include "../ctype.h"
/* misc.c */
extern struct boot_params *real_mode; /* Pointer to real-mode data */
void __putstr(int error, const char *s);
#define putstr(__x) __putstr(0, __x)
#define puts(__x) __putstr(0, __x)
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
/* early_serial_console.c */
extern int early_serial_base;
void console_init(void);
#endif
+95
View File
@@ -0,0 +1,95 @@
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* H. Peter Anvin <hpa@linux.intel.com>
*
* ----------------------------------------------------------------------- */
/*
* Compute the desired load offset from a compressed program; outputs
* a small assembly wrapper with the appropriate symbols defined.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <tools/le_byteshift.h>
/*
 * mkpiggy compressed_file
 *
 * Reads the little-endian 32-bit value stored in the last four bytes of the
 * compressed image as the uncompressed length, uses the file size as the
 * compressed length, and prints an assembly fragment on stdout that defines
 * z_input_len, z_output_len, z_extract_offset, z_extract_offset_negative and
 * embeds the file as input_data.
 *
 * Returns 0 on success and 1 on any error.  The Makefile rule deletes the
 * output when the exit status is non-zero, so every I/O failure must be
 * reported through the exit status rather than by emitting a bogus fragment.
 */
int main(int argc, char *argv[])
{
	uint32_t olen;
	long ilen;
	unsigned long offs;
	FILE *f;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
		return 1;
	}

	/* Get the information for the compressed kernel image first */

	f = fopen(argv[1], "rb");
	if (!f) {
		perror(argv[1]);
		return 1;
	}

	/* A failed seek (e.g. non-seekable input) must not be ignored. */
	if (fseek(f, -4L, SEEK_END))
		goto fail;

	if (fread(&olen, sizeof(olen), 1, f) != 1)
		goto fail;

	/* The read ended at end of file, so the offset is the file size. */
	ilen = ftell(f);
	if (ilen < 0)
		goto fail;

	olen = get_unaligned_le32(&olen);
	fclose(f);

	/*
	 * Now we have the input (compressed) and output (uncompressed)
	 * sizes, compute the necessary decompression offset...
	 */

	offs = (olen > ilen) ? olen - ilen : 0;
	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
	offs += 64*1024 + 128;	/* Add 64K + 128 bytes slack */
	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */

	printf(".section \".rodata..compressed\",\"a\",@progbits\n");
	printf(".globl z_input_len\n");
	printf("z_input_len = %lu\n", (unsigned long)ilen);
	printf(".globl z_output_len\n");
	printf("z_output_len = %lu\n", (unsigned long)olen);
	printf(".globl z_extract_offset\n");
	printf("z_extract_offset = 0x%lx\n", offs);
	/* z_extract_offset_negative allows simplification of head_32.S */
	printf(".globl z_extract_offset_negative\n");
	printf("z_extract_offset_negative = -0x%lx\n", offs);

	printf(".globl input_data, input_data_end\n");
	printf("input_data:\n");
	printf(".incbin \"%s\"\n", argv[1]);
	printf("input_data_end:\n");

	return 0;

fail:
	perror(argv[1]);
	fclose(f);
	return 1;
}
+11
View File
@@ -0,0 +1,11 @@
#include "misc.h"
/*
 * Compare @len bytes at @s1 and @s2.
 *
 * Returns 0 when the two regions are equal (including when @len is 0) and 1
 * when they differ.  Unlike the C library memcmp() the result carries no
 * ordering information.
 */
int memcmp(const void *s1, const void *s2, size_t len)
{
	unsigned char diff;

	/*
	 * With a zero count "repe; cmpsb" performs no comparison and leaves
	 * the flags untouched, so setnz would report whatever an earlier
	 * instruction left behind.
	 */
	if (!len)
		return 0;

	/*
	 * The asm reads both buffers through the pointer registers only, so
	 * a "memory" clobber is needed to make sure earlier stores to them
	 * are visible.
	 */
	__asm__("repe; cmpsb; setnz %0"
		: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)
		:
		: "memory", "cc");
	return diff;
}
#include "../string.c"
@@ -0,0 +1,74 @@
#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#undef i386
#include <asm/cache.h>
#include <asm/page_types.h>
#ifdef CONFIG_X86_64
OUTPUT_ARCH(i386:x86-64)
ENTRY(startup_64)
#else
OUTPUT_ARCH(i386)
ENTRY(startup_32)
#endif
/*
 * Layout of the compressed kernel image, linked at address 0.  The assembly
 * entry code uses the boundary symbols defined here: _bss/_ebss to clear the
 * BSS and as the end of the region that is copied, _got/_egot to adjust the
 * GOT.
 */
SECTIONS
{
/* Be careful parts of head_64.S assume startup_32 is at
* address 0.
*/
. = 0;
.head.text : {
_head = . ;
HEAD_TEXT
_ehead = . ;
}
/* The compressed payload emitted by mkpiggy (input_data). */
.rodata..compressed : {
*(.rodata..compressed)
}
.text : {
_text = .; /* Text */
*(.text)
*(.text.*)
_etext = . ;
}
.rodata : {
_rodata = . ;
*(.rodata) /* read-only data */
*(.rodata.*)
_erodata = . ;
}
.got : {
_got = .;
KEEP(*(.got.plt))
KEEP(*(.got))
_egot = .;
}
.data : {
_data = . ;
*(.data)
*(.data.*)
_edata = . ;
}
. = ALIGN(L1_CACHE_BYTES);
.bss : {
_bss = . ;
*(.bss)
*(.bss.*)
*(COMMON)
. = ALIGN(8); /* For convenience during zeroing */
_ebss = .;
}
#ifdef CONFIG_X86_64
/* Page-aligned room for the boot page tables; placed after _ebss. */
. = ALIGN(PAGE_SIZE);
.pgtable : {
_pgtable = . ;
*(.pgtable)
_epgtable = . ;
}
#endif
_end = .;
}
+87
View File
@@ -0,0 +1,87 @@
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
#include <linux/linkage.h>
/*
* Memory copy routines
*/
.code16gcc
.text
/*
* memcpy: 16-bit copy routine.
* In:  %ax = destination offset, %dx = source offset, %cx = byte count.
* Copies %cx / 4 dwords followed by the remaining %cx % 4 bytes from %ds:%si
* to %es:%di.  %si and %di are preserved; %ax is not modified, so it still
* holds the destination on return.
*/
GLOBAL(memcpy)
pushw %si
pushw %di
movw %ax, %di
movw %dx, %si
pushw %cx
shrw $2, %cx
rep; movsl
popw %cx
andw $3, %cx
rep; movsb
popw %di
popw %si
ret
ENDPROC(memcpy)
/*
* memset: 16-bit fill routine.
* In:  %ax = destination offset, %dl = fill byte, %cx = byte count.
* The byte is replicated into all four bytes of %eax (multiplication by
* 0x01010101) so the bulk can be stored as dwords; the remaining %cx % 4
* bytes are stored individually.  %di is preserved.  %eax holds the fill
* pattern, not the destination, on return.
*/
GLOBAL(memset)
pushw %di
movw %ax, %di
movzbl %dl, %eax
imull $0x01010101,%eax
pushw %cx
shrw $2, %cx
rep; stosl
popw %cx
andw $3, %cx
rep; stosb
popw %di
ret
ENDPROC(memset)
/*
* copy_from_fs: memcpy() whose source lives in the %fs segment.
* %ds is temporarily replaced by %fs around the call and restored afterwards;
* the register arguments are passed straight through to memcpy.
*/
GLOBAL(copy_from_fs)
pushw %ds
pushw %fs
popw %ds
call memcpy
popw %ds
ret
ENDPROC(copy_from_fs)
/*
* copy_to_fs: memcpy() whose destination lives in the %fs segment.
* %es is temporarily replaced by %fs around the call and restored afterwards;
* the register arguments are passed straight through to memcpy.
*/
GLOBAL(copy_to_fs)
pushw %es
pushw %fs
popw %es
call memcpy
popw %es
ret
ENDPROC(copy_to_fs)
#if 0 /* Not currently used, but can be enabled as needed */
/* Same as copy_from_fs, but the source segment is taken from %gs */
GLOBAL(copy_from_gs)
pushw %ds
pushw %gs
popw %ds
call memcpy
popw %ds
ret
ENDPROC(copy_from_gs)
/* Same as copy_to_fs, but the destination segment is taken from %gs */
GLOBAL(copy_to_gs)
pushw %es
pushw %gs
popw %es
call memcpy
popw %es
ret
ENDPROC(copy_to_gs)
#endif
+85
View File
@@ -0,0 +1,85 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* arch/x86/boot/cpu.c
*
* Check for obligatory CPU features and abort if the features are not
* present.
*/
#include "boot.h"
#include "cpustr.h"
/*
 * Translate a CPU level into a printable name.
 *
 * Level 64 maps to "x86-64"; any other level is rendered as "i<level>86",
 * with level 15 reported as 6. The "i...86" form lives in a static buffer,
 * so the result is overwritten by the next call.
 */
static char *cpu_name(int level)
{
	static char buf[6];

	if (level == 64)
		return "x86-64";

	sprintf(buf, "i%d86", level == 15 ? 6 : level);
	return buf;
}
/*
 * Run check_cpu() and report any problem on the console.
 *
 * Returns 0 when the CPU is acceptable, -1 when either the CPU level is
 * below the required level or required feature bits are missing.
 */
int validate_cpu(void)
{
	u32 *err_flags;
	int cpu_level, req_level;
	const unsigned char *str;
	int word, bit;

	check_cpu(&cpu_level, &req_level, &err_flags);

	if (cpu_level < req_level) {
		/*
		 * Two separate printf() calls: cpu_name() hands back a
		 * shared static buffer, so both names cannot be formatted
		 * in one call.
		 */
		printf("This kernel requires an %s CPU, ",
		       cpu_name(req_level));
		printf("but only detected an %s CPU.\n",
		       cpu_name(cpu_level));
		return -1;
	}

	if (!err_flags)
		return 0;

	puts("This kernel requires the following features "
	     "not present on the CPU:\n");

	/*
	 * x86_cap_strs is walked as a sequence of records:
	 * <word index> <bit index> <NUL-terminated name>.
	 */
	str = (const unsigned char *)x86_cap_strs;

	for (word = 0; word < NCAPINTS; word++) {
		u32 missing = err_flags[word];

		for (bit = 0; bit < 32; bit++, missing >>= 1) {
			if (str[0] < word ||
			    (str[0] == word && str[1] < bit)) {
				/* Skip to the next string */
				str += 2;
				while (*str++)
					;
			}

			if (!(missing & 1))
				continue;

			/* Print the name if we have one, else word:bit */
			if (str[0] == word && str[1] == bit && str[2])
				printf("%s ", str + 2);
			else
				printf("%d:%d ", word, bit);
		}
	}

	putchar('\n');
	return -1;
}
+252
View File
@@ -0,0 +1,252 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Check for obligatory CPU features and abort if the features are not
* present. This code should be compilable as 16-, 32- or 64-bit
* code, so be very careful with types and inline assembly.
*
* This code should not contain any messages; that requires an
* additional wrapper.
*
* As written, this code is not safe for inclusion into the kernel
* proper (after FPU initialization, in particular).
*/
#ifdef _SETUP
# include "boot.h"
#endif
#include <linux/types.h>
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
/* Detected CPU description (level, model, feature words); filled in below */
struct cpu_features cpu;
/* CPUID leaf 0 vendor words, stored in %ebx, %edx, %ecx order by get_flags() */
static u32 cpu_vendor[3];
/* Per-word mask of required feature bits the CPU lacks; see check_flags() */
static u32 err_flags[NCAPINTS];
static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
/* Required feature bits, one word per capability word of cpu.flags */
static const u32 req_flags[NCAPINTS] =
{
REQUIRED_MASK0,
REQUIRED_MASK1,
0, /* REQUIRED_MASK2 not implemented in this file */
0, /* REQUIRED_MASK3 not implemented in this file */
REQUIRED_MASK4,
0, /* REQUIRED_MASK5 not implemented in this file */
REQUIRED_MASK6,
0, /* REQUIRED_MASK7 not implemented in this file */
};
#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
/* Nonzero when the CPUID vendor words spell "AuthenticAMD". */
static int is_amd(void)
{
	if (cpu_vendor[0] != A32('A', 'u', 't', 'h'))
		return 0;
	if (cpu_vendor[1] != A32('e', 'n', 't', 'i'))
		return 0;
	return cpu_vendor[2] == A32('c', 'A', 'M', 'D');
}
/* Nonzero when the CPUID vendor words spell "CentaurHauls". */
static int is_centaur(void)
{
	if (cpu_vendor[0] != A32('C', 'e', 'n', 't'))
		return 0;
	if (cpu_vendor[1] != A32('a', 'u', 'r', 'H'))
		return 0;
	return cpu_vendor[2] == A32('a', 'u', 'l', 's');
}
/* Nonzero when the CPUID vendor words spell "GenuineTMx86". */
static int is_transmeta(void)
{
	if (cpu_vendor[0] != A32('G', 'e', 'n', 'u'))
		return 0;
	if (cpu_vendor[1] != A32('i', 'n', 'e', 'T'))
		return 0;
	return cpu_vendor[2] == A32('M', 'x', '8', '6');
}
/*
 * Probe for an x87 FPU.
 *
 * Clears CR0.EM and CR0.TS if either is set (and leaves them cleared), then
 * issues FNINIT and stores the status and control words. Returns nonzero
 * when the status word reads 0 and the control word, masked with 0x103f,
 * reads 0x003f.
 */
static int has_fpu(void)
{
/* Preset to all-ones so that stores which never happen are detectable */
u16 fcw = -1, fsw = -1;
u32 cr0;
asm("movl %%cr0,%0" : "=r" (cr0));
if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
asm volatile("movl %0,%%cr0" : : "r" (cr0));
}
asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
: "+m" (fsw), "+m" (fcw));
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
/*
 * Test whether the EFLAGS bits in @mask can be changed by software.
 *
 * Reads EFLAGS into f0, writes back f0 ^ mask, reads the result into f1 and
 * finally restores the original EFLAGS. Returns 1 if any bit of @mask
 * differs between the two reads, 0 otherwise.
 */
static int has_eflag(u32 mask)
{
u32 f0, f1;
asm("pushfl ; "		/* saved copy, restored by the final popfl */
"pushfl ; "
"popl %0 ; "		/* f0 = current EFLAGS */
"movl %0,%1 ; "
"xorl %2,%1 ; "		/* flip the bits under test */
"pushl %1 ; "
"popfl ; "		/* try to load the modified value */
"pushfl ; "
"popl %1 ; "		/* f1 = what the CPU actually kept */
"popfl"
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));
return !!((f0^f1) & mask);
}
/*
 * Fill in cpu.flags[], cpu.level, cpu.model and cpu_vendor[] from the FPU
 * probe and CPUID. When the EFLAGS ID bit cannot be toggled
 * (has_eflag(X86_EFLAGS_ID) is false) only the FPU bit is evaluated.
 */
static void get_flags(void)
{
u32 max_intel_level, max_amd_level;
u32 tfms;
if (has_fpu())
set_bit(X86_FEATURE_FPU, cpu.flags);
if (has_eflag(X86_EFLAGS_ID)) {
/* Leaf 0: highest standard leaf in %eax, vendor words in %ebx/%edx/%ecx */
asm("cpuid"
: "=a" (max_intel_level),
"=b" (cpu_vendor[0]),
"=d" (cpu_vendor[1]),
"=c" (cpu_vendor[2])
: "a" (0));
/* Only trust leaf 1 if the reported maximum looks sane */
if (max_intel_level >= 0x00000001 &&
max_intel_level <= 0x0000ffff) {
/* Leaf 1: signature in %eax, feature words 4 (%ecx) and 0 (%edx) */
asm("cpuid"
: "=a" (tfms),
"=c" (cpu.flags[4]),
"=d" (cpu.flags[0])
: "a" (0x00000001)
: "ebx");
cpu.level = (tfms >> 8) & 15;	/* signature bits 11:8 */
cpu.model = (tfms >> 4) & 15;	/* signature bits 7:4 */
/* Levels >= 6 also use signature bits 19:16 as the high model nibble */
if (cpu.level >= 6)
cpu.model += ((tfms >> 16) & 0xf) << 4;
}
/* Leaf 0x80000000: highest extended leaf in %eax */
asm("cpuid"
: "=a" (max_amd_level)
: "a" (0x80000000)
: "ebx", "ecx", "edx");
if (max_amd_level >= 0x80000001 &&
max_amd_level <= 0x8000ffff) {
u32 eax = 0x80000001;
/* Leaf 0x80000001: feature words 6 (%ecx) and 1 (%edx) */
asm("cpuid"
: "+a" (eax),
"=c" (cpu.flags[6]),
"=d" (cpu.flags[1])
: : "ebx");
}
}
}
/*
 * Recompute err_flags[] as the required feature bits that are absent from
 * cpu.flags[], and return a bitmask with bit N set for every capability
 * word N that has at least one missing bit.
 */
static int check_flags(void)
{
	u32 bad_words = 0;
	int w;

	for (w = 0; w < NCAPINTS; w++) {
		u32 missing = req_flags[w] & ~cpu.flags[w];

		err_flags[w] = missing;
		if (missing)
			bad_words |= 1 << w;
	}

	return bad_words;
}
/*
* Returns -1 on error.
*
* *cpu_level is set to the current CPU level; *req_level to the required
* level. x86-64 is considered level 64 for this purpose.
*
* *err_flags_ptr is set to the flags error array if there are flags missing,
* and to NULL otherwise. Any of the three pointers may be NULL.
*
* If the only missing bits are in feature word 0 (err == 0x01), vendor
* specific MSRs are adjusted on AMD, VIA/Centaur and Transmeta parts and the
* flags are checked again.
*/
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
{
int err;
memset(&cpu.flags, 0, sizeof cpu.flags);
/* Start at level 3; level 4 if the AC flag in EFLAGS can be toggled */
cpu.level = 3;
if (has_eflag(X86_EFLAGS_AC))
cpu.level = 4;
get_flags();
err = check_flags();
if (test_bit(X86_FEATURE_LM, cpu.flags))
cpu.level = 64;
if (err == 0x01 &&
!(err_flags[0] &
~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) &&
is_amd()) {
/* If this is an AMD and we're only missing SSE+SSE2, try to
turn them on */
u32 ecx = MSR_K7_HWCR;
u32 eax, edx;
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
eax &= ~(1 << 15);	/* clear bit 15 of the low MSR word */
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
get_flags(); /* Make sure it really did something */
err = check_flags();
} else if (err == 0x01 &&
!(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
is_centaur() && cpu.model >= 6) {
/* If this is a VIA C3, we might have to enable CX8
explicitly */
u32 ecx = MSR_VIA_FCR;
u32 eax, edx;
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
eax |= (1<<1)|(1<<7);
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
/* CPUID is not re-read here; the CX8 bit is set by hand */
set_bit(X86_FEATURE_CX8, cpu.flags);
err = check_flags();
} else if (err == 0x01 && is_transmeta()) {
/* Transmeta might have masked feature bits in word 0 */
u32 ecx = 0x80860004;
u32 eax, edx;
u32 level = 1;
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
/* Temporarily write all-ones to the low word, re-read CPUID leaf 1 ... */
asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
asm("cpuid"
: "+a" (level), "=d" (cpu.flags[0])
: : "ecx", "ebx");
/* ... then put the original MSR value back */
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
err = check_flags();
}
if (err_flags_ptr)
*err_flags_ptr = err ? err_flags : NULL;
if (cpu_level_ptr)
*cpu_level_ptr = cpu.level;
if (req_level_ptr)
*req_level_ptr = req_level;
return (cpu.level < req_level || err) ? -1 : 0;
}
+21
View File
@@ -0,0 +1,21 @@
#ifndef BOOT_ISDIGIT_H
#define BOOT_ISDIGIT_H
/* Return nonzero if @ch is an ASCII decimal digit ('0'..'9'). */
static inline int isdigit(int ch)
{
	return (ch >= '0') && (ch <= '9');
}

/*
 * Return nonzero if @ch is an ASCII hexadecimal digit
 * ('0'..'9', 'a'..'f' or 'A'..'F').
 *
 * Plain integer results are returned so that this header does not depend on
 * a definition of `true` being supplied by whoever includes it.
 */
static inline int isxdigit(int ch)
{
	if (isdigit(ch))
		return 1;

	if ((ch >= 'a') && (ch <= 'f'))
		return 1;

	return (ch >= 'A') && (ch <= 'F');
}
#endif
+151
View File
@@ -0,0 +1,151 @@
#include "boot.h"
#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
#define XMTRDY 0x20
#define DLAB 0x80
#define TXR 0 /* Transmit register (WRITE) */
#define RXR 0 /* Receive register (READ) */
#define IER 1 /* Interrupt Enable */
#define IIR 2 /* Interrupt ID */
#define FCR 2 /* FIFO control */
#define LCR 3 /* Line control */
#define MCR 4 /* Modem control */
#define LSR 5 /* Line Status */
#define MSR 6 /* Modem Status */
#define DLL 0 /* Divisor Latch Low */
#define DLH 1 /* Divisor latch High */
#define DEFAULT_BAUD 9600
static void early_serial_init(int port, int baud)
{
unsigned char c;
unsigned divisor;
outb(0x3, port + LCR); /* 8n1 */
outb(0, port + IER); /* no interrupt */
outb(0, port + FCR); /* no fifo */
outb(0x3, port + MCR); /* DTR + RTS */
divisor = 115200 / baud;
c = inb(port + LCR);
outb(c | DLAB, port + LCR);
outb(divisor & 0xff, port + DLL);
outb((divisor >> 8) & 0xff, port + DLH);
outb(c & ~DLAB, port + LCR);
early_serial_base = port;
}
/*
 * Handle the "earlyprintk=" command line option.
 *
 * Accepted forms:
 *	"serial,0x3f8,115200"
 *	"serial,ttyS0,115200"
 *	"ttyS0,115200"
 *
 * The serial port is initialised only when one of them selected a port.
 */
static void parse_earlyprintk(void)
{
	char arg[32];
	char *end;
	int baud;
	int port = 0;
	int pos = 0;

	/* No option present: nothing selects a port, so nothing to do */
	if (cmdline_find_option("earlyprintk", arg, sizeof arg) <= 0)
		return;

	if (!strncmp(arg, "serial", 6)) {
		port = DEFAULT_SERIAL_PORT;
		pos = 6;
	}

	if (arg[pos] == ',')
		pos++;

	if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
		/* Explicit I/O address after "serial," */
		port = simple_strtoull(arg + pos, &end, 16);
		if (port != 0 && end != arg + pos)
			pos = end - arg;
		else
			port = DEFAULT_SERIAL_PORT;
	} else if (!strncmp(arg + pos, "ttyS", 4)) {
		/* "ttyS1" selects 0x2f8, anything else 0x3f8 */
		pos += 4;
		port = (arg[pos++] == '1') ? 0x2f8 : 0x3f8;
	}

	if (arg[pos] == ',')
		pos++;

	baud = simple_strtoull(arg + pos, &end, 0);
	if (baud == 0 || end == arg + pos)
		baud = DEFAULT_BAUD;

	if (port)
		early_serial_init(port, baud);
}
#define BASE_BAUD (1843200/16)
static unsigned int probe_baud(int port)
{
unsigned char lcr, dll, dlh;
unsigned int quot;
lcr = inb(port + LCR);
outb(lcr | DLAB, port + LCR);
dll = inb(port + DLL);
dlh = inb(port + DLH);
outb(lcr, port + LCR);
quot = (dlh << 8) | dll;
return BASE_BAUD / quot;
}
/*
 * Handle "console=uart8250,io,<port>[,<baud>...]" and
 * "console=uart,io,<port>[,<baud>...]".
 *
 * When no baud rate follows the port, the rate the UART is already
 * programmed for is probed. Nothing is touched when the port does not parse
 * to a nonzero address, and a rate that parses to zero falls back to
 * DEFAULT_BAUD.
 */
static void parse_console_uart8250(void)
{
	char optstr[64], *options;
	int baud = DEFAULT_BAUD;
	int port = 0;

	/*
	 * console=uart8250,io,0x3f8,115200n8
	 * Note: this must be the last console= option on the command line.
	 */
	if (cmdline_find_option("console", optstr, sizeof optstr) <= 0)
		return;

	options = optstr;

	if (!strncmp(options, "uart8250,io,", 12))
		port = simple_strtoull(options + 12, &options, 0);
	else if (!strncmp(options, "uart,io,", 8))
		port = simple_strtoull(options + 8, &options, 0);
	else
		return;

	/* Bail out before probing: port 0 is not a UART */
	if (!port)
		return;

	if (options && (options[0] == ',')) {
		baud = simple_strtoull(options + 1, &options, 0);
		if (baud <= 0)
			baud = DEFAULT_BAUD;
	} else {
		baud = probe_baud(port);
	}

	early_serial_init(port, baud);
}
/*
 * Set up the early serial console: "earlyprintk=" takes precedence, and
 * "console=uart8250,..." is consulted only if it did not select a port.
 */
void console_init(void)
{
	parse_earlyprintk();

	if (early_serial_base)
		return;

	parse_console_uart8250();
}
+181
View File
@@ -0,0 +1,181 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Get EDD BIOS disk information
*/
#include "boot.h"
#include <linux/edd.h>
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
/*
 * Read the first sector (the MBR) of BIOS drive @devno into @buf using the
 * legacy INT 13h read call. Returns 0 on success, -1 if the BIOS reports an
 * error through the carry flag.
 */
static int read_mbr(u8 devno, void *buf)
{
	struct biosregs in, out;

	initregs(&in);
	in.ax = 0x0201;		/* Legacy Read, one sector */
	in.cx = 0x0001;		/* Sector 0-0-1 */
	in.dl = devno;
	in.bx = (size_t)buf;
	intcall(0x13, &in, &out);

	return (out.eflags & X86_EFLAGS_CF) ? -1 : 0;
}
/*
 * Read the MBR of BIOS drive @devno into a sector-aligned scratch buffer
 * placed after _end, and store its signature dword in *@mbrsig.
 *
 * Returns 0 if the sector was read and ends in the 0xAA55 magic. Every
 * failure path returns -1, which the u32 return type turns into 0xffffffff;
 * the caller in this file only tests for zero.
 */
static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
{
int sector_size;
char *mbrbuf_ptr, *mbrbuf_end;
u32 buf_base, mbr_base;
extern char _end[];
u16 mbr_magic;
sector_size = ei->params.bytes_per_sector;
if (!sector_size)
sector_size = 512; /* Best available guess */
/* Produce a naturally aligned buffer on the heap */
/* The alignment is computed on the linear address (segment * 16 + offset) */
/* NOTE(review): the mask arithmetic assumes sector_size is a power of two */
buf_base = (ds() << 4) + (u32)&_end;
mbr_base = (buf_base+sector_size-1) & ~(sector_size-1);
mbrbuf_ptr = _end + (mbr_base-buf_base);
mbrbuf_end = mbrbuf_ptr + sector_size;
/* Make sure we actually have space on the heap... */
if (!(boot_params.hdr.loadflags & CAN_USE_HEAP))
return -1;
if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
return -1;
memset(mbrbuf_ptr, 0, sector_size);
if (read_mbr(devno, mbrbuf_ptr))
return -1;
*mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
/* The boot signature occupies the last two bytes of a 512-byte sector */
mbr_magic = *(u16 *)&mbrbuf_ptr[510];
/* check for valid MBR magic */
return mbr_magic == 0xAA55 ? 0 : -1;
}
/*
 * Query EDD information for BIOS drive @devno into *@ei.
 *
 * *@ei is zeroed first. Returns -1 if the "extensions present" call fails
 * or does not return EDDMAGIC2 in %bx; otherwise fills in what the BIOS
 * reports and returns 0. The results of the two follow-up calls do not
 * affect the return value.
 */
static int get_edd_info(u8 devno, struct edd_info *ei)
{
struct biosregs ireg, oreg;
memset(ei, 0, sizeof *ei);
/* Check Extensions Present */
initregs(&ireg);
ireg.ah = 0x41;
ireg.bx = EDDMAGIC1;
ireg.dl = devno;
intcall(0x13, &ireg, &oreg);
if (oreg.eflags & X86_EFLAGS_CF)
return -1; /* No extended information */
if (oreg.bx != EDDMAGIC2)
return -1;
ei->device = devno;
ei->version = oreg.ah; /* EDD version number */
ei->interface_support = oreg.cx; /* EDD functionality subsets */
/* Extended Get Device Parameters */
/* ireg is reused, so %dl still holds devno; the BIOS writes into ei->params */
ei->params.length = sizeof(ei->params);
ireg.ah = 0x48;
ireg.si = (size_t)&ei->params;
intcall(0x13, &ireg, &oreg);
/* Get legacy CHS parameters */
/* Ralf Brown recommends setting ES:DI to 0:0 */
ireg.ah = 0x08;
ireg.es = 0;
intcall(0x13, &ireg, &oreg);
if (!(oreg.eflags & X86_EFLAGS_CF)) {
/* Cylinder: low 8 bits in %ch, bits 9:8 in the top two bits of %cl */
ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
ei->legacy_max_head = oreg.dh;
ei->legacy_sectors_per_track = oreg.cl & 0x3f;
}
return 0;
}
/*
 * Probe the BIOS hard disks 0x80 .. 0x80+EDD_MBR_SIG_MAX-1 and record their
 * EDD information and MBR signatures in boot_params.
 *
 * Controlled by the "edd=" command line option:
 *	"off"			- do nothing
 *	"on"			- probe (overrides CONFIG_EDD_OFF)
 *	"skipmbr" / "skip"	- probe EDD data but do not read the MBRs
 * "quiet" suppresses the progress message.
 */
void query_edd(void)
{
char eddarg[8];
int do_mbr = 1;
#ifdef CONFIG_EDD_OFF
int do_edd = 0;
#else
int do_edd = 1;
#endif
int be_quiet;
int devno;
struct edd_info ei, *edp;
u32 *mbrptr;
if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
do_edd = 1;
do_mbr = 0;
}
else if (!strcmp(eddarg, "off"))
do_edd = 0;
else if (!strcmp(eddarg, "on"))
do_edd = 1;
}
be_quiet = cmdline_find_option_bool("quiet");
edp = boot_params.eddbuf;
mbrptr = boot_params.edd_mbr_sig_buffer;
if (!do_edd)
return;
/* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe,
* so give a hint if this happens.
*/
if (!be_quiet)
printf("Probing EDD (edd=off to disable)... ");
for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
/*
* Scan the BIOS-supported hard disks and query EDD
* information...
*/
/* EDD records are packed: only successful probes consume a slot */
if (!get_edd_info(devno, &ei)
&& boot_params.eddbuf_entries < EDDMAXNR) {
memcpy(edp, &ei, sizeof ei);
edp++;
boot_params.eddbuf_entries++;
}
/*
 * MBR signatures are indexed by drive number instead: mbrptr advances for
 * every drive, and the entry count tracks the highest drive read so far.
 */
if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++))
boot_params.edd_mbr_sig_buf_entries = devno-0x80+1;
}
if (!be_quiet)
printf("ok\n");
}
#endif
+478
View File
@@ -0,0 +1,478 @@
/*
* header.S
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Based on bootsect.S and setup.S
* modified by more people than can be counted
*
* Rewritten as a common file by H. Peter Anvin (Apr 2007)
*
* BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
* addresses must be multiplied by 16 to obtain their respective linear
* addresses. To avoid confusion, linear addresses are written using leading
* hex while segment addresses are written as segment:offset.
*
*/
#include <asm/segment.h>
#include <generated/utsrelease.h>
#include <asm/boot.h>
#include <asm/e820.h>
#include <asm/page_types.h>
#include <asm/setup.h>
#include "boot.h"
#include "voffset.h"
#include "zoffset.h"
BOOTSEG = 0x07C0 /* original address of boot-sector */
SYSSEG = 0x1000 /* historical load address >> 4 */
#ifndef SVGA_MODE
#define SVGA_MODE ASK_VGA
#endif
#ifndef RAMDISK
#define RAMDISK 0
#endif
#ifndef ROOT_RDONLY
#define ROOT_RDONLY 1
#endif
.code16
.section ".bstext", "ax"
.global bootsect_start
/*
 * Legacy boot sector entry point. The image cannot boot itself: this stub
 * prints bugger_off_msg through the BIOS, waits for a key press and then
 * asks the BIOS to reboot.
 */
bootsect_start:
#ifdef CONFIG_EFI_STUB
# "MZ", MS-DOS header
.byte 0x4d
.byte 0x5a
#endif
# Normalize the start address
ljmp $BOOTSEG, $start2
start2:
/* Point every data segment and the stack at the code segment */
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
xorw %sp, %sp
sti
cld
movw $bugger_off_msg, %si
msg_loop:
/* Emit the NUL-terminated message one character at a time via int 0x10 */
lodsb
andb %al, %al
jz bs_die
movb $0xe, %ah
movw $7, %bx
int $0x10
jmp msg_loop
bs_die:
# Allow the user to press a key, then reboot
xorw %ax, %ax
int $0x16
int $0x19
# int 0x19 should never return. In case it does anyway,
# invoke the BIOS reset code...
ljmp $0xf000,$0xfff0
#ifdef CONFIG_EFI_STUB
.org 0x3c
#
# Offset to the PE header.
#
.long pe_header
#endif /* CONFIG_EFI_STUB */
.section ".bsdata", "a"
bugger_off_msg:
.ascii "Direct booting from floppy is no longer supported.\r\n"
.ascii "Please use a boot loader program instead.\r\n"
.ascii "\n"
.ascii "Remove disk and press any key to reboot . . .\r\n"
.byte 0
#ifdef CONFIG_EFI_STUB
/*
 * PE signature, COFF file header and the leading fields of the optional
 * header. The word at offset 0x3c of the boot sector holds the offset of
 * pe_header. Fields marked "Filled in by build.c" are left zero here.
 */
pe_header:
.ascii "PE"
.word 0
coff_header:
#ifdef CONFIG_X86_32
.word 0x14c # i386
#else
.word 0x8664 # x86-64
#endif
.word 2 # nr_sections
.long 0 # TimeDateStamp
.long 0 # PointerToSymbolTable
.long 1 # NumberOfSymbols
.word section_table - optional_header # SizeOfOptionalHeader
#ifdef CONFIG_X86_32
.word 0x306 # Characteristics.
# IMAGE_FILE_32BIT_MACHINE |
# IMAGE_FILE_DEBUG_STRIPPED |
# IMAGE_FILE_EXECUTABLE_IMAGE |
# IMAGE_FILE_LINE_NUMS_STRIPPED
#else
.word 0x206 # Characteristics
# IMAGE_FILE_DEBUG_STRIPPED |
# IMAGE_FILE_EXECUTABLE_IMAGE |
# IMAGE_FILE_LINE_NUMS_STRIPPED
#endif
optional_header:
#ifdef CONFIG_X86_32
.word 0x10b # PE32 format
#else
.word 0x20b # PE32+ format
#endif
.byte 0x02 # MajorLinkerVersion
.byte 0x14 # MinorLinkerVersion
# Filled in by build.c
.long 0 # SizeOfCode
.long 0 # SizeOfInitializedData
.long 0 # SizeOfUninitializedData
# Filled in by build.c
.long 0x0000 # AddressOfEntryPoint
.long 0x0000 # BaseOfCode
/* The 32-bit variant carries one extra dword here */
#ifdef CONFIG_X86_32
.long 0 # data
#endif
/*
 * Remaining optional-header fields, followed by the data directory.
 *
 * NumberOfRvaAndSizes must equal the number of data-directory entries that
 * follow. Six entries are emitted (Export .. BaseRelocation), so the count
 * is 6; with a smaller count the BaseRelocationTable slot lies outside the
 * directory as declared to the loader.
 */
extra_header_fields:
#ifdef CONFIG_X86_32
	.long	0				# ImageBase
#else
	.quad	0				# ImageBase
#endif
	.long	0x1000				# SectionAlignment
	.long	0x200				# FileAlignment
	.word	0				# MajorOperatingSystemVersion
	.word	0				# MinorOperatingSystemVersion
	.word	0				# MajorImageVersion
	.word	0				# MinorImageVersion
	.word	0				# MajorSubsystemVersion
	.word	0				# MinorSubsystemVersion
	.long	0				# Win32VersionValue

	#
	# The size of the bzImage is written in tools/build.c
	#
	.long	0				# SizeOfImage

	.long	0x200				# SizeOfHeaders
	.long	0				# CheckSum
	.word	0xa				# Subsystem (EFI application)
	.word	0				# DllCharacteristics
#ifdef CONFIG_X86_32
	.long	0				# SizeOfStackReserve
	.long	0				# SizeOfStackCommit
	.long	0				# SizeOfHeapReserve
	.long	0				# SizeOfHeapCommit
#else
	.quad	0				# SizeOfStackReserve
	.quad	0				# SizeOfStackCommit
	.quad	0				# SizeOfHeapReserve
	.quad	0				# SizeOfHeapCommit
#endif
	.long	0				# LoaderFlags
	.long	0x6				# NumberOfRvaAndSizes

	# Data directory: six 8-byte entries
	.quad	0				# ExportTable
	.quad	0				# ImportTable
	.quad	0				# ResourceTable
	.quad	0				# ExceptionTable
	.quad	0				# CertificationTable
	.quad	0				# BaseRelocationTable
# Section table
/* Two entries, matching nr_sections in the COFF header: .text and .reloc */
section_table:
.ascii ".text"
.byte 0
.byte 0
.byte 0
.long 0 # VirtualSize
.long 0x0 # startup_{32,64}
.long 0 # Size of initialized data
# on disk
.long 0x0 # startup_{32,64}
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x60500020 # Characteristics (section flags)
#
# The EFI application loader requires a relocation section
# because EFI applications are relocatable and not having
# this section seems to confuse it. But since we don't need
# the loader to fixup any relocs for us just fill it with a
# single dummy reloc.
#
.ascii ".reloc"
.byte 0
.byte 0
.long reloc_end - reloc_start # VirtualSize
.long reloc_start # VirtualAddress
.long reloc_end - reloc_start # SizeOfRawData
.long reloc_start # PointerToRawData
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x42100040 # Characteristics (section flags)
#endif /* CONFIG_EFI_STUB */
# Kernel attributes; used by setup. This is part 1 of the
# header, from the old boot sector.
/*
 * The layout below is read and written by boot loaders and by build.c, so
 * field order and sizes must not change. copy_boot_params() copies it into
 * boot_params.hdr starting at the hdr label.
 */
.section ".header", "a"
.globl hdr
hdr:
setup_sects: .byte 0 /* Filled in by build.c */
root_flags: .word ROOT_RDONLY
syssize: .long 0 /* Filled in by build.c */
ram_size: .word 0 /* Obsolete */
vid_mode: .word SVGA_MODE
root_dev: .word 0 /* Filled in by build.c */
boot_flag: .word 0xAA55
# offset 512, entry point
.globl _start
_start:
# Explicitly enter this as bytes, or the assembler
# tries to generate a 3-byte jump here, which causes
# everything else to push off to the wrong offset.
.byte 0xeb # short (2-byte) jump
.byte start_of_setup-1f
1:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
.word 0x020a # header version number (>= 0x0105,
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
# in case something decided to "use" it
.word kernel_version-512 # pointing to kernel version string
# above section of header is compatible
# with loadlin-1.5 (header v1.5). Don't
# change it.
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# bootloaders know to change this.
# See Documentation/x86/boot.txt for
# assigned ids
# flags, unused bits must be zero (RFU) bit within loadflags
loadflags:
LOADED_HIGH = 1 # If set, the kernel is loaded high
CAN_USE_HEAP = 0x80 # If set, the loader also has set
# heap_end_ptr to tell how much
# space behind setup.S can be used for
# heap purposes.
# Only the loader knows what is free
.byte LOADED_HIGH
setup_move_size: .word 0x8000 # size to move, when setup is not
# loaded at 0x90000. We will move setup
# to 0x90000 then just before jumping
# into the kernel. However, only the
# loader knows how much data behind
# us also needs to be loaded.
code32_start: # here loaders can put a different
# start address for 32-bit code.
.long 0x100000 # 0x100000 = default for big kernel
ramdisk_image: .long 0 # address of loaded ramdisk image
# Here the loader puts the 32-bit
# address where it loaded the image.
# This only will be read by the kernel.
ramdisk_size: .long 0 # its size in bytes
bootsect_kludge:
.long 0 # obsolete
heap_end_ptr: .word _end+STACK_SIZE-512
# (Header version 0x0201 or later)
# space from here (exclusive) down to
# end of setup code can be used by setup
# for local heap purposes.
ext_loader_ver:
.byte 0 # Extended boot loader version
ext_loader_type:
.byte 0 # Extended boot loader type
cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
# If nonzero, a 32-bit pointer
# to the kernel command line.
# The command line should be
# located between the start of
# setup and the end of low
# memory (0xa0000), or it may
# get overwritten before it
# gets read. If this field is
# used, there is no longer
# anything magical about the
# 0x90000 segment; the setup
# can be located anywhere in
# low memory 0x10000 or higher.
ramdisk_max: .long 0x7fffffff
# (Header version 0x0203 or later)
# The highest safe address for
# the contents of an initrd
# The current kernel allows up to 4 GB,
# but leave it at 2 GB to avoid
# possible bootloader bugs.
kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
#required for protected mode
#kernel
#ifdef CONFIG_RELOCATABLE
relocatable_kernel: .byte 1
#else
relocatable_kernel: .byte 0
#endif
min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
pad3: .word 0
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
#version 2.06
hardware_subarch: .long 0 # subarchitecture, added with 2.07
# default to 0 for normal x86 PC
hardware_subarch_data: .quad 0
payload_offset: .long ZO_input_data
payload_length: .long ZO_z_input_len
setup_data: .quad 0 # 64-bit physical pointer to
# single linked list of
# struct setup_data
pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
/* init_size is the larger of the two sizes computed below */
#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
#define VO_INIT_SIZE (VO__end - VO__text)
#if ZO_INIT_SIZE > VO_INIT_SIZE
#define INIT_SIZE ZO_INIT_SIZE
#else
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size: .long INIT_SIZE # kernel initialization size
# End of setup header #####################################################
/*
 * Real-mode setup entry, reached through the short jump at _start.
 * Establishes %es, a usable stack and a normalized %cs, verifies the setup
 * signature, clears the BSS and calls main(). If the signature is wrong a
 * message is printed and execution falls into die, which halts forever.
 */
.section ".entrytext", "ax"
start_of_setup:
#ifdef SAFE_RESET_DISK_CONTROLLER
# Reset the disk controller.
movw $0x0000, %ax # Reset disk controller
movb $0x80, %dl # All disks
int $0x13
#endif
# Force %es = %ds
movw %ds, %ax
movw %ax, %es
cld
# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
# which happened to work by accident for the old code. Recalculate the stack
# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the
# stack behind its own code, so we can't blindly put it directly past the heap.
movw %ss, %dx
cmpw %ax, %dx # %ds == %ss?
movw %sp, %dx
je 2f # -> assume %sp is reasonably set
# Invalid %ss, make up a new stack
movw $_end, %dx
testb $CAN_USE_HEAP, loadflags
jz 1f
movw heap_end_ptr, %dx
1: addw $STACK_SIZE, %dx
jnc 2f
xorw %dx, %dx # Prevent wraparound
2: # Now %dx should point to the end of our stack space
andw $~3, %dx # dword align (might as well...)
jnz 3f
movw $0xfffc, %dx # Make sure we're not zero
3: movw %ax, %ss
movzwl %dx, %esp # Clear upper half of %esp
sti # Now we should have a working stack
# We will have entered with %cs = %ds+0x20, normalize %cs so
# it is on par with the other segments.
/* Far return to %ds:6f reloads %cs with the value of %ds */
pushw %ds
pushw $6f
lretw
6:
# Check signature at end of setup
cmpl $0x5a5aaa55, setup_sig
jne setup_bad
# Zero the bss
/* Dword count is (_end + 3 - __bss_start) / 4, i.e. rounded up */
movw $__bss_start, %di
movw $_end+3, %cx
xorl %eax, %eax
subw %di, %cx
shrw $2, %cx
rep; stosl
# Jump to C code (should not return)
calll main
# Setup corrupt somehow...
setup_bad:
movl $setup_corrupt, %eax
calll puts
# Fall through...
.globl die
.type die, @function
die:
hlt
jmp die
.size die, .-die
/* Message printed by setup_bad; it starts with byte 7 (ASCII BEL) */
.section ".initdata", "a"
setup_corrupt:
.byte 7
.string "No setup signature found...\n"
/* dummy is only referenced by the relocation entry below */
.data
dummy: .long 0
/*
 * Contents of the .reloc section described in the PE section table: a
 * single dummy entry bracketed by reloc_start / reloc_end.
 */
.section .reloc
reloc_start:
.long dummy - reloc_start
.long 10
.word 0
reloc_end:
+59
View File
@@ -0,0 +1,59 @@
#!/bin/sh
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 1995 by Linus Torvalds
#
# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
#
# "make install" script for i386 architecture
#
# Arguments:
# $1 - kernel version
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
#
# verify FILE
#
# Succeed quietly if FILE is a regular file; otherwise explain on stderr
# that the build has not been run and abort the script with status 1.
verify () {
	[ -f "$1" ] && return 0

	{
		echo ""
		echo " *** Missing file: $1"
		echo ' *** You need to run "make" before "make install".'
		echo ""
	} >&2
	exit 1
}
# Make sure the files actually exist
verify "$2"
verify "$3"

# User may have a custom install script; if so it takes over completely.
# All expansions are quoted so that paths containing whitespace or glob
# characters are passed through intact.
if [ -x ~/bin/"${INSTALLKERNEL}" ]; then exec ~/bin/"${INSTALLKERNEL}" "$@"; fi
if [ -x /sbin/"${INSTALLKERNEL}" ]; then exec /sbin/"${INSTALLKERNEL}" "$@"; fi

# Default install - same as make zlilo
# Keep one generation of the previous kernel image and map file.
if [ -f "$4/vmlinuz" ]; then
	mv "$4/vmlinuz" "$4/vmlinuz.old"
fi

if [ -f "$4/System.map" ]; then
	mv "$4/System.map" "$4/System.old"
fi

cat "$2" > "$4/vmlinuz"
cp "$3" "$4/System.map"

# Re-run the boot loader installer if one can be found
if [ -x /sbin/lilo ]; then
	/sbin/lilo
elif [ -x /etc/lilo/install ]; then
	/etc/lilo/install
else
	sync
	echo "Cannot find LILO."
fi
+178
View File
@@ -0,0 +1,178 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Main module for the real-mode kernel code
*/
#include "boot.h"
/* The boot parameter block ("zeropage") that main() fills in step by step */
struct boot_params boot_params __attribute__((aligned(16)));
/* Heap bounds; both start at _end, and init_heap() may raise heap_end */
char *HEAP = _end;
char *heap_end = _end; /* Default end of heap = no heap */
/*
 * Copy the setup header into the boot parameter block.
 *
 * Doing so breaks the old-style command line protocol, so when the loader
 * used that protocol (no cmd_line_ptr, but the old magic is present) the
 * new-style command line pointer is filled in from the old fields.
 */
static void copy_boot_params(void)
{
	struct old_cmdline {
		u16 cl_magic;
		u16 cl_offset;
	};
	const struct old_cmdline * const oldcmd =
		(const struct old_cmdline *)OLD_CL_ADDRESS;
	u16 cmdline_seg;

	BUILD_BUG_ON(sizeof boot_params != 4096);
	memcpy(&boot_params.hdr, &hdr, sizeof hdr);

	/* New-style pointer already supplied by the loader */
	if (boot_params.hdr.cmd_line_ptr)
		return;

	/* No old-style command line either */
	if (oldcmd->cl_magic != OLD_CL_MAGIC)
		return;

	/*
	 * Old-style command line protocol. Figure out if the command line
	 * falls in the region of memory that an old kernel would have
	 * copied up to 0x90000...
	 */
	cmdline_seg = (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
			? ds() : 0x9000;

	boot_params.hdr.cmd_line_ptr = (cmdline_seg << 4) + oldcmd->cl_offset;
}
/*
* Set the keyboard repeat rate to maximum. Unclear why this
* is done here; this might be possible to kill off as stale code.
*
* Issues INT 16h with %ax = 0x0305 and all other registers as left by
* initregs(); the BIOS output registers are discarded (NULL).
*/
static void keyboard_set_repeat(void)
{
struct biosregs ireg;
initregs(&ireg);
ireg.ax = 0x0305;
intcall(0x16, &ireg, NULL);
}
/*
* Get Intel SpeedStep (IST) information.
*
* Issues INT 15h, %ax = 0xe980 and stores the four result registers in
* boot_params.ist_info. Skipped entirely on CPUs below level 6.
* NOTE(review): the results are stored without checking the carry flag.
*/
static void query_ist(void)
{
struct biosregs ireg, oreg;
/* Some older BIOSes apparently crash on this call, so filter
it from machines too old to have SpeedStep at all. */
if (cpu.level < 6)
return;
initregs(&ireg);
ireg.ax = 0xe980; /* IST Support */
ireg.edx = 0x47534943; /* Request value */
intcall(0x15, &ireg, &oreg);
boot_params.ist_info.signature = oreg.eax;
boot_params.ist_info.command = oreg.ebx;
boot_params.ist_info.event = oreg.ecx;
boot_params.ist_info.perf_level = oreg.edx;
}
/*
* Tell the BIOS what CPU mode we intend to run in.
*
* Only does anything on CONFIG_X86_64 builds, where it issues INT 15h with
* %ax = 0xec00 and %bx = 2; the BIOS output registers are discarded.
*/
static void set_bios_mode(void)
{
#ifdef CONFIG_X86_64
struct biosregs ireg;
initregs(&ireg);
ireg.ax = 0xec00;
ireg.bx = 2;
intcall(0x15, &ireg, NULL);
#endif
}
/*
 * Establish the end of the setup heap.
 *
 * If the loader set CAN_USE_HEAP, heap_end becomes heap_end_ptr + 0x200,
 * capped at STACK_SIZE bytes below the current stack pointer. Otherwise
 * heap_end keeps its default (no heap) and a warning is printed.
 */
static void init_heap(void)
{
char *stack_end;
if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
/* stack_end = %esp - STACK_SIZE */
asm("leal %P1(%%esp),%0"
: "=r" (stack_end) : "i" (-STACK_SIZE));
heap_end = (char *)
((size_t)boot_params.hdr.heap_end_ptr + 0x200);
/* Never let the heap run into the space reserved for the stack */
if (heap_end > stack_end)
heap_end = stack_end;
} else {
/* Boot protocol 2.00 only, no heap available */
puts("WARNING: Ancient bootloader, some functionality "
"may be limited!\n");
}
}
/*
 * Entry point of the real-mode C code, called from start_of_setup.
 *
 * Runs the setup steps below in order and finishes by switching to
 * protected mode. The only early exit is die() when the CPU fails
 * validation.
 */
void main(void)
{
/* First, copy the boot header into the "zeropage" */
copy_boot_params();
/* Initialize the early-boot console */
console_init();
if (cmdline_find_option_bool("debug"))
puts("early console in setup code\n");
/* End of heap check */
init_heap();
/* Make sure we have all the proper CPU support */
if (validate_cpu()) {
puts("Unable to boot - please use a kernel appropriate "
"for your CPU.\n");
die();
}
/* Tell the BIOS what CPU mode we intend to run in. */
set_bios_mode();
/* Detect memory layout */
detect_memory();
/* Set keyboard repeat rate (why?) */
keyboard_set_repeat();
/* Query MCA information */
query_mca();
/* Query Intel SpeedStep (IST) information */
query_ist();
/* Query APM information */
#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
query_apm_bios();
#endif
/* Query EDD information */
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
query_edd();
#endif
/* Set the video mode */
set_video();
/* Do the last things and invoke protected mode */
go_to_protected_mode();
}
+38
View File
@@ -0,0 +1,38 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Get the MCA system description table
*/
#include "boot.h"
/*
 * Copy the BIOS system description table into boot_params.sys_desc_table.
 *
 * Issues INT 15h, %ah = 0xc0; on success the table is found at %es:%bx.
 * Returns 0 on success, -1 if the BIOS signals failure via the carry flag.
 */
int query_mca(void)
{
struct biosregs ireg, oreg;
u16 len;
initregs(&ireg);
ireg.ah = 0xc0;
intcall(0x15, &ireg, &oreg);
if (oreg.eflags & X86_EFLAGS_CF)
return -1; /* No MCA present */
/* The table lives in the BIOS's segment; reach it through %fs */
set_fs(oreg.es);
/* The first word of the table is used as the number of bytes to copy */
len = rdfs16(oreg.bx);
/* Never copy more than the destination can hold */
if (len > sizeof(boot_params.sys_desc_table))
len = sizeof(boot_params.sys_desc_table);
copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
return 0;
}
+136
View File
@@ -0,0 +1,136 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Memory detection code
*/
#include "boot.h"
#define SMAP 0x534d4150 /* ASCII "SMAP" */
/*
 * Enumerate the memory map with repeated INT 15h, AX=E820h calls.
 *
 * Each call fills the static scratch entry "buf"; accepted entries are
 * copied one by one into boot_params.e820_map, up to the capacity of
 * that array.  The continuation value returned in EBX is fed back into
 * the next call, and the loop stops when it becomes zero, when the
 * BIOS sets CF, or when the map is full.
 *
 * Returns the number of entries stored, which is also written to
 * boot_params.e820_entries.  If a call succeeds without returning the
 * "SMAP" signature in EAX, everything collected so far is discarded
 * and 0 is returned.
 */
static int detect_memory_e820(void)
{
int count = 0;
struct biosregs ireg, oreg;
struct e820entry *desc = boot_params.e820_map;
static struct e820entry buf; /* static so it is zeroed */
initregs(&ireg);
ireg.ax = 0xe820;
ireg.cx = sizeof buf;
ireg.edx = SMAP;
ireg.di = (size_t)&buf;
/*
 * Note: at least one BIOS is known which assumes that the
 * buffer pointed to by one e820 call is the same one as
 * the previous call, and only changes modified fields. Therefore,
 * we use a temporary buffer and copy the results entry by entry.
 *
 * This routine deliberately does not try to account for
 * ACPI 3+ extended attributes. This is because there are
 * BIOSes in the field which report zero for the valid bit for
 * all ranges, and we don't currently make any use of the
 * other attribute bits. Revisit this if we see the extended
 * attribute bits deployed in a meaningful way in the future.
 */
do {
intcall(0x15, &ireg, &oreg);
ireg.ebx = oreg.ebx; /* for next iteration... */
/* BIOSes which terminate the chain with CF = 1 as opposed
   to %ebx = 0 don't always report the SMAP signature on
   the final, failing, probe. */
if (oreg.eflags & X86_EFLAGS_CF)
break;
/* Some BIOSes stop returning SMAP in the middle of
   the search loop. We don't know exactly how the BIOS
   screwed up the map at that point, we might have a
   partial map, the full map, or complete garbage, so
   just return failure. */
if (oreg.eax != SMAP) {
count = 0;
break;
}
/* Entry accepted: append a copy of the scratch buffer to the map */
*desc++ = buf;
count++;
} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
return boot_params.e820_entries = count;
}
/*
 * Probe memory size with INT 15h, AX=E801h and record the result in
 * boot_params.alt_mem_k.
 *
 * Returns 0 on success, or -1 if the BIOS sets CF or reports a low
 * count above 15*1024.
 */
static int detect_memory_e801(void)
{
	struct biosregs ireg, oreg;

	initregs(&ireg);
	ireg.ax = 0xe801;
	intcall(0x15, &ireg, &oreg);

	if (oreg.eflags & X86_EFLAGS_CF)
		return -1;

	/* Do we really need to do this? */
	if (oreg.cx || oreg.dx) {
		oreg.ax = oreg.cx;
		oreg.bx = oreg.dx;
	}

	if (oreg.ax > 15*1024)
		return -1;	/* Bogus! */

	/*
	 * Start from the low count.  The high count (in units of 64,
	 * hence the shift by 6) is only added when the low count is
	 * exactly 15*1024; otherwise it is ignored, which drops memory
	 * above 16MB if there is a memory hole there.  If someone
	 * actually finds a machine with a memory hole at 16MB and no
	 * support for 0E820h they should probably generate a fake e820
	 * map.
	 */
	boot_params.alt_mem_k = oreg.ax;
	if (oreg.ax == 15*1024)
		boot_params.alt_mem_k += oreg.bx << 6;

	return 0;
}
static int detect_memory_88(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x88;
intcall(0x15, &ireg, &oreg);
boot_params.screen_info.ext_mem_k = oreg.ax;
return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
}
/*
 * Run all three memory probes.  Every probe is executed regardless of
 * the outcome of the others, because each one fills in a different
 * boot_params field.
 *
 * Returns 0 if at least one probe succeeded, -1 if all of them failed.
 */
int detect_memory(void)
{
	int any_ok = 0;

	if (detect_memory_e820() > 0)
		any_ok = 1;

	if (detect_memory_e801() == 0)
		any_ok = 1;

	if (detect_memory_88() == 0)
		any_ok = 1;

	return any_ok ? 0 : -1;
}
+49
View File
@@ -0,0 +1,49 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2 or (at your
* option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* This is a host program to preprocess the CPU strings into a
* compact format suitable for the setup code.
*/
#include <stdio.h>
#include "../kernel/cpu/capflags.c"
/*
 * Emit, on stdout, the C source of the x86_cap_strs[] string table.
 *
 * Every named capability flag becomes a "\x<word>\x<bit>" prefix
 * followed by its name, wrapped in an #if on the matching
 * REQUIRED_MASK<word> bit.  The very last slot is always emitted, with
 * an empty name if it has none and without an explicit \0.
 */
int main(void)
{
	int word, bit;

	printf("static const char x86_cap_strs[] =\n");

	for (word = 0; word < NCAPINTS; word++) {
		for (bit = 0; bit < 32; bit++) {
			const char *name = x86_cap_flags[word*32 + bit];

			if (word == NCAPINTS-1 && bit == 31) {
				/* The last entry must be unconditional; this
				   also consumes the compiler-added null
				   character */
				printf("\t\"\\x%02x\\x%02x\"\"%s\"\n",
				       word, bit, name ? name : "");
				continue;
			}

			if (!name)
				continue;

			printf("#if REQUIRED_MASK%d & (1 << %d)\n"
			       "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n"
			       "#endif\n",
			       word, bit, word, bit, name);
		}
	}

	printf("\t;\n");
	return 0;
}
+17
View File
@@ -0,0 +1,17 @@
#
# mtools configuration file for "make (b)zdisk"
#
# Actual floppy drive
drive a:
file="/dev/fd0"
# 1.44 MB floppy disk image
drive v:
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
# 2.88 MB floppy disk image (mostly for virtual uses)
drive w:
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
+126
View File
@@ -0,0 +1,126 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Prepare the machine for transition to protected mode.
*/
#include "boot.h"
#include <asm/segment.h>
/*
 * Invoke the realmode switch hook if present; otherwise
 * disable all interrupts.
 *
 * When boot_params.hdr.realmode_swtch is nonzero it is used as the
 * target of an indirect far call; the asm statement declares eax, ebx,
 * ecx and edx as clobbered by that call.  Otherwise this function
 * executes "cli" and writes 0x80 to I/O port 0x70 to disable NMI.
 */
static void realmode_switch_hook(void)
{
if (boot_params.hdr.realmode_swtch) {
asm volatile("lcallw *%0"
: : "m" (boot_params.hdr.realmode_swtch)
: "eax", "ebx", "ecx", "edx");
} else {
asm volatile("cli");
outb(0x80, 0x70); /* Disable NMI */
io_delay();
}
}
/*
 * Disable all interrupts at the legacy PIC.
 *
 * Writes 0xff to port 0xa1 (secondary PIC) and 0xfb to port 0x21
 * (primary PIC); 0xfb leaves only bit 2, the cascade line, unmasked.
 * Each write is followed by io_delay().
 */
static void mask_all_interrupts(void)
{
outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */
io_delay();
outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */
io_delay();
}
/*
 * Reset IGNNE# if asserted in the FPU.
 *
 * Done by writing 0 to I/O ports 0xf0 and 0xf1, with an io_delay()
 * after each write.
 */
static void reset_coprocessor(void)
{
outb(0, 0xf0);
io_delay();
outb(0, 0xf1);
io_delay();
}
/*
* Set up the GDT
*/
/*
 * Memory operand for the lgdtl/lidtl instructions used in this file:
 * a 16-bit length followed immediately by a 32-bit address.  Packed so
 * that no padding is inserted between the two fields.
 */
struct gdt_ptr {
u16 len; /* loaded as sizeof(table) - 1 by setup_gdt() */
u32 ptr; /* address of the table */
} __attribute__((packed));
/*
 * Build the boot-time GDT descriptor and load it with lgdtl.
 *
 * The table holds three entries (boot CS, boot DS and a TSS).  The
 * address stored in the descriptor is the table's offset plus
 * (ds() << 4), i.e. the offset rebased by the current data segment.
 */
static void setup_gdt(void)
{
/* There are machines which are known to not boot with the GDT
   being 8-byte unaligned. Intel recommends 16 byte alignment. */
static const u64 boot_gdt[] __attribute__((aligned(16))) = {
/* CS: code, read/execute, 4 GB, base 0 */
[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
/* DS: data, read/write, 4 GB, base 0 */
[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
/* TSS: 32-bit tss, 104 bytes, base 4096 */
/* We only have a TSS here to keep Intel VT happy;
   we don't actually use it for anything. */
[GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
};
/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
   of the gdt_ptr contents. Thus, make it static so it will
   stay in memory, at least long enough that we switch to the
   proper kernel GDT. */
static struct gdt_ptr gdt;
/* The length field holds the table size minus one */
gdt.len = sizeof(boot_gdt)-1;
gdt.ptr = (u32)&boot_gdt + (ds() << 4);
asm volatile("lgdtl %0" : : "m" (gdt));
}
/*
 * Set up the IDT
 *
 * Loads an IDT descriptor whose length and address are both zero.
 */
static void setup_idt(void)
{
static const struct gdt_ptr null_idt = {0, 0};
asm volatile("lidtl %0" : : "m" (null_idt));
}
/*
 * Actual invocation sequence
 *
 * Performs the final real-mode steps in order and then calls
 * protected_mode_jump() with the 32-bit entry point taken from
 * boot_params.hdr.code32_start and the address of boot_params rebased
 * by (ds() << 4).  If enable_a20() returns nonzero an error is printed
 * and die() is called.
 */
void go_to_protected_mode(void)
{
/* Hook before leaving real mode, also disables interrupts */
realmode_switch_hook();
/* Enable the A20 gate */
if (enable_a20()) {
puts("A20 gate not responding, unable to boot...\n");
die();
}
/* Reset coprocessor (IGNNE#) */
reset_coprocessor();
/* Mask all interrupts in the PIC */
mask_all_interrupts();
/* Actual transition to protected mode... */
setup_idt();
setup_gdt();
protected_mode_jump(boot_params.hdr.code32_start,
(u32)&boot_params + (ds() << 4));
}
+77
View File
@@ -0,0 +1,77 @@
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* The actual transition into protected mode
*/
#include <asm/boot.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <linux/linkage.h>
.text
.code16
/*
 * void protected_mode_jump(u32 entrypoint, u32 bootparams);
 *
 * The boot_params pointer arrives in %edx and is moved to %esi.  The
 * entry point is not touched here; in_pm32 finally jumps through %eax.
 *
 * %ebx is loaded with (%cs << 4) and added to the 32-bit offset stored
 * at label 2, so the hand-assembled far jump below targets in_pm32
 * rebased by the current code segment.  %cx and %di are preloaded with
 * the boot DS and TSS selectors that in_pm32 consumes.
 */
GLOBAL(protected_mode_jump)
movl %edx, %esi # Pointer to boot_params table
xorl %ebx, %ebx
movw %cs, %bx
shll $4, %ebx
addl %ebx, 2f
jmp 1f # Short jump to serialize on 386/486
1:
movw $__BOOT_DS, %cx
movw $__BOOT_TSS, %di
movl %cr0, %edx
orb $X86_CR0_PE, %dl # Protected mode
movl %edx, %cr0
# Transition to 32-bit mode
.byte 0x66, 0xea # ljmpl opcode
2: .long in_pm32 # offset
.word __BOOT_CS # segment
ENDPROC(protected_mode_jump)
.code32
.section ".text32","ax"
/*
 * Target of the far jump in protected_mode_jump.  Register state
 * inherited from there: %ecx holds the boot DS selector, %di the TSS
 * selector, %ebx the value (%cs << 4), %esi the boot_params pointer,
 * and %eax the address that is jumped to at the end.
 */
GLOBAL(in_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds
movl %ecx, %es
movl %ecx, %fs
movl %ecx, %gs
movl %ecx, %ss
# The 32-bit code sets up its own stack, but this way we do have
# a valid stack if some debugging hack wants to use it.
addl %ebx, %esp
# Set up TR to make Intel VT happy
ltr %di
# Clear registers to allow for future extensions to the
# 32-bit boot protocol
xorl %ecx, %ecx
xorl %edx, %edx
xorl %ebx, %ebx
xorl %ebp, %ebp
xorl %edi, %edi
# Set up LDTR to make Intel VT happy
# (%cx was zeroed just above, so a zero selector is loaded)
lldt %cx
jmpl *%eax # Jump to the 32-bit entrypoint
ENDPROC(in_pm32)
+309
View File
@@ -0,0 +1,309 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Oh, it's a waste of space, but oh-so-yummy for debugging. This
* version of printf() does not include 64-bit support. "Live with
* it."
*
*/
#include "boot.h"
/*
 * Parse the run of decimal digits at *s and return its value.
 * On return *s points at the first non-digit character; if there are
 * no digits, 0 is returned and *s is left unchanged.
 */
static int skip_atoi(const char **s)
{
	const char *p = *s;
	int value = 0;

	for (; isdigit(*p); p++)
		value = value * 10 + *p - '0';

	*s = p;
	return value;
}
#define ZEROPAD	1		/* pad with zero */
#define SIGN	2		/* unsigned/signed long */
#define PLUS	4		/* show plus */
#define SPACE	8		/* space if plus */
#define LEFT	16		/* left justified */
#define SMALL	32		/* Must be 32 == 0x20 */
#define SPECIAL	64		/* 0x */

/*
 * Divide the lvalue n in place by base (both treated as unsigned) and
 * evaluate to the remainder.  Both arguments are parenthesized so that
 * expression arguments such as "a + b" expand correctly.
 */
#define __do_div(n, base) ({ \
int __res; \
__res = ((unsigned long) (n)) % (unsigned) (base); \
(n) = ((unsigned long) (n)) / (unsigned) (base); \
__res; })

/*
 * Format one integer into the buffer at str.
 *
 * @str:       output position; no terminating NUL is written
 * @num:       value to format; treated as signed only if SIGN is set
 * @base:      radix, 2..16 (the digit table has 16 entries)
 * @size:      minimum field width, or a negative value for none
 * @precision: minimum number of digits, or a negative value for none
 * @type:      bitwise OR of the flag macros above
 *
 * Returns the position just past the last character written, or NULL
 * if base is outside 2..16.
 */
static char *number(char *str, long num, int base, int size, int precision,
		    int type)
{
	/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */

	char tmp[66];
	char c, sign, locase;
	/* Magnitude is kept unsigned so that negating LONG_MIN is defined */
	unsigned long mag = num;
	int i;

	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
	 * produces same digits or (maybe lowercased) letters */
	locase = (type & SMALL);
	if (type & LEFT)
		type &= ~ZEROPAD;
	/* Reject any base the 16-entry digit table cannot represent */
	if (base < 2 || base > 16)
		return NULL;
	c = (type & ZEROPAD) ? '0' : ' ';
	sign = 0;
	if (type & SIGN) {
		if (num < 0) {
			sign = '-';
			mag = 0UL - mag;
			size--;
		} else if (type & PLUS) {
			sign = '+';
			size--;
		} else if (type & SPACE) {
			sign = ' ';
			size--;
		}
	}
	/* Reserve room for the "0x" or "0" prefix */
	if (type & SPECIAL) {
		if (base == 16)
			size -= 2;
		else if (base == 8)
			size--;
	}
	/* Generate the digits least-significant first into tmp[] */
	i = 0;
	if (mag == 0)
		tmp[i++] = '0';
	else
		while (mag != 0)
			tmp[i++] = (digits[__do_div(mag, base)] | locase);
	if (i > precision)
		precision = i;
	size -= precision;
	if (!(type & (ZEROPAD + LEFT)))
		while (size-- > 0)
			*str++ = ' ';
	if (sign)
		*str++ = sign;
	if (type & SPECIAL) {
		if (base == 8)
			*str++ = '0';
		else if (base == 16) {
			*str++ = '0';
			*str++ = ('X' | locase);
		}
	}
	if (!(type & LEFT))
		while (size-- > 0)
			*str++ = c;
	/* Leading zeros required by the precision */
	while (i < precision--)
		*str++ = '0';
	/* Emit the digits in most-significant-first order */
	while (i-- > 0)
		*str++ = tmp[i];
	/* Trailing padding for left-justified output */
	while (size-- > 0)
		*str++ = ' ';
	return str;
}
/*
 * Minimal vsprintf(): format args into buf according to fmt.
 *
 * Handles the flags '-', '+', ' ', '#' and '0', a field width and a
 * precision (either of which may be '*'), the qualifiers h, l and L,
 * and the conversions c, s, p, n, %, o, x, X, d, i and u.  An unknown
 * conversion character is copied to the output preceded by '%'.
 *
 * The output is NUL-terminated.  No output size is passed in, so the
 * caller must supply a buffer large enough for the formatted result.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int vsprintf(char *buf, const char *fmt, va_list args)
{
int len;
unsigned long num;
int i, base;
char *str;
const char *s;
int flags; /* flags to number() */
int field_width; /* width of output field */
int precision; /* min. # of digits for integers; max
   number of chars taken from a string */
int qualifier; /* 'h', 'l', or 'L' for integer fields */
for (str = buf; *fmt; ++fmt) {
/* Ordinary characters are copied through unchanged */
if (*fmt != '%') {
*str++ = *fmt;
continue;
}
/* process flags */
flags = 0;
repeat:
++fmt; /* this also skips first '%' */
switch (*fmt) {
case '-':
flags |= LEFT;
goto repeat;
case '+':
flags |= PLUS;
goto repeat;
case ' ':
flags |= SPACE;
goto repeat;
case '#':
flags |= SPECIAL;
goto repeat;
case '0':
flags |= ZEROPAD;
goto repeat;
}
/* get field width */
field_width = -1;
if (isdigit(*fmt))
field_width = skip_atoi(&fmt);
else if (*fmt == '*') {
++fmt;
/* it's the next argument */
field_width = va_arg(args, int);
/* a negative width means left-justify with the absolute value */
if (field_width < 0) {
field_width = -field_width;
flags |= LEFT;
}
}
/* get the precision */
precision = -1;
if (*fmt == '.') {
++fmt;
if (isdigit(*fmt))
precision = skip_atoi(&fmt);
else if (*fmt == '*') {
++fmt;
/* it's the next argument */
precision = va_arg(args, int);
}
if (precision < 0)
precision = 0;
}
/* get the conversion qualifier */
qualifier = -1;
if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
qualifier = *fmt;
++fmt;
}
/* default base */
base = 10;
switch (*fmt) {
case 'c':
if (!(flags & LEFT))
while (--field_width > 0)
*str++ = ' ';
*str++ = (unsigned char)va_arg(args, int);
while (--field_width > 0)
*str++ = ' ';
continue;
case 's':
s = va_arg(args, char *);
/* precision limits how many characters of s are used */
len = strnlen(s, precision);
if (!(flags & LEFT))
while (len < field_width--)
*str++ = ' ';
for (i = 0; i < len; ++i)
*str++ = *s++;
while (len < field_width--)
*str++ = ' ';
continue;
case 'p':
/* default: zero-padded to two hex digits per pointer byte */
if (field_width == -1) {
field_width = 2 * sizeof(void *);
flags |= ZEROPAD;
}
str = number(str,
(unsigned long)va_arg(args, void *), 16,
field_width, precision, flags);
continue;
case 'n':
/* store the number of characters written so far */
if (qualifier == 'l') {
long *ip = va_arg(args, long *);
*ip = (str - buf);
} else {
int *ip = va_arg(args, int *);
*ip = (str - buf);
}
continue;
case '%':
*str++ = '%';
continue;
/* integer number formats - set up the flags and "break" */
case 'o':
base = 8;
break;
case 'x':
flags |= SMALL;
/* fall through */
case 'X':
base = 16;
break;
case 'd':
case 'i':
flags |= SIGN;
/* fall through */
case 'u':
break;
default:
*str++ = '%';
if (*fmt)
*str++ = *fmt;
else
--fmt; /* hit the end of fmt: step back so the loop terminates */
continue;
}
/* fetch the integer argument according to the qualifier and sign */
if (qualifier == 'l')
num = va_arg(args, unsigned long);
else if (qualifier == 'h') {
num = (unsigned short)va_arg(args, int);
if (flags & SIGN)
num = (short)num;
} else if (flags & SIGN)
num = va_arg(args, int);
else
num = va_arg(args, unsigned int);
str = number(str, num, base, field_width, precision, flags);
}
*str = '\0';
return str - buf;
}
/*
 * Variadic front end for vsprintf(): formats into buf and returns the
 * value vsprintf() returned.  The caller must provide a buffer large
 * enough for the result.
 */
int sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int written;

	va_start(ap, fmt);
	written = vsprintf(buf, fmt, ap);
	va_end(ap);

	return written;
}
/*
 * Format into a 1024-byte stack buffer with vsprintf() and print the
 * result with puts().  Returns the value vsprintf() returned.
 *
 * NOTE: vsprintf() is not given the buffer size, so the formatted
 * output must fit in the buffer.
 */
int printf(const char *fmt, ...)
{
	char line[1024];
	va_list ap;
	int count;

	va_start(ap, fmt);
	count = vsprintf(line, fmt, ap);
	va_end(ap);

	puts(line);

	return count;
}
+29
View File
@@ -0,0 +1,29 @@
/* -----------------------------------------------------------------------
*
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2 or (at your
* option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* Simple helper function for initializing a register set.
*
* Note that this sets EFLAGS_CF in the input register set; this
* makes it easier to catch functions which do nothing but don't
* explicitly set CF.
*/
#include "boot.h"
/*
 * Put a register set into its default state: everything zero except
 * that CF is set in eflags, ds and es hold the current %ds, and fs and
 * gs hold the current %fs and %gs.
 */
void initregs(struct biosregs *reg)
{
	memset(reg, 0, sizeof(*reg));

	reg->gs = gs();
	reg->fs = fs();
	reg->es = ds();
	reg->ds = ds();

	reg->eflags |= X86_EFLAGS_CF;
}
+64
View File
@@ -0,0 +1,64 @@
/*
* setup.ld
*
* Linker script for the i386 setup code
*/
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(_start)
SECTIONS
{
/* The image is linked at address 0 */
. = 0;
.bstext : { *(.bstext) }
.bsdata : { *(.bsdata) }
/* 497 == 0x1f1, the header offset checked by the ASSERT on hdr below */
. = 497;
.header : { *(.header) }
.entrytext : { *(.entrytext) }
.inittext : { *(.inittext) }
.initdata : { *(.initdata) }
/* End of the init sections; bounded by the last ASSERT below */
__end_init = .;
.text : { *(.text) }
.text32 : { *(.text32) }
. = ALIGN(16);
.rodata : { *(.rodata*) }
/* video_cards / video_cards_end bracket the contents of .videocards */
.videocards : {
video_cards = .;
*(.videocards)
video_cards_end = .;
}
. = ALIGN(16);
.data : { *(.data*) }
/* A fixed 32-bit marker, addressable as setup_sig */
.signature : {
setup_sig = .;
LONG(0x5a5aaa55)
}
. = ALIGN(16);
/* __bss_start / __bss_end bracket the contents of .bss */
.bss :
{
__bss_start = .;
*(.bss)
__bss_end = .;
}
. = ALIGN(16);
_end = .;
/DISCARD/ : { *(.note*) }
/*
 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
 */
. = ASSERT(_end <= 0x8000, "Setup too big!");
. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
/* Necessary for the very-old-loader check to work... */
. = ASSERT(__end_init <= 5*512, "init sections too big!");
}
+148
View File
@@ -0,0 +1,148 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Very basic string functions
*/
#include "boot.h"
/*
 * Compare two NUL-terminated strings as sequences of unsigned chars.
 *
 * Returns 0 if they are equal, a negative value if str1 sorts before
 * str2, and a positive value if str1 sorts after str2.  The previous
 * implementation computed the difference the other way round and so
 * returned the opposite sign for unequal strings.
 */
int strcmp(const char *str1, const char *str2)
{
	const unsigned char *s1 = (const unsigned char *)str1;
	const unsigned char *s2 = (const unsigned char *)str2;
	int delta = 0;

	while (*s1 || *s2) {
		/* first string minus second string, as the C library does */
		delta = *s1 - *s2;
		if (delta)
			return delta;
		s1++;
		s2++;
	}
	return 0;
}
/*
 * Compare at most count characters of two strings, as unsigned chars.
 *
 * Returns -1 or 1 at the first differing position (depending on which
 * string has the smaller character there), or 0 if the strings match
 * up to count characters or up to their common terminating NUL.
 */
int strncmp(const char *cs, const char *ct, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++) {
		unsigned char a = cs[i];
		unsigned char b = ct[i];

		if (a != b)
			return a < b ? -1 : 1;
		if (a == '\0')
			break;
	}
	return 0;
}
/*
 * Return the length of s, looking at no more than maxlen characters.
 *
 * The limit is tested before each character is read, so s[maxlen] is
 * never accessed.  The previous loop dereferenced first and therefore
 * read one byte past the limit whenever no NUL occurred within the
 * first maxlen characters.
 */
size_t strnlen(const char *s, size_t maxlen)
{
	const char *es = s;

	while (maxlen && *es) {
		es++;
		maxlen--;
	}

	return (es - s);
}
/*
 * Convert the leading run of decimal digits in s to an unsigned int.
 * Returns 0 if s does not start with a digit.  Overflow is not checked.
 */
unsigned int atou(const char *s)
{
	unsigned int value = 0;

	for (; isdigit(*s); s++)
		value = value * 10 + (*s - '0');

	return value;
}
/* Works only for digits and letters, but small and fast */
#define TOLOWER(x) ((x) | 0x20)

/*
 * Guess the radix from the prefix of cp: 16 for "0x"/"0X" followed by
 * a hex digit, 8 for any other string starting with '0', else 10.
 */
static unsigned int simple_guess_base(const char *cp)
{
	if (cp[0] != '0')
		return 10;

	if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
		return 16;

	return 8;
}

/**
 * simple_strtoull - convert a string to an unsigned long long
 * @cp: The start of the string
 * @endp: A pointer to the end of the parsed string will be placed here
 * @base: The number base to use; 0 means guess it from the prefix
 *
 * Parsing stops at the first character that is not a valid digit in
 * the chosen base.  Overflow is not detected.
 */
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
	unsigned long long acc = 0;

	if (base == 0)
		base = simple_guess_base(cp);

	/* Skip an optional "0x" prefix for hexadecimal input */
	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
		cp += 2;

	for (; isxdigit(*cp); cp++) {
		unsigned int digit;

		if (isdigit(*cp))
			digit = *cp - '0';
		else
			digit = TOLOWER(*cp) - 'a' + 10;

		if (digit >= base)
			break;

		acc = acc * base + digit;
	}

	if (endp)
		*endp = (char *)cp;

	return acc;
}
/**
 * strlen - Find the length of a string
 * @s: The string to be sized
 *
 * Returns the number of characters before the terminating NUL.
 */
size_t strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		++end;

	return end - s;
}
/**
 * strstr - Find the first substring in a %NUL terminated string
 * @s1: The string to be searched
 * @s2: The string to search for
 *
 * Returns a pointer to the first occurrence of @s2 in @s1, @s1 itself
 * if @s2 is empty, or NULL if @s2 does not occur in @s1.
 */
char *strstr(const char *s1, const char *s2)
{
	size_t needle_len = strlen(s2);
	size_t remaining;

	if (needle_len == 0)
		return (char *)s1;

	/* Slide over s1 while enough characters remain to hold s2 */
	for (remaining = strlen(s1); remaining >= needle_len; remaining--, s1++) {
		if (memcmp(s1, s2, needle_len) == 0)
			return (char *)s1;
	}

	return NULL;
}
+266
View File
@@ -0,0 +1,266 @@
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1997 Martin Mares
* Copyright (C) 2007 H. Peter Anvin
*/
/*
* This file builds a disk-image from two different files:
*
* - setup: 8086 machine code, sets up system parm
* - system: 80386 code for actual system
*
* It does some checking that all files are of the correct type, and
* just writes the result to stdout, removing headers and padding to
* the right amount. It also writes some system data to stderr.
*/
/*
* Changes by tytso to allow root device specification
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
* Cross compiling fixes by Gertjan van Wingerde, July 1996
* Rewritten by Martin Mares, April 1997
* Substantially overhauled by H. Peter Anvin, April 2007
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <tools/le_byteshift.h>
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
#define DEFAULT_MAJOR_ROOT 0
#define DEFAULT_MINOR_ROOT 0
#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
/* Minimal number of setup sectors */
#define SETUP_SECT_MIN 5
#define SETUP_SECT_MAX 64
/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];
int is_big_kernel;
/*----------------------------------------------------------------------*/
/*
 * 256-entry CRC lookup table, indexed by a byte value in
 * partial_crc32_one().  Entry 128 is 0xedb88320, which is consistent
 * with the usual bit-reflected CRC-32 table.
 */
static const u32 crctab32[] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
0x2d02ef8d
};
/*
 * Fold one byte into a running CRC: the low byte of (crc ^ c) selects
 * a table entry, which is XORed with the CRC shifted right by 8 bits.
 */
static u32 partial_crc32_one(u8 c, u32 crc)
{
	u32 slot = (crc ^ c) & 0xff;

	return (crc >> 8) ^ crctab32[slot];
}
/*
 * Fold len bytes starting at s into the running CRC and return the
 * updated value.  No initial or final inversion is applied here.
 */
static u32 partial_crc32(const u8 *s, int len, u32 crc)
{
	for (; len != 0; len--, s++)
		crc = partial_crc32_one(*s, crc);

	return crc;
}
/*
 * Print a printf-style message followed by a newline to stderr and
 * terminate the program with exit status 1.  Never returns.
 */
static void die(const char * str, ...)
{
	va_list args;

	va_start(args, str);
	vfprintf(stderr, str, args);
	/* Pair every va_start() with va_end(), even though we exit next */
	va_end(args);
	fputc('\n', stderr);
	exit(1);
}
/* Print the command-line synopsis via die(), which exits with status 1. */
static void usage(void)
{
die("Usage: build setup system [> image]");
}
/*
 * build setup system [> image]
 *
 * Reads the setup binary (argv[1]) into buf, zero-pads it to a whole
 * number of 512-byte sectors (at least SETUP_SECT_MIN), patches the
 * size fields in its header, then writes to stdout: the padded setup,
 * the kernel file (argv[2]), zero padding up to a 16-byte paragraph
 * boundary less 4 bytes, and finally a 4-byte little-endian CRC that
 * covers everything written before it.  Progress information goes to
 * stderr.  Any failure ends the program through die().
 */
int main(int argc, char ** argv)
{
#ifdef CONFIG_EFI_STUB
	unsigned int file_sz, pe_header;
#endif
	unsigned int i, sz, setup_sectors;
	int c;
	u32 sys_size;
	struct stat sb;
	FILE *file;
	int fd;
	void *kernel;
	u32 crc = 0xffffffffUL;

	if (argc != 3)
		usage();

	/* Copy the setup code */
	file = fopen(argv[1], "r");
	if (!file)
		die("Unable to open `%s': %m", argv[1]);
	c = fread(buf, 1, sizeof(buf), file);
	if (ferror(file))
		die("read-error on `setup'");
	if (c < 1024)
		die("The setup must be at least 1024 bytes");
	if (get_unaligned_le16(&buf[510]) != 0xAA55)
		die("Boot block hasn't got boot flag (0xAA55)");
	fclose(file);

	/* Pad unused space with zeros */
	setup_sectors = (c + 511) / 512;
	if (setup_sectors < SETUP_SECT_MIN)
		setup_sectors = SETUP_SECT_MIN;
	i = setup_sectors*512;
	memset(buf+c, 0, i-c);

	/* Set the default root device */
	put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);

	/* i is unsigned, so it is printed with %u rather than %d */
	fprintf(stderr, "Setup is %d bytes (padded to %u bytes).\n", c, i);

	/* Open and stat the kernel file */
	fd = open(argv[2], O_RDONLY);
	if (fd < 0)
		die("Unable to open `%s': %m", argv[2]);
	if (fstat(fd, &sb))
		die("Unable to stat `%s': %m", argv[2]);
	sz = sb.st_size;
	/* sz is unsigned, so the size is printed with %u rather than %d */
	fprintf (stderr, "System is %u kB\n", (sz+1023)/1024);
	kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
	if (kernel == MAP_FAILED)
		die("Unable to mmap '%s': %m", argv[2]);
	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
	sys_size = (sz + 15 + 4) / 16;

	/* Patch the setup code with the appropriate size parameters */
	buf[0x1f1] = setup_sectors-1;
	put_unaligned_le32(sys_size, &buf[0x1f4]);

#ifdef CONFIG_EFI_STUB
	file_sz = sz + i + ((sys_size * 16) - sz);

	/* The PE header offset is read from the setup image itself */
	pe_header = get_unaligned_le32(&buf[0x3c]);

	/* Size of code */
	put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);

	/* Size of image */
	put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);

#ifdef CONFIG_X86_32
	/*
	 * Address of entry point.
	 *
	 * The EFI stub entry point is +16 bytes from the start of
	 * the .text section.
	 */
	put_unaligned_le32(i + 16, &buf[pe_header + 0x28]);

	/* .text size */
	put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);

	/* .text size of initialised data */
	put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
#else
	/*
	 * Address of entry point. startup_32 is at the beginning and
	 * the 64-bit entry point (startup_64) is always 512 bytes
	 * after. The EFI stub entry point is 16 bytes after that, as
	 * the first instruction allows legacy loaders to jump over
	 * the EFI stub initialisation
	 */
	put_unaligned_le32(i + 528, &buf[pe_header + 0x28]);

	/* .text size */
	put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);

	/* .text size of initialised data */
	put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
#endif /* CONFIG_X86_32 */
#endif /* CONFIG_EFI_STUB */

	crc = partial_crc32(buf, i, crc);
	if (fwrite(buf, 1, i, stdout) != i)
		die("Writing setup failed");

	/* Copy the kernel code */
	crc = partial_crc32(kernel, sz, crc);
	if (fwrite(kernel, 1, sz, stdout) != sz)
		die("Writing kernel failed");

	/* Add padding leaving 4 bytes for the checksum */
	while (sz++ < (sys_size*16) - 4) {
		crc = partial_crc32_one('\0', crc);
		if (fwrite("\0", 1, 1, stdout) != 1)
			die("Writing padding failed");
	}

	/* Write the CRC; buf is reused as scratch space for its 4 bytes */
	fprintf(stderr, "CRC %x\n", crc);
	put_unaligned_le32(crc, buf);
	if (fwrite(buf, 1, 4, stdout) != 4)
		die("Writing CRC failed");

	close(fd);

	/* Everything is OK */
	return 0;
}
+139
View File
@@ -0,0 +1,139 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Very simple screen and serial I/O
*/
#include "boot.h"
int early_serial_base;
#define XMTRDY 0x20
#define TXR 0 /* Transmit register (WRITE) */
#define LSR 5 /* Line Status */
/*
* These functions are in .inittext so they can be used to signal
* error during initialization.
*/
/*
 * Write one character to the UART at early_serial_base.
 *
 * Polls the line status register until the XMTRDY bit is set, giving
 * up after a bounded number of polls; the character is written to the
 * transmit register in either case.
 */
static void __attribute__((section(".inittext"))) serial_putchar(int ch)
{
	unsigned spins = 0xffff;

	for (;;) {
		if ((inb(early_serial_base + LSR) & XMTRDY) != 0)
			break;
		if (--spins == 0)
			break;
		cpu_relax();
	}

	outb(ch, early_serial_base + TXR);
}
static void __attribute__((section(".inittext"))) bios_putchar(int ch)
{
struct biosregs ireg;
initregs(&ireg);
ireg.bx = 0x0007;
ireg.cx = 0x0001;
ireg.ah = 0x0e;
ireg.al = ch;
intcall(0x10, &ireg, NULL);
}
/*
 * Print one character on the BIOS console and, when an early serial
 * port has been set up (early_serial_base != 0), on that port as well.
 * A '\n' is preceded by a '\r'.
 */
void __attribute__((section(".inittext"))) putchar(int ch)
{
	/* \n -> \r\n */
	if (ch == '\n')
		putchar('\r');

	bios_putchar(ch);

	if (early_serial_base == 0)
		return;

	serial_putchar(ch);
}
/*
 * Print a NUL-terminated string with putchar().  Unlike the C library
 * puts(), no newline is appended.
 */
void __attribute__((section(".inittext"))) puts(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		putchar(*p);
}
/*
* Read the CMOS clock through the BIOS, and return the
* seconds in BCD.
*/
static u8 gettime(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x02;
intcall(0x1a, &ireg, &oreg);
return oreg.dh;
}
/*
* Read from the keyboard
*/
int getchar(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
/* ireg.ah = 0x00; */
intcall(0x16, &ireg, &oreg);
return oreg.al;
}
static int kbd_pending(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x01;
intcall(0x16, &ireg, &oreg);
return !(oreg.eflags & X86_EFLAGS_ZF);
}
/* Discard keyboard input for as long as kbd_pending() reports any. */
void kbd_flush(void)
{
	while (kbd_pending())
		getchar();
}
/*
 * Wait for a key press, giving up after the value returned by
 * gettime() has changed 30 times.
 *
 * Returns the character read by getchar(), or 0 on timeout.
 */
int getchar_timeout(void)
{
	int ticks_left = 30;
	int last, now;

	for (last = gettime(); ticks_left != 0; ) {
		if (kbd_pending())
			return getchar();

		now = gettime();
		if (now == last)
			continue;

		/* The clock moved on: use up one tick of the budget */
		ticks_left--;
		last = now;
	}

	return 0;	/* Timeout! */
}
+21
View File
@@ -0,0 +1,21 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Kernel version string
*/
#include "boot.h"
#include <generated/utsrelease.h>
#include <generated/compile.h>
/*
 * Built by compile-time string concatenation, in the form
 * "<UTS_RELEASE> (<LINUX_COMPILE_BY>@<LINUX_COMPILE_HOST>) <UTS_VERSION>".
 */
const char kernel_version[] =
UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
UTS_VERSION;
+72
View File
@@ -0,0 +1,72 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
#ifndef BOOT_VESA_H
#define BOOT_VESA_H
typedef struct {
u16 off, seg;
} far_ptr;
/* VESA General Information table */
struct vesa_general_info {
u32 signature; /* 0 Magic number = "VESA" */
u16 version; /* 4 */
far_ptr vendor_string; /* 6 */
u32 capabilities; /* 10 */
far_ptr video_mode_ptr; /* 14 */
u16 total_memory; /* 18 */
u8 reserved[236]; /* 20 */
} __attribute__ ((packed));
#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
/*
 * VESA per-mode information block.  Packed so the fields sit at exactly
 * the byte offsets noted in the per-field comments (50 + 206 reserved
 * bytes = 256 bytes in total).
 */
struct vesa_mode_info {
u16 mode_attr; /* 0 */
u8 win_attr[2]; /* 2 */
u16 win_grain; /* 4 */
u16 win_size; /* 6 */
u16 win_seg[2]; /* 8 */
far_ptr win_scheme; /* 12 */
u16 logical_scan; /* 16 */
u16 h_res; /* 18 */
u16 v_res; /* 20 */
u8 char_width; /* 22 */
u8 char_height; /* 23 */
u8 memory_planes; /* 24 */
u8 bpp; /* 25 */
u8 banks; /* 26 */
u8 memory_layout; /* 27 */
u8 bank_size; /* 28 */
u8 image_planes; /* 29 */
u8 page_function; /* 30 */
u8 rmask; /* 31 */
u8 rpos; /* 32 */
u8 gmask; /* 33 */
u8 gpos; /* 34 */
u8 bmask; /* 35 */
u8 bpos; /* 36 */
u8 resv_mask; /* 37 */
u8 resv_pos; /* 38 */
u8 dcm_info; /* 39 */
u32 lfb_ptr; /* 40 Linear frame buffer address */
u32 offscreen_ptr; /* 44 Offscreen memory address */
u16 offscreen_size; /* 48 */
u8 reserved[206]; /* 50 */
} __attribute__ ((packed));
#endif /* BOOT_VESA_H */
+128
View File
@@ -0,0 +1,128 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Standard video BIOS modes
*
* We have two options for this; silent and scanned.
*/
#include "boot.h"
#include "video.h"
static __videocard video_bios;
/* Set a conventional BIOS mode */
static int set_bios_mode(u8 mode);
static int bios_set_mode(struct mode_info *mi)
{
return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS);
}
/*
 * Ask the BIOS to switch to @mode, then read the current mode back to
 * check that the switch took effect.
 *
 * Returns 0 if the BIOS now reports @mode, -1 otherwise.  do_restore is
 * set in either case.
 */
static int set_bios_mode(u8 mode)
{
	struct biosregs in, out;
	u8 actual;

	initregs(&in);

	in.al = mode;			/* AH=0x00 Set Video Mode */
	intcall(0x10, &in, NULL);

	in.ah = 0x0f;			/* Get Current Video Mode */
	intcall(0x10, &in, &out);

	do_restore = 1;			/* Assume video contents were lost */

	/* Not all BIOSes are clean with the top bit */
	actual = out.al & 0x7f;

	if (actual != mode) {
#ifndef _WAKEUP
		if (actual != boot_params.screen_info.orig_video_mode) {
			/* The mode set failed and we are not back where
			   we started either; try to go back to the
			   original video mode. */
			in.ax = boot_params.screen_info.orig_video_mode;
			intcall(0x10, &in, NULL);
		}
#endif
		return -1;
	}

	return 0;			/* Mode change OK */
}
/*
 * Probe hook for the BIOS card: try every BIOS mode number from 0x14 to
 * 0x7f that is not already known, and record those that can be set and
 * look like text modes.  The previously active mode is set again before
 * returning.
 *
 * Returns the number of modes added to video_bios.modes (0 if the
 * adapter is neither EGA nor VGA).
 */
static int bios_probe(void)
{
#ifdef _WAKEUP
	u8 saved_mode = 0x03;
#else
	u8 saved_mode = boot_params.screen_info.orig_video_mode;
#endif
	u8 mode;
	u16 crtc;
	int found = 0;

	if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA)
		return 0;

	set_fs(0);
	crtc = vga_crtc();

	video_bios.modes = GET_HEAP(struct mode_info, 0);

	for (mode = 0x14; mode <= 0x7f; mode++) {
		struct mode_info *entry;

		/* Stop as soon as there is no room for another entry */
		if (!heap_free(sizeof(struct mode_info)))
			break;

		/* Skip modes already listed, and modes that cannot be set */
		if (mode_defined(VIDEO_FIRST_BIOS+mode) || set_bios_mode(mode))
			continue;

		/*
		 * Try to verify that it's a text mode:
		 *  - Attribute Controller: graphics must be disabled
		 *  - Graphics Controller: alpha addressing must be enabled
		 *  - CRTC cursor location low should be zero(?)
		 */
		if ((in_idx(0x3c0, 0x10) & 0x01) ||
		    (in_idx(0x3ce, 0x06) & 0x01) ||
		    in_idx(crtc, 0x0f))
			continue;

		entry = GET_HEAP(struct mode_info, 1);
		entry->mode = VIDEO_FIRST_BIOS+mode;
		entry->depth = 0;		/* text */
		entry->x = rdfs16(0x44a);
		entry->y = rdfs8(0x484)+1;
		found++;
	}

	set_bios_mode(saved_mode);

	return found;
}
/*
 * Card descriptor for plain BIOS modes.  It is flagged unsafe, so
 * probe_cards() only runs bios_probe() when called with unsafe == 1.
 * Mode numbers in [VIDEO_FIRST_BIOS, VIDEO_FIRST_BIOS + 0x80) that were
 * never probed may still be attempted through the xmode range.
 */
static __videocard video_bios =
{
.card_name = "BIOS",
.probe = bios_probe,
.set_mode = bios_set_mode,
.unsafe = 1,
.xmode_first = VIDEO_FIRST_BIOS,
.xmode_n = 0x80,
};
+173
View File
@@ -0,0 +1,173 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* arch/i386/boot/video-mode.c
*
* Set the video mode. This is separated out into a different
* file in order to be shared with the ACPI wakeup code.
*/
#include "boot.h"
#include "video.h"
#include "vesa.h"
/*
* Common variables
*/
/* Adapter class detected by vga_probe(); see the ADAPTER_* constants */
int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
/* Segment of the text frame buffer; chosen in store_mode_params() */
u16 video_segment;
/* When nonzero these replace the BIOS-reported column/row counts */
int force_x, force_y; /* Don't query the BIOS for cols/rows */
/* Set by the mode-setting code; set_video() then calls restore_screen() */
int do_restore; /* Screen contents changed during mode flip */
/* Set by vesa_set_mode() according to the kind of mode selected */
int graphic_mode; /* Graphic mode with linear frame buffer */
/*
 * Run the probe hook of every registered video card whose "unsafe"
 * flag equals @unsafe, storing the number of modes each one reports in
 * card->nmodes (0 for cards without a probe hook).
 *
 * Each of the two classes (unsafe == 0 and unsafe == 1) is probed at
 * most once; later calls for the same class return immediately.
 */
void probe_cards(int unsafe)
{
	static u8 probed[2];
	struct card_info *card;

	if (probed[unsafe])
		return;
	probed[unsafe] = 1;

	for (card = video_cards; card < video_cards_end; card++) {
		if (card->unsafe != unsafe)
			continue;

		card->nmodes = card->probe ? card->probe() : 0;
	}
}
/*
 * Check whether any registered card already lists @mode in its probed
 * mode table.  Returns 1 if so, 0 otherwise.
 */
int mode_defined(u16 mode)
{
	struct card_info *card;
	int idx;

	for (card = video_cards; card < video_cards_end; card++) {
		for (idx = 0; idx < card->nmodes; idx++) {
			if (card->modes[idx].mode == mode)
				return 1;
		}
	}

	return 0;
}
/*
 * Set mode (without recalc).
 *
 * @mode may be a menu position (counting only visible entries), a
 * card-specific mode ID, or a resolution encoded as (rows << 8) + cols.
 * The VIDEO_RECALC bit is ignored here.  If no probed mode matches, the
 * cards' "exceptional" (unprobed) mode ranges are tried.
 *
 * On a match, *real_mode receives the canonical mode ID and the card's
 * set_mode() result is returned.  Returns -1 if nothing matches, in
 * which case *real_mode is left untouched.
 */
static int raw_set_mode(u16 mode, u16 *real_mode)
{
	int nmode, i;
	struct card_info *card;
	struct mode_info *mi;

	/* Drop the recalc bit if set */
	mode &= ~VIDEO_RECALC;

	/* Scan for mode based on fixed ID, position, or resolution */
	nmode = 0;
	for (card = video_cards; card < video_cards_end; card++) {
		mi = card->modes;
		for (i = 0; i < card->nmodes; i++, mi++) {
			int visible = mi->x || mi->y;

			if ((mode == nmode && visible) ||
			    mode == mi->mode ||
			    mode == (mi->y << 8)+mi->x) {
				*real_mode = mi->mode;
				return card->set_mode(mi);
			}

			/* Only visible modes take up a menu position */
			if (visible)
				nmode++;
		}
	}

	/* Nothing found?  Is it an "exceptional" (unprobed) mode? */
	for (card = video_cards; card < video_cards_end; card++) {
		if (mode >= card->xmode_first &&
		    mode < card->xmode_first+card->xmode_n) {
			/*
			 * Initialize every field, so the driver never sees
			 * an indeterminate value (depth was previously left
			 * unset).
			 */
			struct mode_info mix = {
				.mode  = mode,
				.x     = 0,
				.y     = 0,
				.depth = 0,
			};

			*real_mode = mode;
			return card->set_mode(&mix);
		}
	}

	/* Otherwise, failure... */
	return -1;
}
/*
* Recalculate the vertical video cutoff (hack!)
*/
static void vga_recalc_vertical(void)
{
unsigned int font_size, rows;
u16 crtc;
u8 pt, ov;
set_fs(0);
font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
rows *= font_size; /* Visible scan lines */
rows--; /* ... minus one */
crtc = vga_crtc();
pt = in_idx(crtc, 0x11);
pt &= ~0x80; /* Unlock CR0-7 */
out_idx(pt, crtc, 0x11);
out_idx((u8)rows, crtc, 0x12); /* Lower height register */
ov = in_idx(crtc, 0x07); /* Overflow register */
ov &= 0xbd;
ov |= (rows >> (8-1)) & 0x02;
ov |= (rows >> (9-6)) & 0x40;
out_idx(ov, crtc, 0x07);
}
/*
 * Set a video mode, recalculating the vertical cutoff afterwards when
 * the VIDEO_RECALC bit is present in @mode.
 *
 * VIDEO_CURRENT_MODE is a no-op; NORMAL_VGA and EXTENDED_VGA are mapped
 * to VIDEO_80x25 and VIDEO_8POINT respectively.
 *
 * Returns 0 on success, or the nonzero result of raw_set_mode().
 */
int set_mode(u16 mode)
{
	u16 real_mode;
	int rv;

	/* Very special mode numbers... */
	if (mode == VIDEO_CURRENT_MODE)
		return 0;		/* Nothing to do... */

	if (mode == NORMAL_VGA)
		mode = VIDEO_80x25;
	else if (mode == EXTENDED_VGA)
		mode = VIDEO_8POINT;

	rv = raw_set_mode(mode, &real_mode);
	if (rv != 0)
		return rv;

	if (mode & VIDEO_RECALC)
		vga_recalc_vertical();

#ifndef _WAKEUP
	/* Hand the kernel the canonical mode number rather than an
	   alias, size specification or menu position */
	boot_params.hdr.vid_mode = real_mode;
#endif

	return 0;
}
+280
View File
@@ -0,0 +1,280 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* VESA text modes
*/
#include "boot.h"
#include "video.h"
#include "vesa.h"
/* VESA information */
static struct vesa_general_info vginfo;
static struct vesa_mode_info vminfo;
static __videocard video_vesa;
#ifndef _WAKEUP
static void vesa_store_mode_params_graphics(void);
#else /* _WAKEUP */
static inline void vesa_store_mode_params_graphics(void) {}
#endif /* _WAKEUP */
/*
 * Probe hook for the VESA card: fetch the VESA general information
 * block, then walk the BIOS mode list and record each usable mode on
 * the heap.
 *
 * Text modes are always recorded (depth 0).  Graphics modes are only
 * recorded when CONFIG_FB_BOOT_VESA_SUPPORT is defined.
 *
 * Returns the number of modes recorded; 0 if the BIOS call fails, the
 * signature is wrong, or the reported version is below 0x0102.
 */
static int vesa_probe(void)
{
	struct biosregs ireg, oreg;
	addr_t list_off;
	int found = 0;

	video_vesa.modes = GET_HEAP(struct mode_info, 0);

	initregs(&ireg);
	ireg.ax = 0x4f00;
	ireg.di = (size_t)&vginfo;
	intcall(0x10, &ireg, &oreg);

	if (oreg.ax != 0x004f ||
	    vginfo.signature != VESA_MAGIC ||
	    vginfo.version < 0x0102)
		return 0;	/* Not present */

	set_fs(vginfo.video_mode_ptr.seg);
	list_off = vginfo.video_mode_ptr.off;

	for (;;) {
		struct mode_info *mi;
		u16 depth;
		u16 mode = rdfs16(list_off);

		if (mode == 0xffff)
			break;		/* End of the mode list */
		list_off += 2;

		if (!heap_free(sizeof(struct mode_info)))
			break;	/* Heap full, can't save mode info */

		if (mode & ~0x1ff)
			continue;

		memset(&vminfo, 0, sizeof vminfo); /* Just in case... */

		ireg.ax = 0x4f01;
		ireg.cx = mode;
		ireg.di = (size_t)&vminfo;
		intcall(0x10, &ireg, &oreg);

		if (oreg.ax != 0x004f)
			continue;

		if ((vminfo.mode_attr & 0x15) == 0x05) {
			/* Text Mode, TTY BIOS supported,
			   supported by hardware */
			depth = 0;
#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
		} else if ((vminfo.mode_attr & 0x99) == 0x99 &&
			   (vminfo.memory_layout == 4 ||
			    vminfo.memory_layout == 6) &&
			   vminfo.memory_planes == 1) {
			/* Graphics mode, color, linear frame buffer
			   supported.  Only registered when the frame
			   buffer is configured; otherwise the user would
			   be left without a screen. */
			depth = vminfo.bpp;
#endif
		} else {
			continue;
		}

		mi = GET_HEAP(struct mode_info, 1);
		mi->mode = mode + VIDEO_FIRST_VESA;
		mi->depth = depth;
		mi->x = vminfo.h_res;
		mi->y = vminfo.v_res;
		found++;
	}

	return found;
}
/*
 * set_mode hook for the VESA card.
 *
 * Re-reads the mode information for the requested mode, checks that it
 * is either a supported text mode or (with CONFIG_FB_BOOT_VESA_SUPPORT)
 * a graphics mode, and then asks the BIOS to switch to it.
 *
 * Returns 0 on success, -1 if either BIOS call fails or the mode is
 * not acceptable.
 */
static int vesa_set_mode(struct mode_info *mode)
{
	struct biosregs ireg, oreg;
	u16 vbe_mode = mode->mode - VIDEO_FIRST_VESA;
	int lfb;

	memset(&vminfo, 0, sizeof vminfo); /* Just in case... */

	/* Fetch the mode information block */
	initregs(&ireg);
	ireg.ax = 0x4f01;
	ireg.cx = vbe_mode;
	ireg.di = (size_t)&vminfo;
	intcall(0x10, &ireg, &oreg);
	if (oreg.ax != 0x004f)
		return -1;

	if ((vminfo.mode_attr & 0x15) == 0x05) {
		/* It's a supported text mode */
		lfb = 0;
#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
	} else if ((vminfo.mode_attr & 0x99) == 0x99) {
		/* It's a graphics mode with linear frame buffer */
		lfb = 1;
		vbe_mode |= 0x4000;	/* Request linear frame buffer */
#endif
	} else {
		return -1;		/* Invalid mode */
	}

	/* Perform the actual mode switch */
	initregs(&ireg);
	ireg.ax = 0x4f02;
	ireg.bx = vbe_mode;
	intcall(0x10, &ireg, &oreg);
	if (oreg.ax != 0x004f)
		return -1;

	graphic_mode = lfb;

	if (lfb) {
		/* Graphics mode */
		vesa_store_mode_params_graphics();
		return 0;
	}

	/* Text mode */
	force_x = mode->x;
	force_y = mode->y;
	do_restore = 1;

	return 0;
}
#ifndef _WAKEUP
/*
 * Switch the DAC to 8-bit mode if the capabilities word says this is
 * possible, then record the resulting DAC width as the size of every
 * color component in screen_info, with all component positions at 0.
 *
 * The width stays at 6 when the switch is not attempted or fails.
 */
static void vesa_dac_set_8bits(void)
{
	struct biosregs ireg, oreg;
	u8 bits = 6;

	if (vginfo.capabilities & 1) {
		initregs(&ireg);
		ireg.ax = 0x4f08;
		ireg.bh = 0x08;
		intcall(0x10, &ireg, &oreg);

		/* On success BH holds the width actually in effect */
		if (oreg.ax == 0x004f)
			bits = oreg.bh;
	}

	/* All offsets are 0 ... */
	boot_params.screen_info.red_pos = 0;
	boot_params.screen_info.green_pos = 0;
	boot_params.screen_info.blue_pos = 0;
	boot_params.screen_info.rsvd_pos = 0;

	/* ... and all sizes equal the DAC size */
	boot_params.screen_info.red_size = bits;
	boot_params.screen_info.green_size = bits;
	boot_params.screen_info.blue_size = bits;
	boot_params.screen_info.rsvd_size = bits;
}
/*
 * Query the BIOS (AX=0x4f0a) and, if the call succeeds, save the
 * returned ES:DI as the VESA protected mode segment/offset in
 * screen_info.  Nothing is stored on failure.
 */
static void vesa_store_pm_info(void)
{
	struct biosregs ireg, oreg;

	initregs(&ireg);
	ireg.ax = 0x4f0a;
	intcall(0x10, &ireg, &oreg);

	if (oreg.ax == 0x004f) {
		boot_params.screen_info.vesapm_seg = oreg.es;
		boot_params.screen_info.vesapm_off = oreg.di;
	}
}
/*
 * Fill in screen_info for a VESA graphics mode, using the mode
 * information block (vminfo) most recently read from the BIOS and the
 * general information block (vginfo).
 */
static void vesa_store_mode_params_graphics(void)
{
	/* Tell the kernel we're in VESA graphics mode */
	boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;

	/* Frame buffer location and size */
	boot_params.screen_info.lfb_base = vminfo.lfb_ptr;
	boot_params.screen_info.lfb_size = vginfo.total_memory;

	/* Frame buffer geometry */
	boot_params.screen_info.lfb_width = vminfo.h_res;
	boot_params.screen_info.lfb_height = vminfo.v_res;
	boot_params.screen_info.lfb_depth = vminfo.bpp;
	boot_params.screen_info.lfb_linelength = vminfo.logical_scan;

	/* Remaining mode parameters */
	boot_params.screen_info.pages = vminfo.image_planes;
	boot_params.screen_info.vesa_attributes = vminfo.mode_attr;

	/* Copy the 8 bytes rmask..resv_pos to screen_info, starting at
	   red_size */
	memcpy(&boot_params.screen_info.red_size,
	       &vminfo.rmask, 8);

	/* For modes of at most 8 bpp the DAC width overrides the
	   color fields copied above */
	if (vminfo.bpp <= 8)
		vesa_dac_set_8bits();

	vesa_store_pm_info();
}
/*
 * Save EDID information for the kernel; this is invoked, separately,
 * after mode-setting.
 *
 * Does nothing unless CONFIG_FIRMWARE_EDID is defined.  Otherwise
 * boot_params.edid_info is first filled with the byte 0x13, and the
 * EDID block is then read into it if the VESA version is at least
 * 0x0200 and the capability query succeeds.
 */
void vesa_store_edid(void)
{
#ifdef CONFIG_FIRMWARE_EDID
	struct biosregs ireg, oreg;

	/* Apparently used as a nonsense token... */
	memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);

	/* EDID requires VBE 2.0+ */
	if (vginfo.version >= 0x0200) {
		/*
		 * Report DDC capabilities: BX=0, CX=0 (controller 0)
		 * are left as cleared by initregs().
		 */
		initregs(&ireg);
		ireg.ax = 0x4f15;		/* VBE DDC */
		ireg.es = 0;			/* ES:DI must be 0 by spec */
		intcall(0x10, &ireg, &oreg);

		if (oreg.ax == 0x004f) {
			/*
			 * Read EDID: CX (controller) and DX (EDID block
			 * number) are still 0.
			 */
			ireg.ax = 0x4f15;	/* VBE DDC */
			ireg.bx = 0x0001;	/* Read EDID */
			ireg.es = ds();
			ireg.di = (size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
			intcall(0x10, &ireg, &oreg);
		}
	}
#endif /* CONFIG_FIRMWARE_EDID */
}
#endif /* not _WAKEUP */
/*
 * Card descriptor for VESA modes.  "unsafe" is left at 0, so
 * probe_cards(0) runs vesa_probe().  Mode numbers in
 * [VIDEO_FIRST_VESA, VIDEO_FIRST_VESA + 0x200) that were never probed
 * may still be attempted through the xmode range.
 */
static __videocard video_vesa =
{
.card_name = "VESA",
.probe = vesa_probe,
.set_mode = vesa_set_mode,
.xmode_first = VIDEO_FIRST_VESA,
.xmode_n = 0x200,
};
+288
View File
@@ -0,0 +1,288 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Common all-VGA modes
*/
#include "boot.h"
#include "video.h"
/* Text modes offered on a VGA adapter (depth 0 = text) */
static struct mode_info vga_modes[] = {
	{ .mode = VIDEO_80x25,  .x = 80, .y = 25, .depth = 0 },
	{ .mode = VIDEO_8POINT, .x = 80, .y = 50, .depth = 0 },
	{ .mode = VIDEO_80x43,  .x = 80, .y = 43, .depth = 0 },
	{ .mode = VIDEO_80x28,  .x = 80, .y = 28, .depth = 0 },
	{ .mode = VIDEO_80x30,  .x = 80, .y = 30, .depth = 0 },
	{ .mode = VIDEO_80x34,  .x = 80, .y = 34, .depth = 0 },
	{ .mode = VIDEO_80x60,  .x = 80, .y = 60, .depth = 0 },
};
/* Text modes offered on an EGA adapter; the 8-point font gives 43 rows */
static struct mode_info ega_modes[] = {
	{ .mode = VIDEO_80x25,  .x = 80, .y = 25, .depth = 0 },
	{ .mode = VIDEO_8POINT, .x = 80, .y = 43, .depth = 0 },
};
/* The only mode offered on CGA/MDA/HGC adapters */
static struct mode_info cga_modes[] = {
	{ .mode = VIDEO_80x25, .x = 80, .y = 25, .depth = 0 },
};
static __videocard video_vga;
/*
 * Set basic 80x25 mode.
 *
 * Queries the current BIOS mode; if it is 3 or 7 that same mode is set
 * again, otherwise mode 3 is set.  Marks the screen for restoring and
 * returns the mode number that was set.
 */
static u8 vga_set_basic_mode(void)
{
	struct biosregs ireg, oreg;
	u8 mode;

	initregs(&ireg);

	/* Query current mode */
	ireg.ax = 0x0f00;
	intcall(0x10, &ireg, &oreg);

	/* Keep mode 7 if that is what we have; anything else becomes 3 */
	mode = (oreg.al == 7) ? 7 : 3;

	/* Set the mode */
	ireg.ax = mode;		/* AH=0: set mode */
	intcall(0x10, &ireg, NULL);

	do_restore = 1;
	return mode;
}
static void vga_set_8font(void)
{
/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
struct biosregs ireg;
initregs(&ireg);
/* Set 8x8 font */
ireg.ax = 0x1112;
/* ireg.bl = 0; */
intcall(0x10, &ireg, NULL);
/* Use alternate print screen */
ireg.ax = 0x1200;
ireg.bl = 0x20;
intcall(0x10, &ireg, NULL);
/* Turn off cursor emulation */
ireg.ax = 0x1201;
ireg.bl = 0x34;
intcall(0x10, &ireg, NULL);
/* Cursor is scan lines 6-7 */
ireg.ax = 0x0100;
ireg.cx = 0x0607;
intcall(0x10, &ireg, NULL);
}
static void vga_set_14font(void)
{
/* Set 9x14 font - 80x28 on VGA */
struct biosregs ireg;
initregs(&ireg);
/* Set 9x14 font */
ireg.ax = 0x1111;
/* ireg.bl = 0; */
intcall(0x10, &ireg, NULL);
/* Turn off cursor emulation */
ireg.ax = 0x1201;
ireg.bl = 0x34;
intcall(0x10, &ireg, NULL);
/* Cursor is scan lines 11-12 */
ireg.ax = 0x0100;
ireg.cx = 0x0b0c;
intcall(0x10, &ireg, NULL);
}
static void vga_set_80x43(void)
{
/* Set 80x43 mode on VGA (not EGA) */
struct biosregs ireg;
initregs(&ireg);
/* Set 350 scans */
ireg.ax = 0x1201;
ireg.bl = 0x30;
intcall(0x10, &ireg, NULL);
/* Reset video mode */
ireg.ax = 0x0003;
intcall(0x10, &ireg, NULL);
vga_set_8font();
}
/*
 * I/O address of the VGA CRTC: 0x3d4 when bit 0 of the value read from
 * port 0x3cc is set, 0x3b4 otherwise.
 */
u16 vga_crtc(void)
{
	if (inb(0x3cc) & 1)
		return 0x3d4;

	return 0x3b4;
}
/*
 * Program the CRTC vertical timing registers for 480 scan lines, then
 * update the miscellaneous output register to match.
 */
static void vga_set_480_scanlines(void)
{
	/* { value, CRTC register index }, written in exactly this order */
	static const u8 crtc_setup[][2] = {
		{ 0x0c, 0x11 },	/* Vertical sync end, unlock CR0-7 */
		{ 0x0b, 0x06 },	/* Vertical total */
		{ 0x3e, 0x07 },	/* Vertical overflow */
		{ 0xea, 0x10 },	/* Vertical sync start */
		{ 0xdf, 0x12 },	/* Vertical display end */
		{ 0xe7, 0x15 },	/* Vertical blank start */
		{ 0x04, 0x16 },	/* Vertical blank end */
	};
	u16 crtc = vga_crtc();	/* CRTC base address */
	u8 misc;		/* CRTC miscellaneous output register */
	unsigned int i;

	for (i = 0; i < sizeof(crtc_setup)/sizeof(crtc_setup[0]); i++)
		out_idx(crtc_setup[i][0], crtc, crtc_setup[i][1]);

	/* Read back via 0x3cc, keep bits 0/2/3, force the rest to 0xe2 */
	misc = (inb(0x3cc) & 0x0d) | 0xe2;
	outb(misc, 0x3c2);
}
/*
 * Program the CRTC so that the display ends after @lines scan lines.
 * The value written is lines-1: its low byte goes to register 0x12,
 * while bits 8 and 9 are folded into the overflow register 0x07 on top
 * of the fixed base value 0x3c.
 */
static void vga_set_vertical_end(int lines)
{
	int last = lines-1;
	u16 crtc = vga_crtc();		/* CRTC base address */
	u8 overflow = 0x3c;		/* CRTC overflow register */

	overflow |= (last >> (8-1)) & 0x02;
	overflow |= (last >> (9-6)) & 0x40;

	out_idx(overflow, crtc, 0x07);	/* Vertical overflow */
	out_idx(last, crtc, 0x12);	/* Vertical display end */
}
/* 80x30: 480 scan lines, 30 rows of 16 lines each */
static void vga_set_80x30(void)
{
	const int visible_lines = 30*16;

	vga_set_480_scanlines();
	vga_set_vertical_end(visible_lines);
}
/* 80x34: 480 scan lines with the 14-line font, 34 rows shown */
static void vga_set_80x34(void)
{
	const int visible_lines = 34*14;

	vga_set_480_scanlines();
	vga_set_14font();
	vga_set_vertical_end(visible_lines);
}
/* 80x60: 480 scan lines with the 8-line font, 60 rows shown */
static void vga_set_80x60(void)
{
	const int visible_lines = 60*8;

	vga_set_480_scanlines();
	vga_set_8font();
	vga_set_vertical_end(visible_lines);
}
/*
 * set_mode hook for the VGA card: start from the basic 80x25 mode,
 * record the expected geometry in force_x/force_y, then apply the
 * adjustment belonging to the requested special mode.  Mode numbers
 * without a case below get the basic mode only.
 *
 * Always returns 0.
 */
static int vga_set_mode(struct mode_info *mode)
{
	/* Set the basic mode */
	vga_set_basic_mode();

	/* Override a possibly broken BIOS */
	force_x = mode->x;
	force_y = mode->y;

	switch (mode->mode) {
	case VIDEO_8POINT:
		vga_set_8font();
		break;
	case VIDEO_80x43:
		vga_set_80x43();
		break;
	case VIDEO_80x28:
		vga_set_14font();
		break;
	case VIDEO_80x30:
		vga_set_80x30();
		break;
	case VIDEO_80x34:
		vga_set_80x34();
		break;
	case VIDEO_80x60:
		vga_set_80x60();
		break;
	case VIDEO_80x25:
	default:
		/* Nothing beyond the basic mode */
		break;
	}

	return 0;
}
/*
* Note: this probe includes basic information required by all
* systems. It should be executed first, by making sure
* video-vga.c is listed first in the Makefile.
*/
/*
 * Detect the adapter class (CGA/MDA/HGC, EGA or VGA), publish it in
 * "adapter", and point video_vga at the matching mode table and name.
 * Outside the wakeup code the detection results are also stored in
 * boot_params.screen_info.
 *
 * Returns the number of entries in the selected mode table.
 */
static int vga_probe(void)
{
	/* All three tables are indexed by the ADAPTER_* value */
	static const char *card_name[] = {
		"CGA/MDA/HGC", "EGA", "VGA"
	};
	static struct mode_info *mode_lists[] = {
		cga_modes,
		ega_modes,
		vga_modes,
	};
	static int mode_count[] = {
		sizeof(cga_modes)/sizeof(cga_modes[0]),
		sizeof(ega_modes)/sizeof(ega_modes[0]),
		sizeof(vga_modes)/sizeof(vga_modes[0]),
	};
	struct biosregs ireg, oreg;
	int detected = ADAPTER_CGA;

	initregs(&ireg);

	ireg.ax = 0x1200;
	ireg.bl = 0x10;		/* Check EGA/VGA */
	intcall(0x10, &ireg, &oreg);

#ifndef _WAKEUP
	boot_params.screen_info.orig_video_ega_bx = oreg.bx;
#endif

	/* With MDA/CGA/HGC the BIOS leaves BL unchanged at 0x10 */
	if (oreg.bl != 0x10) {
		/* EGA/VGA: a second query tells the two apart */
		ireg.ax = 0x1a00;
		intcall(0x10, &ireg, &oreg);

		if (oreg.al == 0x1a) {
			detected = ADAPTER_VGA;
#ifndef _WAKEUP
			boot_params.screen_info.orig_video_isVGA = 1;
#endif
		} else {
			detected = ADAPTER_EGA;
		}
	}

	adapter = detected;

	video_vga.modes = mode_lists[adapter];
	video_vga.card_name = card_name[adapter];
	return mode_count[adapter];
}
/*
 * Card descriptor for the standard adapter.  vga_probe() replaces
 * card_name and fills in modes once the adapter class is known.
 */
static __videocard video_vga = {
.card_name = "VGA",
.probe = vga_probe,
.set_mode = vga_set_mode,
};
+341
View File
@@ -0,0 +1,341 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Select video mode
*/
#include "boot.h"
#include "video.h"
#include "vesa.h"
/*
 * Ask the BIOS (INT 10h, AH=0x03) for the cursor state and save the
 * column/row in screen_info.  VIDEO_FLAGS_NOCURSOR is set when bit 5 of
 * CH is set, or when the low five bits of CH exceed those of CL.
 */
static void store_cursor_position(void)
{
	struct biosregs ireg, oreg;

	initregs(&ireg);
	ireg.ah = 0x03;
	intcall(0x10, &ireg, &oreg);

	boot_params.screen_info.orig_x = oreg.dl;
	boot_params.screen_info.orig_y = oreg.dh;

	if ((oreg.ch & 0x20) ||
	    (oreg.ch & 0x1f) > (oreg.cl & 0x1f))
		boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
}
static void store_video_mode(void)
{
struct biosregs ireg, oreg;
/* N.B.: the saving of the video page here is a bit silly,
since we pretty much assume page 0 everywhere. */
initregs(&ireg);
ireg.ah = 0x0f;
intcall(0x10, &ireg, &oreg);
/* Not all BIOSes are clean with respect to the top bit */
boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
boot_params.screen_info.orig_video_page = oreg.bh;
}
/*
* Store the video mode parameters for later usage by the kernel.
* This is done by asking the BIOS except for the rows/columns
* parameters in the default 80x25 mode -- these are set directly,
* because some very obscure BIOSes supply insane values.
*/
static void store_mode_params(void)
{
	u16 char_height;
	int cols, rows;

	/* For graphics mode, it is up to the mode-setting driver
	   (currently only video-vesa.c) to store the parameters */
	if (graphic_mode)
		return;

	store_cursor_position();
	store_video_mode();

	/* Mode 7 (MDA, HGC, or VGA in monochrome mode) uses 0xb000;
	   CGA, EGA, VGA and so forth use 0xb800 */
	video_segment = (boot_params.screen_info.orig_video_mode == 0x07)
		? 0xb000 : 0xb800;

	set_fs(0);
	char_height = rdfs16(0x485);	/* Font size, BIOS area */
	boot_params.screen_info.orig_video_points = char_height;

	cols = rdfs16(0x44a);
	if (adapter == ADAPTER_CGA)
		rows = 25;
	else
		rows = rdfs8(0x484)+1;

	/* A forced geometry takes precedence over the BIOS values */
	boot_params.screen_info.orig_video_cols = force_x ? force_x : cols;
	boot_params.screen_info.orig_video_lines = force_y ? force_y : rows;
}
/*
 * Read a mode selection from the keyboard, terminated by <ENTER>.
 *
 * At most four characters are kept; digits and letters are echoed and
 * stored, backspace removes the last stored character, and everything
 * else is ignored.  The stored characters are then folded into a number
 * four bits at a time: '0'-'9' count as 0-9, and a letter counts as its
 * distance from 'a' plus 10 (case-insensitive, not limited to a-f).
 *
 * Returns VIDEO_CURRENT_MODE if nothing was entered.
 */
static unsigned int get_entry(void)
{
	char entry_buf[4];
	int len = 0;
	int i, key;
	unsigned int value = 0;

	for (;;) {
		key = getchar();
		if (key == '\r')
			break;

		if (key == '\b') {
			if (len > 0) {
				puts("\b \b");
				len--;
			}
			continue;
		}

		if (!((key >= '0' && key <= '9') ||
		      (key >= 'A' && key <= 'Z') ||
		      (key >= 'a' && key <= 'z')))
			continue;	/* Not an accepted character */

		if (len < sizeof entry_buf) {
			entry_buf[len++] = key;
			putchar(key);
		}
	}
	putchar('\n');

	if (len == 0)
		return VIDEO_CURRENT_MODE;	/* Default */

	for (i = 0; i < len; i++) {
		int digit = entry_buf[i] | 0x20;	/* Fold to lower case */

		digit = (digit > '9') ? digit-'a'+10 : digit-'0';
		value = (value << 4) + digit;
	}

	return value;
}
/*
 * Print the table of probed video modes.
 *
 * Three entries are printed per line when 20 or more modes are known
 * in total, otherwise one.  Modes with a zero width or height are
 * hidden.  Each entry is labelled with a selection key running through
 * '0'-'9' and then 'a'-'z'; once those run out the label is a space.
 */
static void display_menu(void)
{
	struct card_info *card;
	char key = '0';
	int total = 0;
	int per_line, column;
	int i;

	for (card = video_cards; card < video_cards_end; card++)
		total += card->nmodes;

	per_line = (total >= 20) ? 3 : 1;

	for (column = 0; column < per_line; column++)
		puts("Mode: Resolution: Type: ");
	putchar('\n');

	column = 0;
	for (card = video_cards; card < video_cards_end; card++) {
		for (i = 0; i < card->nmodes; i++) {
			struct mode_info *mi = &card->modes[i];
			char resbuf[32];
			u16 mode_id;

			if (!mi->x || !mi->y)
				continue;	/* Hidden mode */

			/* Modes without an ID are shown by resolution */
			mode_id = mi->mode ? mi->mode : (mi->y << 8)+mi->x;

			if (mi->depth)
				sprintf(resbuf, "%dx%d", mi->y, mi->depth);
			else
				sprintf(resbuf, "%d", mi->y);

			printf("%c %03X %4dx%-7s %-6s",
			       key, mode_id, mi->x, resbuf, card->card_name);

			if (++column >= per_line) {
				putchar('\n');
				column = 0;
			}

			if (key == '9')
				key = 'a';
			else if (key == 'z' || key == ' ')
				key = ' ';	/* Out of keys... */
			else
				key++;
		}
	}

	/* Terminate a partially filled last line */
	if (column)
		putchar('\n');
}
/*
 * H(x) is the value get_entry() assigns to the lower-case letter x;
 * SCAN is therefore the number get_entry() returns for the word "scan".
 */
#define H(x) (10 + ((x) - 'a'))
#define SCAN ((((((H('s') << 4) + H('c')) << 4) + H('a')) << 4) + H('n'))
/*
 * Interactive mode selection.
 *
 * First waits for a key: <SPACE> or a timeout keeps the current mode,
 * <ENTER> opens the menu, and any other key just beeps.  The menu is
 * then shown and an entry is read; entering "scan" probes the unsafe
 * cards and shows the menu again.
 *
 * Returns the entered mode number, or VIDEO_CURRENT_MODE.
 */
static unsigned int mode_menu(void)
{
	unsigned int sel;
	int key;

	puts("Press <ENTER> to see video modes available, "
	     "<SPACE> to continue, or wait 30 sec\n");

	kbd_flush();

	for (key = getchar_timeout(); key != '\r'; key = getchar_timeout()) {
		if (key == ' ' || key == 0)
			return VIDEO_CURRENT_MODE;	/* Default */

		putchar('\a');	/* Beep! */
	}

	do {
		display_menu();

		puts("Enter a video mode or \"scan\" to scan for "
		     "additional modes: ");
		sel = get_entry();

		if (sel == SCAN)
			probe_cards(1);
	} while (sel == SCAN);

	return sel;
}
/* Save screen content to the heap */
/*
 * x, y:       screen size in columns/rows at the time of save_screen()
 * curx, cury: cursor position at that time
 * data:       heap copy of the screen, one u16 per character cell;
 *             save_screen() does not set it when the heap is too small,
 *             and restore_screen() does nothing while it is NULL
 */
static struct saved_screen {
int x, y;
int curx, cury;
u16 *data;
} saved;
/*
 * Copy the current text screen to the heap so it can be put back after
 * a mode change.  Must run after store_mode_params(), whose results in
 * screen_info supply the screen size and cursor position.
 *
 * Size and cursor are always recorded; the contents are only copied if
 * the heap can hold them with 512 bytes to spare.
 */
static void save_screen(void)
{
	int cells;

	saved.curx = boot_params.screen_info.orig_x;
	saved.cury = boot_params.screen_info.orig_y;
	saved.x = boot_params.screen_info.orig_video_cols;
	saved.y = boot_params.screen_info.orig_video_lines;

	cells = saved.x*saved.y;

	if (!heap_free(cells*sizeof(u16)+512))
		return;		/* Not enough heap to save the screen */

	saved.data = GET_HEAP(u16, cells);

	set_fs(video_segment);
	copy_from_fs(saved.data, 0, cells*sizeof(u16));
}
/*
 * Write the screen contents captured by save_screen() back into video
 * memory, adapting them to the (possibly different) size of the new
 * mode, and put the cursor back.
 *
 * Each row of the new screen gets the leading part of the matching
 * saved row, if there is one, and is padded with blank cells up to the
 * new width.  Nothing is done in graphics mode or when no screen was
 * saved.
 */
static void restore_screen(void)
{
/* Should be called after store_mode_params() */
int xs = boot_params.screen_info.orig_video_cols;
int ys = boot_params.screen_info.orig_video_lines;
int y;
addr_t dst = 0;
u16 *src = saved.data;
struct biosregs ireg;
if (graphic_mode)
return; /* Can't restore onto a graphic mode */
if (!src)
return; /* No saved screen contents */
/* Restore screen contents */
set_fs(video_segment);
for (y = 0; y < ys; y++) {
int npad;
if (y < saved.y) {
/* Copy as many cells as fit in both the old and new row */
int copy = (xs < saved.x) ? xs : saved.x;
copy_to_fs(dst, src, copy*sizeof(u16));
dst += copy*sizeof(u16);
/* Always step by the saved row width, even if fewer were copied */
src += saved.x;
npad = (xs < saved.x) ? 0 : xs-saved.x;
} else {
/* No saved row for this line: blank it entirely */
npad = xs;
}
/* Writes "npad" blank characters to
video_segment:dst and advances dst */
/*
 * ES is pointed at video_segment for the duration of the block.
 * The shift halves the count in CX; if it was odd, a single
 * 16-bit cell is stored first.  The remaining cells are stored
 * two at a time as 32-bit values.  Every cell is 0x0720.
 */
asm volatile("pushw %%es ; "
"movw %2,%%es ; "
"shrw %%cx ; "
"jnc 1f ; "
"stosw \n\t"
"1: rep;stosl ; "
"popw %%es"
: "+D" (dst), "+c" (npad)
: "bdS" (video_segment),
"a" (0x07200720));
}
/* Restore cursor position */
/* Clamp it first so that it lies inside the new screen */
if (saved.curx >= xs)
saved.curx = xs-1;
if (saved.cury >= ys)
saved.cury = ys-1;
initregs(&ireg);
ireg.ah = 0x02; /* Set cursor position */
ireg.dh = saved.cury;
ireg.dl = saved.curx;
intcall(0x10, &ireg, NULL);
/* Re-read the cursor state into screen_info */
store_cursor_position();
}
/*
 * Select and set the video mode requested in boot_params.hdr.vid_mode.
 *
 * The current screen is saved first.  If the requested mode is ASK_VGA,
 * or setting the mode fails, the user is asked through the mode menu
 * until a mode can be set.  Afterwards the mode parameters are stored
 * again and the saved screen is restored if the mode-setting code asked
 * for that via do_restore.
 */
void set_video(void)
{
	u16 mode = boot_params.hdr.vid_mode;

	RESET_HEAP();

	store_mode_params();
	save_screen();
	probe_cards(0);

	while (1) {
		if (mode == ASK_VGA)
			mode = mode_menu();

		if (set_mode(mode) == 0)
			break;

		printf("Undefined video mode number: %x\n", mode);
		mode = ASK_VGA;		/* Ask again on the next pass */
	}

	boot_params.hdr.vid_mode = mode;
	vesa_store_edid();
	store_mode_params();

	if (do_restore)
		restore_screen();
}
+121
View File
@@ -0,0 +1,121 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Header file for the real-mode video probing code
*/
#ifndef BOOT_VIDEO_H
#define BOOT_VIDEO_H
#include <linux/types.h>
/*
* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
* NORMAL_VGA (-1)
* EXTENDED_VGA (-2)
* ASK_VGA (-3)
* Video modes numbered by menu position -- NOT RECOMMENDED because of lack
* of compatibility when extending the table. These are between 0x00 and 0xff.
*/
#define VIDEO_FIRST_MENU 0x0000
/* Standard BIOS video modes (BIOS number + 0x0100) */
#define VIDEO_FIRST_BIOS 0x0100
/* VESA BIOS video modes (VESA number + 0x0200) */
#define VIDEO_FIRST_VESA 0x0200
/* Video7 special modes (BIOS number + 0x0900) */
#define VIDEO_FIRST_V7 0x0900
/* Special video modes */
/*
 * The VIDEO_80x* and VIDEO_8POINT numbers are the ones vga_set_mode()
 * in video-vga.c dispatches on.  VIDEO_CURRENT_MODE is handled by
 * set_mode() itself and means "leave the mode as it is".
 */
#define VIDEO_FIRST_SPECIAL 0x0f00
#define VIDEO_80x25 0x0f00
#define VIDEO_8POINT 0x0f01
#define VIDEO_80x43 0x0f02
#define VIDEO_80x28 0x0f03
#define VIDEO_CURRENT_MODE 0x0f04
#define VIDEO_80x30 0x0f05
#define VIDEO_80x34 0x0f06
#define VIDEO_80x60 0x0f07
#define VIDEO_GFX_HACK 0x0f08
#define VIDEO_LAST_SPECIAL 0x0f09
/* Video modes given by resolution */
#define VIDEO_FIRST_RESOLUTION 0x1000
/* The "recalculate timings" flag */
#define VIDEO_RECALC 0x8000
/*
 * NOTE(review): store_screen() is only declared here; neither its
 * definition nor a user of DO_STORE() appears in the video sources
 * reviewed alongside this header -- confirm whether both are still
 * needed.
 */
void store_screen(void);
#define DO_STORE() store_screen()
/*
* Mode table structures
*/
/*
 * One selectable video mode.  An entry whose x or y is zero is not
 * shown in the mode menu.
 */
struct mode_info {
u16 mode; /* Mode number (vga= style) */
u16 x, y; /* Width, height */
u16 depth; /* Bits per pixel, 0 for text mode */
};
/*
 * One video "card" (mode-setting driver).
 *
 * probe() builds the modes table and returns the number of entries,
 * which probe_cards() stores in nmodes; it may be NULL.  set_mode()
 * switches to the given mode and returns 0 on success.  Mode numbers
 * in [xmode_first, xmode_first + xmode_n) are passed to set_mode() even
 * if they were never probed.
 */
struct card_info {
const char *card_name;
int (*set_mode)(struct mode_info *mode);
int (*probe)(void);
struct mode_info *modes;
int nmodes; /* Number of probed modes so far */
int unsafe; /* Probing is unsafe, only do after "scan" */
u16 xmode_first; /* Unprobed modes to try to call anyway */
u16 xmode_n; /* Size of unprobed mode range */
};
/*
 * Declares a struct card_info that is placed in the ".videocards"
 * section.  The probing and mode-setting code walks all cards from
 * video_cards up to (not including) video_cards_end.
 * NOTE(review): presumably these two symbols bracket the .videocards
 * section via the linker script -- confirm there.
 */
#define __videocard struct card_info __attribute__((section(".videocards")))
extern struct card_info video_cards[], video_cards_end[];
int mode_defined(u16 mode); /* video.c */
/* Basic video information */
#define ADAPTER_CGA 0 /* CGA/MDA/HGC */
#define ADAPTER_EGA 1
#define ADAPTER_VGA 2
extern int adapter;
extern u16 video_segment;
extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
extern int do_restore; /* Restore screen contents */
extern int graphic_mode; /* Graphics mode with linear frame buffer */
/* Accessing VGA indexed registers */
/*
 * Read an indexed register: write @index to @port, then read the value
 * from the following port (@port + 1).
 */
static inline u8 in_idx(u16 port, u8 index)
{
	u8 value;

	outb(index, port);
	value = inb(port+1);

	return value;
}
/*
 * Write @v to indexed register @index with a single 16-bit write to
 * @port: the index travels in the low byte, the value in the high byte.
 */
static inline void out_idx(u8 v, u16 port, u8 index)
{
	u16 word = (v << 8) | index;

	outw(word, port);
}
/*
 * Writes a value to an indexed port and then reads the port again.
 *
 * @v:     value to write
 * @port:  base I/O port of the indexed register pair
 * @index: register index
 *
 * Returns the value read back from the same indexed register.
 */
static inline u8 tst_idx(u8 v, u16 port, u8 index)
{
	/* out_idx() takes (value, port, index) -- in that order */
	out_idx(v, port, index);
	return in_idx(port, index);
}
/* Get the I/O port of the VGA CRTC */
u16 vga_crtc(void); /* video-vga.c */
#endif /* BOOT_VIDEO_H */
+319
View File
@@ -0,0 +1,319 @@
CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
CONFIG_MAC_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
CONFIG_X86_GENERIC=y
CONFIG_HPET_TIMER=y
CONFIG_SCHED_SMT=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
CONFIG_X86_MCE=y
CONFIG_X86_REBOOTFIXUPS=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_AMD=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_HIGHPTE=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_EFI=y
CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_COMPAT_VDSO is not set
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
# CONFIG_CPU_FREQ_STAT is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_MSI=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
CONFIG_HOTPLUG_PCI=y
CONFIG_BINFMT_MISC=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
CONFIG_NETLABEL=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_FTP=y
CONFIG_NF_CONNTRACK_IRC=y
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_MANGLE=y
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
CONFIG_NET_SCHED=y
CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_HAMRADIO=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
CONFIG_SATA_AHCI=y
CONFIG_ATA_PIIX=y
CONFIG_PATA_AMD=y
CONFIG_PATA_OLDPIIX=y
CONFIG_PATA_SCH=y
CONFIG_PATA_MPIIX=y
CONFIG_ATA_GENERIC=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_MIRROR=y
CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
CONFIG_NETCONSOLE=y
CONFIG_BNX2=y
CONFIG_TIGON3=y
CONFIG_NET_TULIP=y
CONFIG_E100=y
CONFIG_E1000=y
CONFIG_E1000E=y
CONFIG_SKY2=y
CONFIG_NE2K_PCI=y
CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
# CONFIG_8139TOO_PIO is not set
CONFIG_R8169=y
CONFIG_FDDI=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_JOYSTICK=y
CONFIG_INPUT_TABLET=y
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_MISC=y
CONFIG_VT_HW_CONSOLE_BINDING=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
CONFIG_HW_RANDOM=y
CONFIG_NVRAM=y
CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set
CONFIG_I2C_I801=y
CONFIG_WATCHDOG=y
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
CONFIG_DRM=y
CONFIG_DRM_I915=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_HRTIMER=y
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
CONFIG_HIDRAW=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_TOPSEED=y
CONFIG_USB=y
CONFIG_USB_DEBUG=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_DEVICEFS=y
# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
CONFIG_USB_STORAGE=y
CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_DMADEVICES=y
CONFIG_EEEPC_LAPTOP=y
CONFIG_EFI_VARS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
CONFIG_AUTOFS4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
+317
View File
@@ -0,0 +1,317 @@
CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
CONFIG_MAC_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
CONFIG_CALGARY_IOMMU=y
CONFIG_NR_CPUS=64
CONFIG_SCHED_SMT=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
CONFIG_X86_MCE=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_AMD=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_NUMA=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_EFI=y
CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_COMPAT_VDSO is not set
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
# CONFIG_CPU_FREQ_STAT is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
CONFIG_HOTPLUG_PCI=y
CONFIG_BINFMT_MISC=y
CONFIG_IA32_EMULATION=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
CONFIG_NETLABEL=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_FTP=y
CONFIG_NF_CONNTRACK_IRC=y
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_MANGLE=y
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
CONFIG_NET_SCHED=y
CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_HAMRADIO=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
CONFIG_SATA_AHCI=y
CONFIG_ATA_PIIX=y
CONFIG_PATA_AMD=y
CONFIG_PATA_OLDPIIX=y
CONFIG_PATA_SCH=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_MIRROR=y
CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
CONFIG_NETCONSOLE=y
CONFIG_TIGON3=y
CONFIG_NET_TULIP=y
CONFIG_E100=y
CONFIG_E1000=y
CONFIG_SKY2=y
CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
CONFIG_FDDI=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_JOYSTICK=y
CONFIG_INPUT_TABLET=y
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_MISC=y
CONFIG_VT_HW_CONSOLE_BINDING=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
CONFIG_NVRAM=y
CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set
CONFIG_I2C_I801=y
CONFIG_WATCHDOG=y
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
CONFIG_DRM=y
CONFIG_DRM_I915=y
CONFIG_DRM_I915_KMS=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_HRTIMER=y
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
CONFIG_HIDRAW=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_TOPSEED=y
CONFIG_USB=y
CONFIG_USB_DEBUG=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_DEVICEFS=y
# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
CONFIG_USB_STORAGE=y
CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_DMADEVICES=y
CONFIG_EEEPC_LAPTOP=y
CONFIG_AMD_IOMMU=y
CONFIG_AMD_IOMMU_STATS=y
CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_EFI_VARS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
CONFIG_AUTOFS4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
+45
View File
@@ -0,0 +1,45 @@
#
# Arch-specific CryptoAPI modules.
#
# 32-bit (i586) cipher modules
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
# 64-bit (x86_64) cipher modules
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
# AES-NI, CLMUL GHASH, CRC32C and SSSE3 SHA-1 modules
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
# Composite modules: each is linked from an assembly object plus C glue.
# aes_glue.o is listed for both the i586 and the x86_64 AES module.
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
# enable AVX support only when $(AS) can actually assemble the instructions
# (the probe assembles one vpxor instruction; the define is passed to both
# the assembly and the glue object so the two stay in agreement)
ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
endif
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+367
View File
@@ -0,0 +1,367 @@
// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
// 1. distributions of this source code include the above copyright
// notice, this list of conditions and the following disclaimer//
//
// 2. distributions in binary form include the above copyright
// notice, this list of conditions and the following disclaimer
// in the documentation and/or other associated materials//
//
// 3. the copyright holder's name is not used to endorse products
// built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002
.file "aes-i586-asm.S"
.text
#include <asm/asm-offsets.h>
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
/* offsets to parameters with one register pushed onto stack */
#define ctx 8
#define out_blk 12
#define in_blk 16
/* offsets in crypto_aes_ctx structure */
#define klen (480)
#define ekey (0)
#define dkey (240)
// register mapping for encrypt and decrypt subroutines
#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi
#define eaxl al
#define eaxh ah
#define ebxl bl
#define ebxh bh
#define ecxl cl
#define ecxh ch
#define edxl dl
#define edxh dh
#define _h(reg) reg##h
#define h(reg) _h(reg)
#define _l(reg) reg##l
#define l(reg) _l(reg)
// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers r0, r1, r4 or r5.
// Parameters:
// table table base address
// %1 out_state[0]
// %2 out_state[1]
// %3 out_state[2]
// %4 out_state[3]
// idx input register for the round (destroyed)
// tmp scratch register for the round
// sched key schedule
// do_col: fold the four bytes of idx into the four outputs:
//   a1 ^= table[byte 0],          a2 ^= (table+tlen)[byte 1],
//   a3 ^= (table+2*tlen)[byte 2], a4 ^= (table+3*tlen)[byte 3]
// idx must be a register that has l()/h() byte aliases defined above
// (eax, ebx, ecx or edx). It is shifted and then overwritten with its
// own top byte, so its original value is gone on exit.
#define do_col(table, a1,a2,a3,a4, idx, tmp) \
movzx %l(idx),%tmp; \
xor table(,%tmp,4),%a1; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+2*tlen(,%tmp,4),%a3; \
xor table+3*tlen(,%idx,4),%a4;
// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
// do_fcol: same byte-to-table mapping as do_col, but every output is
// first loaded from the round key instead of being accumulated into:
//   a1 <- 0 sched, a2 <- 12 sched, a3 <- 8 sched, a4 <- 4 sched
// sched is pasted directly after a literal offset, which is why callers
// pass it with an explicit sign (e.g. "+16(%ebp)" or "-64(%ebp)").
// The old a3 is copied into idx right before a3 is reloaded, so the
// following do_col can consume it.
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
mov 0 sched,%a1; \
movzx %l(idx),%tmp; \
mov 12 sched,%a2; \
xor table(,%tmp,4),%a1; \
mov 4 sched,%a4; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+3*tlen(,%idx,4),%a4; \
mov %a3,%idx; \
mov 8 sched,%a3; \
xor table+2*tlen(,%tmp,4),%a3;
// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
// do_icol: counterpart of do_fcol used by the inv_rnd macros. The only
// difference is which round-key words seed a2 and a4:
//   a1 <- 0 sched, a2 <- 4 sched, a3 <- 8 sched, a4 <- 12 sched
// As in do_fcol, sched is pasted after a literal offset and the old a3
// ends up in idx.
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
mov 0 sched,%a1; \
movzx %l(idx),%tmp; \
mov 4 sched,%a2; \
xor table(,%tmp,4),%a1; \
mov 12 sched,%a4; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+3*tlen(,%idx,4),%a4; \
mov %a3,%idx; \
mov 8 sched,%a3; \
xor table+2*tlen(,%tmp,4),%a3;
// original Gladman had conditional saves to MMX regs.
// save(slot, reg):    store reg into 4-byte stack slot number slot
// restore(reg, slot): load reg from 4-byte stack slot number slot
// Note that the argument order is reversed between the two macros.
#define save(a1, a2) \
mov %a2,4*a1(%esp)
#define restore(a1, a2) \
mov 4*a2(%esp),%a1
// These macros perform a forward encryption cycle. They are entered with
// the first previous round column values in r0,r1,r4,r5 and
// exit with the final values in the same registers, using stack
// for temporary storage.
// round column values
// on entry: r0,r1,r4,r5
// on exit: r2,r1,r4,r5
// arg is the round-key operand handed to do_fcol; table is the lookup
// table base. r1 and r5 are written as outputs before they have been
// consumed as inputs, so they are parked in stack slots 0 and 1 first
// and later reloaded into r0 to serve as idx. r3 is the scratch register.
#define fwd_rnd1(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
restore(r0,0); \
do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
restore(r0,1); \
do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
// round column values
// on entry: r2,r1,r4,r5
// on exit: r0,r1,r4,r5
// Same sequence as fwd_rnd1 with the roles of r0 and r2 exchanged: r2 is
// the idx register and r0 receives the first output column, so the two
// macros can be alternated round after round.
#define fwd_rnd2(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
restore(r2,0); \
do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
restore(r2,1); \
do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
// These macros perform an inverse encryption cycle. They are entered with
// the first previous round column values in r0,r1,r4,r5 and
// exit with the final values in the same registers, using stack
// for temporary storage
// round column values
// on entry: r0,r1,r4,r5
// on exit: r2,r1,r4,r5
// Same save/restore scheme as fwd_rnd1, but seeded through do_icol and
// with the output registers of each do_col permuted differently.
// r3 is the scratch register; stack slots 0 and 1 hold the old r1 and r5.
#define inv_rnd1(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
restore(r0,0); \
do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
restore(r0,1); \
do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
// round column values
// on entry: r2,r1,r4,r5
// on exit: r0,r1,r4,r5
// Same sequence as inv_rnd1 with the roles of r0 and r2 exchanged: r2 is
// the idx register and r0 receives the first output column.
#define inv_rnd2(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
restore(r2,0); \
do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
restore(r2,1); \
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
// Arguments are read from the stack. The ctx/out_blk/in_blk offsets are
// defined for "one register pushed", so every further push adds 4 to the
// offset that has to be used (see the +4 and +12 below).
.global aes_enc_blk
.extern crypto_ft_tab
.extern crypto_fl_tab
.align 4
aes_enc_blk:
push %ebp
mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
1: push %ebx
// two registers pushed so far, hence in_blk+4
mov in_blk+4(%esp),%r2
push %esi
mov klen(%ebp),%r3 // key size
push %edi
#if ekey != 0
lea ekey(%ebp),%ebp // key pointer
#endif
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%ebp),%r0
xor 4(%ebp),%r1
xor 8(%ebp),%r4
xor 12(%ebp),%r5
sub $8,%esp // space for register saves on stack
add $16,%ebp // increment to next round key
// Key size in bytes selects the entry point into the unrolled rounds.
// lea does not modify the flags, so the je below still tests the
// result of this cmp; each lea advances the key pointer by two round
// keys to match the negative offsets used at labels 2 and 3.
cmp $24,%r3
jb 4f // 10 rounds for 128-bit key
lea 32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
lea 32(%ebp),%ebp
2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
fwd_rnd2( -48(%ebp), crypto_ft_tab)
3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
fwd_rnd2( -16(%ebp), crypto_ft_tab)
4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
fwd_rnd2( +16(%ebp), crypto_ft_tab)
fwd_rnd1( +32(%ebp), crypto_ft_tab)
fwd_rnd2( +48(%ebp), crypto_ft_tab)
fwd_rnd1( +64(%ebp), crypto_ft_tab)
fwd_rnd2( +80(%ebp), crypto_ft_tab)
fwd_rnd1( +96(%ebp), crypto_ft_tab)
fwd_rnd2(+112(%ebp), crypto_ft_tab)
fwd_rnd1(+128(%ebp), crypto_ft_tab)
fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns relies on the register mappings
add $8,%esp
// four registers are on the stack at this point, hence out_blk+12
mov out_blk+12(%esp),%ebp
mov %r5,12(%ebp)
pop %edi
mov %r4,8(%ebp)
pop %esi
mov %r1,4(%ebp)
pop %ebx
mov %r0,(%ebp)
pop %ebp
ret
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
// Mirror image of aes_enc_blk: the key pointer is taken from dkey
// instead of ekey, the rounds use the inv_rnd macros, and the tables
// are crypto_it_tab / crypto_il_tab. Stack-offset handling is the same:
// the parameter offsets assume one pushed register, so each further
// push adds 4.
.global aes_dec_blk
.extern crypto_it_tab
.extern crypto_il_tab
.align 4
aes_dec_blk:
push %ebp
mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
1: push %ebx
// two registers pushed so far, hence in_blk+4
mov in_blk+4(%esp),%r2
push %esi
mov klen(%ebp),%r3 // key size
push %edi
#if dkey != 0
lea dkey(%ebp),%ebp // key pointer
#endif
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%ebp),%r0
xor 4(%ebp),%r1
xor 8(%ebp),%r4
xor 12(%ebp),%r5
sub $8,%esp // space for register saves on stack
add $16,%ebp // increment to next round key
// lea does not modify the flags, so the je below still tests the
// result of this cmp.
cmp $24,%r3
jb 4f // 10 rounds for 128-bit key
lea 32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
lea 32(%ebp),%ebp
2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
inv_rnd2( -48(%ebp), crypto_it_tab)
3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
inv_rnd2( -16(%ebp), crypto_it_tab)
4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
inv_rnd2( +16(%ebp), crypto_it_tab)
inv_rnd1( +32(%ebp), crypto_it_tab)
inv_rnd2( +48(%ebp), crypto_it_tab)
inv_rnd1( +64(%ebp), crypto_it_tab)
inv_rnd2( +80(%ebp), crypto_it_tab)
inv_rnd1( +96(%ebp), crypto_it_tab)
inv_rnd2(+112(%ebp), crypto_it_tab)
inv_rnd1(+128(%ebp), crypto_it_tab)
inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns relies on the register mappings
add $8,%esp
// four registers are on the stack at this point, hence out_blk+12
mov out_blk+12(%esp),%ebp
mov %r5,12(%ebp)
pop %edi
mov %r4,8(%ebp)
pop %esi
mov %r1,4(%ebp)
pop %ebx
mov %r0,(%ebp)
pop %ebp
ret
+188
View File
@@ -0,0 +1,188 @@
/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
*
* Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
*
* License:
* This code can be distributed under the terms of the GNU General Public
* License (GPL) Version 2 provided that the above header down to and
* including this sentence is retained in full.
*/
.extern crypto_ft_tab
.extern crypto_it_tab
.extern crypto_fl_tab
.extern crypto_il_tab
.text
#include <asm/asm-offsets.h>
#define R1 %rax
#define R1E %eax
#define R1X %ax
#define R1H %ah
#define R1L %al
#define R2 %rbx
#define R2E %ebx
#define R2X %bx
#define R2H %bh
#define R2L %bl
#define R3 %rcx
#define R3E %ecx
#define R3X %cx
#define R3H %ch
#define R3L %cl
#define R4 %rdx
#define R4E %edx
#define R4X %dx
#define R4H %dh
#define R4L %dl
#define R5 %rsi
#define R5E %esi
#define R6 %rdi
#define R6E %edi
#define R7 %rbp
#define R7E %ebp
#define R8 %r8
#define R9 %r9
#define R10 %r10
#define R11 %r11
/*
 * prologue: emit the global label FUNC and the common entry sequence.
 *  - copies r1 -> r2 and r3 -> r4, and r10 -> r11 (entry() uses these to
 *    park registers that the rounds overwrite)
 *  - points r9 at KEY+48 bytes into the structure addressed by r8, so the
 *    first round key is reached with the -48..-36 displacements below
 *  - loads four 32-bit words from (r7) and xors the first round key in
 *  - reads the 32-bit value at 480(r8) and compares it with 24: below
 *    jumps to B128, equal jumps to B192, above falls through. leaq does
 *    not modify the flags, so "je" still tests the cmpl result. Each leaq
 *    advances the key pointer by 32 bytes.
 * The "## E" pastes select the 32-bit name of a register alias
 * (e.g. R1 -> R1E).
 */
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
.global FUNC; \
.type FUNC,@function; \
.align 8; \
FUNC: movq r1,r2; \
movq r3,r4; \
leaq KEY+48(r8),r9; \
movq r10,r11; \
movl (r7),r5 ## E; \
movl 4(r7),r1 ## E; \
movl 8(r7),r6 ## E; \
movl 12(r7),r7 ## E; \
movl 480(r8),r10 ## E; \
xorl -48(r9),r5 ## E; \
xorl -44(r9),r1 ## E; \
xorl -40(r9),r6 ## E; \
xorl -36(r9),r7 ## E; \
cmpl $24,r10 ## E; \
jb B128; \
leaq 32(r9),r9; \
je B192; \
leaq 32(r9),r9;
/*
 * epilogue: copy r1 -> r2 and r3 -> r4 (used to put back the registers
 * parked by prologue), store the 32-bit halves of r5..r8 to the 16 bytes
 * at (r9), and return.
 */
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
movq r1,r2; \
movq r3,r4; \
movl r5 ## E,(r9); \
movl r6 ## E,4(r9); \
movl r7 ## E,8(r9); \
movl r8 ## E,12(r9); \
ret;
/*
 * round: one table-lookup round.
 *  TAB      base of four consecutive 1024-byte tables, addressed as
 *           TAB, TAB+1024, TAB+2048 and TAB+3072
 *  OFFSET   displacement of the 16-byte round key relative to r8
 *  r1..r4   input words; each is consumed byte by byte (via its L/H/X
 *           sub-register names) and overwritten along the way
 *  r5, r6   together with r3 and r4, receive the results
 *  r7       scratch
 *  r8       key pointer
 *  ra..rd   registers the four round-key words are xored into; both
 *           callers in this file pass the same four registers that
 *           hold the results
 * The instruction order is hand-interleaved; do not reorder.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
movzbl r2 ## H,r5 ## E; \
movzbl r2 ## L,r6 ## E; \
movl TAB+1024(,r5,4),r5 ## E;\
movw r4 ## X,r2 ## X; \
movl TAB(,r6,4),r6 ## E; \
roll $16,r2 ## E; \
shrl $16,r4 ## E; \
movzbl r4 ## H,r7 ## E; \
movzbl r4 ## L,r4 ## E; \
xorl OFFSET(r8),ra ## E; \
xorl OFFSET+4(r8),rb ## E; \
xorl TAB+3072(,r7,4),r5 ## E;\
xorl TAB+2048(,r4,4),r6 ## E;\
movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r4 ## E; \
movl TAB+1024(,r4,4),r4 ## E;\
movw r3 ## X,r1 ## X; \
roll $16,r1 ## E; \
shrl $16,r3 ## E; \
xorl TAB(,r7,4),r5 ## E; \
movzbl r3 ## H,r7 ## E; \
movzbl r3 ## L,r3 ## E; \
xorl TAB+3072(,r7,4),r4 ## E;\
xorl TAB+2048(,r3,4),r5 ## E;\
movzbl r1 ## H,r7 ## E; \
movzbl r1 ## L,r3 ## E; \
shrl $16,r1 ## E; \
xorl TAB+3072(,r7,4),r6 ## E;\
movl TAB+2048(,r3,4),r3 ## E;\
movzbl r1 ## H,r7 ## E; \
movzbl r1 ## L,r1 ## E; \
xorl TAB+1024(,r7,4),r6 ## E;\
xorl TAB(,r1,4),r3 ## E; \
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r2 ## E; \
shrl $16,r2 ## E; \
xorl TAB+3072(,r1,4),r3 ## E;\
xorl TAB+2048(,r7,4),r4 ## E;\
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r2 ## E; \
xorl OFFSET+8(r8),rc ## E; \
xorl OFFSET+12(r8),rd ## E; \
xorl TAB+1024(,r1,4),r3 ## E;\
xorl TAB(,r2,4),r4 ## E;
/* move_regs: 32-bit copies r3 -> r1 and r4 -> r2. */
#define move_regs(r1,r2,r3,r4) \
movl r3 ## E,r1 ## E; \
movl r4 ## E,r2 ## E;
/*
 * entry: prologue with this file's fixed register assignment. With it,
 * %rbx and %rbp are parked in %r8 and %r9, the key pointer lives in %r10,
 * %rsi is copied to %r11, the input is read through %rdx and the
 * structure holding the keys is addressed through %rdi.
 */
#define entry(FUNC,KEY,B128,B192) \
prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
/*
 * return: restores %rbx/%rbp from %r8/%r9 and stores R5,R6,R3,R4 (32-bit)
 * through %r11. NOTE: this redefines the token "return" for the rest of
 * the file.
 */
#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
/*
 * The *_round macros run one round and then copy R5,R6 back into R1,R2 so
 * the next round finds its input in R1..R4. The *_final variants skip that
 * copy and leave the result in R5,R6,R3,R4, which is what "return" stores.
 * The decrypt variants pass the register pairs to round in swapped order.
 */
#define encrypt_round(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define encrypt_final(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
#define decrypt_round(TAB,OFFSET) \
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define decrypt_final(TAB,OFFSET) \
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in) */
/*
 * KEY offset 0 is passed to entry. enc128 and enc192 are the labels the
 * prologue jumps to for key lengths below and equal to 24; longer keys
 * fall through and run all 14 rounds. The last round uses crypto_fl_tab.
 */
entry(aes_enc_blk,0,enc128,enc192)
encrypt_round(crypto_ft_tab,-96)
encrypt_round(crypto_ft_tab,-80)
enc192: encrypt_round(crypto_ft_tab,-64)
encrypt_round(crypto_ft_tab,-48)
enc128: encrypt_round(crypto_ft_tab,-32)
encrypt_round(crypto_ft_tab,-16)
encrypt_round(crypto_ft_tab, 0)
encrypt_round(crypto_ft_tab, 16)
encrypt_round(crypto_ft_tab, 32)
encrypt_round(crypto_ft_tab, 48)
encrypt_round(crypto_ft_tab, 64)
encrypt_round(crypto_ft_tab, 80)
encrypt_round(crypto_ft_tab, 96)
encrypt_final(crypto_fl_tab,112)
return
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in) */
/*
 * KEY offset 240 is passed to entry. dec128 and dec192 are the labels the
 * prologue jumps to for key lengths below and equal to 24; longer keys
 * fall through and run all 14 rounds. The last round uses crypto_il_tab.
 */
entry(aes_dec_blk,240,dec128,dec192)
decrypt_round(crypto_it_tab,-96)
decrypt_round(crypto_it_tab,-80)
dec192: decrypt_round(crypto_it_tab,-64)
decrypt_round(crypto_it_tab,-48)
dec128: decrypt_round(crypto_it_tab,-32)
decrypt_round(crypto_it_tab,-16)
decrypt_round(crypto_it_tab, 0)
decrypt_round(crypto_it_tab, 16)
decrypt_round(crypto_it_tab, 32)
decrypt_round(crypto_it_tab, 48)
decrypt_round(crypto_it_tab, 64)
decrypt_round(crypto_it_tab, 80)
decrypt_round(crypto_it_tab, 96)
decrypt_final(crypto_il_tab,112)
return
+71
View File
@@ -0,0 +1,71 @@
/*
* Glue Code for the asm optimized version of the AES Cipher Algorithm
*
*/
#include <linux/module.h>
#include <crypto/aes.h>
#include <asm/aes.h>
asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
/*
 * Encrypt through the assembly routine aes_enc_blk(), forwarding ctx, dst
 * and src unchanged. Exported (GPL-only) so code outside this file can
 * call it.
 */
void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
{
aes_enc_blk(ctx, dst, src);
}
EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
/*
 * crypto_aes_decrypt_x86() - decrypt one block with the asm implementation.
 * @ctx: expanded AES key context
 * @dst: output block
 * @src: input block
 *
 * Exported (GPL-only) wrapper around aes_dec_blk().
 */
void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
{
aes_dec_blk(ctx, dst, src);
}
EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
/*
 * .cia_encrypt callback: fetch the per-transform context from @tfm and
 * encrypt one block from @src into @dst.
 */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
/*
 * .cia_decrypt callback: fetch the per-transform context from @tfm and
 * decrypt one block from @src into @dst.
 */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
/*
 * Algorithm descriptor for the single-block "aes" cipher backed by the
 * assembler routines.  Registered under driver name "aes-asm" with
 * priority 200; key setup is delegated to crypto_aes_set_key().
 */
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
/* the per-transform context is the expanded key schedule */
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
.cra_u = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = crypto_aes_set_key,
.cia_encrypt = aes_encrypt,
.cia_decrypt = aes_decrypt
}
}
};
/* Module init: register aes_alg; returns crypto_register_alg()'s result. */
static int __init aes_init(void)
{
return crypto_register_alg(&aes_alg);
}
/* Module exit: unregister aes_alg. */
static void __exit aes_fini(void)
{
crypto_unregister_alg(&aes_alg);
}
module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("aes");
MODULE_ALIAS("aes-asm");
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,390 @@
/*
* Blowfish Cipher Algorithm (x86_64)
*
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
.file "blowfish-x86_64-asm.S"
.text
/* structure of crypto context
 *
 * Byte offsets from CTX: the P-array starts at 0 and the four S-box tables
 * follow after (16 + 2) 32-bit words, each table being 256 32-bit entries.
 * NOTE(review): presumably mirrors the field layout of struct bf_ctx used by
 * the C glue -- confirm against its definition.
 */
#define p 0
#define s0 ((16 + 2) * 4)
#define s1 ((16 + 2 + (1 * 256)) * 4)
#define s2 ((16 + 2 + (2 * 256)) * 4)
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros
 *
 * RX0..RX3 hold up to four 64-bit blocks; the d/bl/bh variants name the
 * 32-bit, low-byte and second-byte views of the same registers.
 * RT0..RT3 are scratch.  Two aliases matter for the function bodies:
 *   - RT1 is %rsi, the same register as RIO, so the I/O pointer is
 *     clobbered by F()/F4() and must be reloaded before output;
 *   - RT0 is %rbp, so %rbp must be saved and restored around the rounds.
 * RKEY (%r10) carries the preloaded round key in the 4-way code.
 */
#define CTX %rdi
#define RIO %rsi
#define RX0 %rax
#define RX1 %rbx
#define RX2 %rcx
#define RX3 %rdx
#define RX0d %eax
#define RX1d %ebx
#define RX2d %ecx
#define RX3d %edx
#define RX0bl %al
#define RX1bl %bl
#define RX2bl %cl
#define RX3bl %dl
#define RX0bh %ah
#define RX1bh %bh
#define RX2bh %ch
#define RX3bh %dh
#define RT0 %rbp
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9
#define RT0d %ebp
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d
#define RKEY %r10
/***********************************************************************
* 1-way blowfish
***********************************************************************/
/*
 * One Feistel step on RX0.  The four bytes of the low 32-bit half index the
 * S-boxes: ((s0[b3] + s1[b2]) ^ s2[b1]) + s3[b0], where b3 is the most
 * significant byte (reached by rotating right 16 and back).  The final
 * rolq $32 swaps the two halves and the 32-bit result is XORed into the new
 * low half.  Clobbers RT0, RT1 (= RIO) and RT2.
 */
#define F() \
rorq $16, RX0; \
movzbl RX0bh, RT0d; \
movzbl RX0bl, RT1d; \
rolq $16, RX0; \
movl s0(CTX,RT0,4), RT0d; \
addl s1(CTX,RT1,4), RT0d; \
movzbl RX0bh, RT1d; \
movzbl RX0bl, RT2d; \
rolq $32, RX0; \
xorl s2(CTX,RT1,4), RT0d; \
addl s3(CTX,RT2,4), RT0d; \
xorq RT0, RX0;
/*
 * XOR two consecutive 32-bit P-array entries, P[n] and P[n+1], into RX0
 * with a single 64-bit load: P[n] lands in the low half, P[n+1] in the high
 * half.
 */
#define add_roundkey_enc(n) \
xorq p+4*(n)(CTX), RX0;
/* Two encryption rounds: mix in P[n]/P[n+1], then apply F() twice. */
#define round_enc(n) \
add_roundkey_enc(n); \
\
F(); \
F();
/*
 * Decryption counterpart of add_roundkey_enc(): load P[n-1] and P[n] with
 * one 64-bit read, swap the halves so that P[n] ends up in the low half and
 * P[n-1] in the high half, then XOR the pair into RX0.  Clobbers RT0.
 *
 * The argument is parenthesised, as in preload_roundkey_dec(), so that an
 * expression argument cannot change the offset arithmetic.
 */
#define add_roundkey_dec(n) \
	movq p+4*((n)-1)(CTX), RT0; \
	rorq $32, RT0; \
	xorq RT0, RX0;
/*
 * Two decryption rounds: mix in P[n]/P[n-1], then apply F() twice.
 *
 * The last line deliberately has no line continuation: a trailing backslash
 * here would splice whatever line follows (for instance the next #define)
 * into this macro body.
 */
#define round_dec(n) \
	add_roundkey_dec(n); \
	\
	F(); \
	F();
/*
 * Load one 8-byte block from (RIO) into RX0.  The rotate-by-32 followed by
 * a 64-bit byte swap byte-reverses each 32-bit word individually and leaves
 * the first word of the block in the low half of RX0.
 */
#define read_block() \
movq (RIO), RX0; \
rorq $32, RX0; \
bswapq RX0;
/* Byte-swap RX0 and store it to (RIO), overwriting the destination. */
#define write_block() \
bswapq RX0; \
movq RX0, (RIO);
/* Byte-swap RX0 and XOR it into the 8 bytes already at (RIO). */
#define xor_block() \
bswapq RX0; \
xorq RX0, (RIO);
.align 8
.global __blowfish_enc_blk
.type __blowfish_enc_blk,@function;
__blowfish_enc_blk:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
/* %rbp doubles as scratch RT0: park the caller's value in %r11 */
movq %rbp, %r11;
/* %rsi doubles as scratch RT1: park dst in %r10, read through src */
movq %rsi, %r10;
movq %rdx, RIO;
read_block();
/* eight double-rounds consume P[0..15]; P[16]/P[17] are mixed in last */
round_enc(0);
round_enc(2);
round_enc(4);
round_enc(6);
round_enc(8);
round_enc(10);
round_enc(12);
round_enc(14);
add_roundkey_enc(16);
movq %r11, %rbp;
/* point RIO back at dst, then pick overwrite or XOR-into-dst output */
movq %r10, RIO;
test %cl, %cl;
jnz __enc_xor;
write_block();
ret;
__enc_xor:
xor_block();
ret;
.align 8
.global blowfish_dec_blk
.type blowfish_dec_blk,@function;
blowfish_dec_blk:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
/* %rbp (RT0) and %rsi (RT1) are scratch: park them in %r11 / %r10 */
movq %rbp, %r11;
movq %rsi, %r10;
movq %rdx, RIO;
read_block();
/* walk the P-array backwards: P[17]/P[16] first, P[1]/P[0] last */
round_dec(17);
round_dec(15);
round_dec(13);
round_dec(11);
round_dec(9);
round_dec(7);
round_dec(5);
round_dec(3);
add_roundkey_dec(1);
/* restore the dst pointer and store the result (no XOR variant) */
movq %r10, RIO;
write_block();
movq %r11, %rbp;
ret;
/**********************************************************************
4-way blowfish, four blocks parallel
**********************************************************************/
/* F() for 4-way. Slower when used alone/1-way, but faster when used
* parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330).
*
* Same S-box combination as F(), applied to register x: all four index
* bytes are extracted first (two before and two after the first rorq $16),
* then the lookups are combined.  The two rotations by 16 add up to 32, so
* the halves of x are swapped before the result is XORed in.
* Clobbers RT0..RT3.
*/
#define F4(x) \
movzbl x ## bh, RT1d; \
movzbl x ## bl, RT3d; \
rorq $16, x; \
movzbl x ## bh, RT0d; \
movzbl x ## bl, RT2d; \
rorq $16, x; \
movl s0(CTX,RT0,4), RT0d; \
addl s1(CTX,RT2,4), RT0d; \
xorl s2(CTX,RT1,4), RT0d; \
addl s3(CTX,RT3,4), RT0d; \
xorq RT0, x;
/* XOR the round-key pair currently held in RKEY into all four blocks. */
#define add_preloaded_roundkey4() \
xorq RKEY, RX0; \
xorq RKEY, RX1; \
xorq RKEY, RX2; \
xorq RKEY, RX3;
/* Load P[n] (low half) and P[n+1] (high half) into RKEY. */
#define preload_roundkey_enc(n) \
movq p+4*(n)(CTX), RKEY;
/*
 * Apply the key pair already in RKEY and immediately preload the pair for
 * the next double-round (n + 2).  Note that n itself is only used to
 * compute that next index.
 */
#define add_roundkey_enc4(n) \
add_preloaded_roundkey4(); \
preload_roundkey_enc(n + 2);
/*
 * Two encryption rounds on four blocks: key mixing, then two passes of F4()
 * over RX0..RX3.
 */
#define round_enc4(n) \
add_roundkey_enc4(n); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3);
/*
 * Load P[n-1]/P[n] into RKEY and swap the halves so that P[n] sits in the
 * low half and P[n-1] in the high half.
 */
#define preload_roundkey_dec(n) \
movq p+4*((n)-1)(CTX), RKEY; \
rorq $32, RKEY;
/*
 * Apply the key pair already in RKEY and preload the pair for the next
 * double-round (n - 2).
 */
#define add_roundkey_dec4(n) \
add_preloaded_roundkey4(); \
preload_roundkey_dec(n - 2);
/*
 * Two decryption rounds on four blocks: key mixing, then two passes of F4()
 * over RX0..RX3.
 */
#define round_dec4(n) \
add_roundkey_dec4(n); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3);
/*
 * Load four consecutive 8-byte blocks from RIO (offsets 0, 8, 16, 24) into
 * RX0..RX3, applying the same rotate + byte-swap as read_block() to each.
 */
#define read_block4() \
movq (RIO), RX0; \
rorq $32, RX0; \
bswapq RX0; \
\
movq 8(RIO), RX1; \
rorq $32, RX1; \
bswapq RX1; \
\
movq 16(RIO), RX2; \
rorq $32, RX2; \
bswapq RX2; \
\
movq 24(RIO), RX3; \
rorq $32, RX3; \
bswapq RX3;
/* Byte-swap RX0..RX3 and store them to the four blocks at RIO. */
#define write_block4() \
bswapq RX0; \
movq RX0, (RIO); \
\
bswapq RX1; \
movq RX1, 8(RIO); \
\
bswapq RX2; \
movq RX2, 16(RIO); \
\
bswapq RX3; \
movq RX3, 24(RIO);
/* Byte-swap RX0..RX3 and XOR them into the four blocks already at RIO. */
#define xor_block4() \
bswapq RX0; \
xorq RX0, (RIO); \
\
bswapq RX1; \
xorq RX1, 8(RIO); \
\
bswapq RX2; \
xorq RX2, 16(RIO); \
\
bswapq RX3; \
xorq RX3, 24(RIO);
.align 8
.global __blowfish_enc_blk_4way
.type __blowfish_enc_blk_4way,@function;
__blowfish_enc_blk_4way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
/* %rbp (RT0) and %rbx (RX1) are used below and restored on exit */
pushq %rbp;
pushq %rbx;
/* %rcx becomes RX2, so the xor flag is kept on the stack meanwhile */
pushq %rcx;
preload_roundkey_enc(0);
/* %rsi is also scratch RT1: keep dst in %r11 and read via src */
movq %rsi, %r11;
movq %rdx, RIO;
read_block4();
round_enc4(0);
round_enc4(2);
round_enc4(4);
round_enc4(6);
round_enc4(8);
round_enc4(10);
round_enc4(12);
round_enc4(14);
/* round_enc4(14) preloaded P[16]/P[17]; apply that final key pair */
add_preloaded_roundkey4();
/* recover the saved xor flag into %rbp (caller's %rbp is still stacked) */
popq %rbp;
movq %r11, RIO;
test %bpl, %bpl;
jnz __enc_xor4;
write_block4();
popq %rbx;
popq %rbp;
ret;
__enc_xor4:
xor_block4();
popq %rbx;
popq %rbp;
ret;
.align 8
.global blowfish_dec_blk_4way
.type blowfish_dec_blk_4way,@function;
blowfish_dec_blk_4way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
/* %rbp (RT0) and %rbx (RX1) are used below and restored on exit */
pushq %rbp;
pushq %rbx;
preload_roundkey_dec(17);
/* %rsi is also scratch RT1: keep dst in %r11 and read via src */
movq %rsi, %r11;
movq %rdx, RIO;
read_block4();
round_dec4(17);
round_dec4(15);
round_dec4(13);
round_dec4(11);
round_dec4(9);
round_dec4(7);
round_dec4(5);
round_dec4(3);
/* round_dec4(3) preloaded P[1]/P[0]; apply that final key pair */
add_preloaded_roundkey4();
movq %r11, RIO;
write_block4();
popq %rbx;
popq %rbp;
ret;
+489
View File
@@ -0,0 +1,489 @@
/*
* Glue Code for assembler optimized version of Blowfish
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <asm/processor.h>
#include <crypto/blowfish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
/*
 * regular block cipher functions
 *
 * Process one block.  When @xor is true the encryption result is XORed
 * into @dst instead of overwriting it.
 */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
/*
 * 4-way parallel cipher functions
 *
 * Same contract as above, but operating on four consecutive blocks at
 * @src / @dst per call.
 */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
/* Encrypt one block from @src and store the result in @dst. */
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
}
/*
 * Encrypt one block from @src and XOR the result into @dst (used by CTR
 * mode, where @src is the counter block and @dst already holds the data).
 */
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
/* Encrypt four consecutive blocks from @src and store them at @dst. */
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
/* Encrypt four consecutive blocks from @src and XOR them into @dst. */
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
/* .cia_encrypt callback: encrypt one block using the context of @tfm. */
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
/* .cia_decrypt callback: decrypt one block using the context of @tfm. */
static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
/*
 * ecb_crypt() - common ECB walker for encryption and decryption.
 * @desc:    block cipher request descriptor (supplies the transform)
 * @walk:    walk already initialised by the caller with dst/src/nbytes
 * @fn:      one-block primitive
 * @fn_4way: four-block primitive
 *
 * For every chunk the walk hands out, as many blocks as possible go through
 * @fn_4way and the remaining whole blocks through @fn.  The bytes left over
 * are reported back via blkcipher_walk_done().
 *
 * Returns the last value returned by the walk helpers.
 */
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes;
int err;
err = blkcipher_walk_virt(desc, walk);
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
fn_4way(ctx, wdst, wsrc);
wsrc += bsize * 4;
wdst += bsize * 4;
nbytes -= bsize * 4;
} while (nbytes >= bsize * 4);
/* nothing but a partial block left: skip the single-block loop */
if (nbytes < bsize)
goto done;
}
/*
 * Handle leftovers.  The do-while runs at least once, so this path
 * relies on nbytes >= bsize here.
 * NOTE(review): presumably guaranteed by the blkcipher walk -- confirm.
 */
do {
fn(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
/* nbytes now holds the unprocessed tail of this chunk */
err = blkcipher_walk_done(desc, walk, nbytes);
}
return err;
}
/*
 * .encrypt callback for "ecb(blowfish)": set up a walk over @src/@dst for
 * @nbytes bytes and run ecb_crypt() with the encryption primitives.
 */
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
}
/*
 * .decrypt callback for "ecb(blowfish)": set up a walk over @src/@dst for
 * @nbytes bytes and run ecb_crypt() with the decryption primitives.
 */
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv;
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv;
return nbytes;
}
/*
 * .encrypt callback for "cbc(blowfish)".
 *
 * Walks the scatterlists chunk by chunk, letting __cbc_encrypt() process
 * each chunk and reporting its unprocessed remainder back to the walk.
 * Returns the last status produced by the walk helpers.
 */
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);

	for (err = blkcipher_walk_virt(desc, &walk); walk.nbytes; ) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}
/*
 * __cbc_decrypt() - CBC-decrypt the whole blocks of the current walk chunk.
 * @desc: block cipher request descriptor (supplies the transform context)
 * @walk: walk positioned on the chunk to process; walk->iv is the chaining
 *        value on entry and is replaced by the chunk's last ciphertext block
 *
 * The chunk is processed from its last block towards its first, so each
 * block's chaining value (the preceding ciphertext block) is still intact
 * when it is needed.  Groups of four blocks go through the 4-way primitive,
 * the rest through the single-block one.
 *
 * Returns the number of bytes of the chunk left unprocessed.
 */
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
/* ciphertext copies of the first three blocks of a 4-block group */
u64 ivs[4 - 1];
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
/* remember the final ciphertext block before dst may overwrite it */
last_iv = *src;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
/* step back to the first block of this group of four */
nbytes -= bsize * 4 - bsize;
src -= 4 - 1;
dst -= 4 - 1;
/* saved up front because the decrypt below may overwrite src
 * when dst aliases it */
ivs[0] = src[0];
ivs[1] = src[1];
ivs[2] = src[2];
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
/* blocks 1..3 chain on the ciphertext saved above */
dst[1] ^= ivs[0];
dst[2] ^= ivs[1];
dst[3] ^= ivs[2];
nbytes -= bsize;
/* dst[0] is the chunk's first block: it chains on walk->iv */
if (nbytes < bsize)
goto done;
/* otherwise dst[0] chains on the block before this group */
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 4);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
for (;;) {
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
/* first block of the chunk chains on the incoming IV */
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
}
/*
 * .decrypt callback for "cbc(blowfish)".
 *
 * Walks the scatterlists chunk by chunk, letting __cbc_decrypt() process
 * each chunk and reporting its unprocessed remainder back to the walk.
 * Returns the last status produced by the walk helpers.
 */
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);

	for (err = blkcipher_walk_virt(desc, &walk); walk.nbytes; ) {
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}
/*
 * ctr_crypt_final() - handle the trailing partial block in CTR mode.
 * @ctx:  cipher context
 * @walk: walk whose current chunk holds the final walk->nbytes bytes
 *
 * Encrypts the counter block into a stack keystream buffer, XORs the
 * remaining source bytes into it, copies exactly that many bytes to the
 * destination and finally increments the counter.
 */
static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
{
	u8 ks[BF_BLOCK_SIZE];
	u8 *counter = walk->iv;
	u8 *in = walk->src.virt.addr;
	u8 *out = walk->dst.virt.addr;
	unsigned int tail = walk->nbytes;

	blowfish_enc_blk(ctx, ks, counter);

	/* only the first 'tail' keystream bytes are consumed */
	crypto_xor(ks, in, tail);
	memcpy(out, ks, tail);

	crypto_inc(counter, BF_BLOCK_SIZE);
}
/*
 * __ctr_crypt() - CTR-process the whole blocks of the current walk chunk.
 * @desc: block cipher request descriptor (supplies the transform context)
 * @walk: walk positioned on the chunk; walk->iv holds the big-endian 64-bit
 *        counter, which is written back advanced by the number of blocks
 *        processed
 *
 * The data is first copied to the destination (unless operating in place)
 * and the encrypted counter blocks are then XORed into it, four at a time
 * where possible.
 *
 * Returns the number of bytes of the chunk left unprocessed.
 */
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
/* counter kept in CPU byte order so it can simply be incremented */
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
__be64 ctrblocks[4];
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
/* the xor variant accumulates into dst, so dst must hold the input */
if (dst != src) {
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
}
/* create ctrblks for parallel encrypt */
ctrblocks[0] = cpu_to_be64(ctrblk++);
ctrblocks[1] = cpu_to_be64(ctrblk++);
ctrblocks[2] = cpu_to_be64(ctrblk++);
ctrblocks[3] = cpu_to_be64(ctrblk++);
blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += 4;
dst += 4;
} while ((nbytes -= bsize * 4) >= bsize * 4);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (dst != src)
*dst = *src;
ctrblocks[0] = cpu_to_be64(ctrblk++);
blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
src += 1;
dst += 1;
} while ((nbytes -= bsize) >= bsize);
done:
/* store the advanced counter back in big-endian form */
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
return nbytes;
}
/*
 * .encrypt and .decrypt callback for "ctr(blowfish)" (CTR is symmetric).
 *
 * Chunks containing at least one whole block are handled by __ctr_crypt();
 * whatever is left after that, shorter than one block, is handled by
 * ctr_crypt_final().  Returns the last status produced by the walk helpers.
 */
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);

	for (;;) {
		nbytes = walk.nbytes;
		if (nbytes < BF_BLOCK_SIZE)
			break;

		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	if (!walk.nbytes)
		return err;

	/* trailing partial block */
	ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
	return blkcipher_walk_done(desc, &walk, 0);
}
/*
 * Algorithms exported by this module:
 *   [0] "blowfish"       single-block cipher, priority 200
 *   [1] "ecb(blowfish)"  block cipher mode, priority 300
 *   [2] "cbc(blowfish)"  block cipher mode, priority 300
 *   [3] "ctr(blowfish)"  block cipher mode, priority 300
 * All share struct bf_ctx as context and blowfish_setkey() for key setup.
 */
static struct crypto_alg bf_algs[4] = { {
/* [0] raw single-block cipher */
.cra_name = "blowfish",
.cra_driver_name = "blowfish-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list),
.cra_u = {
.cipher = {
.cia_min_keysize = BF_MIN_KEY_SIZE,
.cia_max_keysize = BF_MAX_KEY_SIZE,
.cia_setkey = blowfish_setkey,
.cia_encrypt = blowfish_encrypt,
.cia_decrypt = blowfish_decrypt,
}
}
}, {
/* [1] ECB mode: no IV */
.cra_name = "ecb(blowfish)",
.cra_driver_name = "ecb-blowfish-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = blowfish_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
/* [2] CBC mode: IV is one cipher block */
.cra_name = "cbc(blowfish)",
.cra_driver_name = "cbc-blowfish-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
/* [3] CTR mode: block size 1 (any length), same routine both ways */
.cra_name = "ctr(blowfish)",
.cra_driver_name = "ctr-blowfish-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
} };
/*
 * is_blacklisted_cpu() - report whether this implementation should not be
 * used on the boot CPU.
 *
 * Returns true only for Intel family 0x0f (Pentium 4): there,
 * blowfish-x86_64 is slower than the generic C implementation because it
 * relies on 64-bit rotates, which are really slow on P4.
 */
static bool is_blacklisted_cpu(void)
{
	return boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	       boot_cpu_data.x86 == 0x0f;
}
/*
 * Module parameter "force": when non-zero, init() skips the CPU blacklist
 * check.  Permissions 0 mean it is not exposed through sysfs.
 */
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
/*
 * Module init: register all Blowfish algorithms unless the boot CPU is
 * blacklisted and the "force" parameter was not given.
 *
 * Returns the result of crypto_register_algs(), or -ENODEV when loading is
 * refused.
 */
static int __init init(void)
{
	if (force || !is_blacklisted_cpu())
		return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));

	printk(KERN_INFO
	       "blowfish-x86_64: performance on this CPU "
	       "would be suboptimal: disabling "
	       "blowfish-x86_64.\n");

	return -ENODEV;
}
/* Module exit: unregister every algorithm registered by init(). */
static void __exit fini(void)
{
crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
MODULE_ALIAS("blowfish");
MODULE_ALIAS("blowfish-asm");
@@ -0,0 +1,520 @@
/*
* Camellia Cipher Algorithm (x86_64)
*
* Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
.file "camellia-x86_64-asm_64.S"
.text
/*
 * Lookup tables defined outside this file.  They are indexed by one byte
 * and hold 8-byte entries (see the scale factor 8 in xor2ror16()).
 */
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;
/* short local aliases for the tables above */
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110
/* size in bytes of the subkey table at the start of the context */
#define CAMELLIA_TABLE_BYTE_LEN 272
/* struct camellia_ctx: byte offsets of the fields used here; key_length
 * directly follows the subkey table */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN
/* register macros
 *
 * RAB0/RCD0 hold the two 64-bit halves of the first block and RAB1/RCD1
 * those of the second block (2-way code only); the d/bl/bh variants name
 * the 32-bit, low-byte and second-byte views.
 * RT0..RT2 are scratch.  RT0 is %rsi, the same register as RIO, and RT1 is
 * %rbp, so the functions keep the destination pointer in RDST and the
 * caller's %rbp in RRBP while the rounds run.  RXOR holds the "xor output"
 * flag in the encryption functions.
 */
#define CTX %rdi
#define RIO %rsi
#define RIOd %esi
#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx
#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx
#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl
#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh
#define RT0 %rsi
#define RT1 %rbp
#define RT2 %r8
#define RT0d %esi
#define RT1d %ebp
#define RT2d %r8d
#define RT2bl %r8b
#define RXOR %r9
#define RRBP %r10
#define RDST %r11
#define RXORd %r9d
#define RXORbl %r9b
/*
 * Consume the two lowest bytes of ab: the lowest byte indexes table T0 and
 * the second byte indexes table T1 (8-byte entries); both entries are XORed
 * into dst and ab is rotated right by 16 so the next two bytes become the
 * lowest ones.  Clobbers tmp1 and tmp2.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
movzbl ab ## bl, tmp2 ## d; \
movzbl ab ## bh, tmp1 ## d; \
rorq $16, ab; \
xorq T0(, tmp2, 8), dst; \
xorq T1(, tmp1, 8), dst;
/**********************************************************************
1-way camellia
**********************************************************************/
/*
 * One round for block 0: load the 64-bit subkey number `subkey` into RT2,
 * then run all eight bytes of ab##0 through the lookup tables.  The four
 * xor2ror16() steps rotate ab##0 by 64 bits in total, leaving it unchanged,
 * and accumulate alternately into cd##0 and RT2; RT2 (subkey plus half of
 * the lookups) is folded into cd##0 at the end.
 * Clobbers RT0, RT1 and RT2.
 */
#define roundsm(ab, subkey, cd) \
movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
\
xorq RT2, cd ## 0;
/*
 * Key-dependent mixing layer applied between groups of rounds, for block 0.
 * Four steps, using subkeys kl (for l) and kr (for r):
 *   1. (low32(kl) & low32(l)) rotated left by 1, XORed into the high half
 *      of l;
 *   2. high32(kr | r) XORed into the low half of r;
 *   3. high32(kl | l) XORed into the low half of l;
 *   4. (low32(kr) & low32(r)) rotated left by 1, XORed into the high half
 *      of r.
 * NOTE(review): presumably this is Camellia's FL / FL^-1 layer -- confirm
 * against the cipher specification.
 * Clobbers RT0, RT1 and RT2.
 */
#define fls(l, r, kl, kr) \
movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
andl l ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, l ## 0; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
orq r ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, r ## 0; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
orq l ## 0, RT2; \
shrq $32, RT2; \
xorq RT2, l ## 0; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
andl r ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, r ## 0;
/*
 * Six encryption rounds using subkeys i+2 .. i+7 in ascending order,
 * alternating which half (RAB / RCD) is the round input.
 */
#define enc_rounds(i) \
roundsm(RAB, i + 2, RCD); \
roundsm(RCD, i + 3, RAB); \
roundsm(RAB, i + 4, RCD); \
roundsm(RCD, i + 5, RAB); \
roundsm(RAB, i + 6, RCD); \
roundsm(RCD, i + 7, RAB);
/* Mixing layer for encryption: subkey i for RAB, subkey i+1 for RCD. */
#define enc_fls(i) \
fls(RAB, RCD, i + 0, i + 1);
/*
 * Load the 16-byte input block at RIO: bytes 0-7 into RAB0 and bytes 8-15
 * into RCD0, each byte-swapped and rotated by 32, then XOR the first
 * key_table entry into RAB0.
 */
#define enc_inpack() \
movq (RIO), RAB0; \
bswapq RAB0; \
rolq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rorq $32, RCD0; \
xorq key_table(CTX), RAB0;
/*
 * Finish encryption: XOR key_table entry number `max` (a register, 8-byte
 * entries) into RCD0, undo the rotation and byte swap, and emit RCD0 to
 * bytes 0-7 and RAB0 to bytes 8-15 at RIO.  `op` is mov (overwrite) or xor
 * (XOR into the existing output bytes).
 */
#define enc_outunpack(op, max) \
xorq key_table(CTX, max, 8), RCD0; \
rorq $32, RCD0; \
bswapq RCD0; \
op ## q RCD0, (RIO); \
rolq $32, RAB0; \
bswapq RAB0; \
op ## q RAB0, 4*2(RIO);
/*
 * Six decryption rounds using subkeys i+7 .. i+2 in descending order,
 * alternating which half (RAB / RCD) is the round input.
 */
#define dec_rounds(i) \
roundsm(RAB, i + 7, RCD); \
roundsm(RCD, i + 6, RAB); \
roundsm(RAB, i + 5, RCD); \
roundsm(RCD, i + 4, RAB); \
roundsm(RAB, i + 3, RCD); \
roundsm(RCD, i + 2, RAB);
/* Mixing layer for decryption: subkey i+1 for RAB, subkey i for RCD (the
 * reverse of enc_fls()). */
#define dec_fls(i) \
fls(RAB, RCD, i + 1, i + 0);
/*
 * Load the 16-byte input block at RIO as enc_inpack() does, but XOR
 * key_table entry number `max` (a register, 8-byte entries) into RAB0
 * instead of the first entry.
 */
#define dec_inpack(max) \
movq (RIO), RAB0; \
bswapq RAB0; \
rolq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rorq $32, RCD0; \
xorq key_table(CTX, max, 8), RAB0;
/*
 * Finish decryption: XOR the first key_table entry into RCD0, undo the
 * rotation and byte swap, and store RCD0 to bytes 0-7 and RAB0 to bytes
 * 8-15 at RIO (always overwriting).
 */
#define dec_outunpack() \
xorq key_table(CTX), RCD0; \
rorq $32, RCD0; \
bswapq RCD0; \
movq RCD0, (RIO); \
rolq $32, RAB0; \
bswapq RAB0; \
movq RAB0, 4*2(RIO);
.global __camellia_enc_blk;
.type __camellia_enc_blk,@function;
__camellia_enc_blk:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool xor
*/
/* %rbp is scratch RT1: keep the caller's value in RRBP */
movq %rbp, RRBP;
/* %rcx becomes RCD0 and %rsi becomes RT0: move flag and dst aside */
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
enc_inpack();
enc_rounds(0);
enc_fls(8);
enc_rounds(8);
enc_fls(16);
enc_rounds(16);
movl $24, RT1d; /* max */
/* key_length == 16: stop here, the final subkey is entry 24 */
cmpb $16, key_length(CTX);
je __enc_done;
/* longer keys: one more mixing layer and six rounds, final entry 32 */
enc_fls(24);
enc_rounds(24);
movl $32, RT1d; /* max */
__enc_done:
/* movq does not alter flags, so jnz still sees the testb result */
testb RXORbl, RXORbl;
movq RDST, RIO;
jnz __enc_xor;
enc_outunpack(mov, RT1);
movq RRBP, %rbp;
ret;
__enc_xor:
enc_outunpack(xor, RT1);
movq RRBP, %rbp;
ret;
.global camellia_dec_blk;
.type camellia_dec_blk,@function;
camellia_dec_blk:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
/* RT2 = index of the last subkey: 24 when key_length == 16, else 32.
 * RXOR is free here (no xor flag) and serves as a temporary. */
cmpl $16, key_length(CTX);
movl $32, RT2d;
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
/* %rbp is scratch RT1 and %rsi is scratch RT0: move them aside */
movq %rbp, RRBP;
movq %rsi, RDST;
movq %rdx, RIO;
dec_inpack(RT2);
/* key_length == 16: skip the extra rounds used for longer keys */
cmpb $24, RT2bl;
je __dec_rounds16;
dec_rounds(24);
dec_fls(24);
__dec_rounds16:
dec_rounds(16);
dec_fls(16);
dec_rounds(8);
dec_fls(8);
dec_rounds(0);
movq RDST, RIO;
dec_outunpack();
movq RRBP, %rbp;
ret;
/**********************************************************************
2-way camellia
**********************************************************************/
/*
 * One round on two blocks.  The subkey is loaded into RT2 once and XORed
 * into cd##1 straight away.  Block 0 is then processed as in roundsm()
 * (alternating between cd##0 and RT2 as accumulator, with RT2 folded into
 * cd##0 afterwards), while all four lookup steps of block 1 accumulate
 * directly into cd##1.  The fold of RT2 into cd##0 is placed after the
 * first block-1 step.
 * Clobbers RT0, RT1 and RT2.
 */
#define roundsm2(ab, subkey, cd) \
movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
xorq RT2, cd ## 1; \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
xorq RT2, cd ## 0; \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
/*
 * Two-block version of fls(): the same four mixing steps are applied to
 * block 0 (l##0 / r##0) and block 1 (l##1 / r##1), interleaved as
 *   steps 1-2 on block 0, steps 1-2 on block 1,
 *   steps 3-4 on block 0, steps 3-4 on block 1,
 * with the scratch registers RT0..RT2 rotated between the groups.
 * Clobbers RT0, RT1 and RT2.
 */
#define fls2(l, r, kl, kr) \
movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
andl l ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, l ## 0; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
orq r ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, r ## 0; \
\
movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
andl l ## 1d, RT2d; \
roll $1, RT2d; \
shlq $32, RT2; \
xorq RT2, l ## 1; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
orq r ## 1, RT0; \
shrq $32, RT0; \
xorq RT0, r ## 1; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
orq l ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, l ## 0; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
andl r ## 0d, RT2d; \
roll $1, RT2d; \
shlq $32, RT2; \
xorq RT2, r ## 0; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
orq l ## 1, RT0; \
shrq $32, RT0; \
xorq RT0, l ## 1; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
andl r ## 1d, RT1d; \
roll $1, RT1d; \
shlq $32, RT1; \
xorq RT1, r ## 1;
/* Six two-block encryption rounds using subkeys i+2 .. i+7 in ascending
 * order. */
#define enc_rounds2(i) \
roundsm2(RAB, i + 2, RCD); \
roundsm2(RCD, i + 3, RAB); \
roundsm2(RAB, i + 4, RCD); \
roundsm2(RCD, i + 5, RAB); \
roundsm2(RAB, i + 6, RCD); \
roundsm2(RCD, i + 7, RAB);
/* Two-block mixing layer for encryption: subkey i for RAB, i+1 for RCD. */
#define enc_fls2(i) \
fls2(RAB, RCD, i + 0, i + 1);
/*
 * Load two consecutive 16-byte blocks from RIO: block 0 (bytes 0-15) into
 * RAB0/RCD0 and block 1 (bytes 16-31) into RAB1/RCD1.  Each 8-byte half is
 * byte-swapped and rotated by 32, and the first key_table entry is XORed
 * into RAB0 and RAB1.
 */
#define enc_inpack2() \
movq (RIO), RAB0; \
bswapq RAB0; \
rorq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rolq $32, RCD0; \
xorq key_table(CTX), RAB0; \
\
movq 8*2(RIO), RAB1; \
bswapq RAB1; \
rorq $32, RAB1; \
movq 12*2(RIO), RCD1; \
bswapq RCD1; \
rolq $32, RCD1; \
xorq key_table(CTX), RAB1;
/*
 * Finish two-block encryption: XOR key_table entry number `max` into RCD0
 * and RCD1, undo rotation and byte swap, and emit RCD then RAB for each
 * block (block 0 at bytes 0-15, block 1 at bytes 16-31 of RIO).  `op` is
 * mov (overwrite) or xor (XOR into the existing output bytes).
 */
#define enc_outunpack2(op, max) \
xorq key_table(CTX, max, 8), RCD0; \
rolq $32, RCD0; \
bswapq RCD0; \
op ## q RCD0, (RIO); \
rorq $32, RAB0; \
bswapq RAB0; \
op ## q RAB0, 4*2(RIO); \
\
xorq key_table(CTX, max, 8), RCD1; \
rolq $32, RCD1; \
bswapq RCD1; \
op ## q RCD1, 8*2(RIO); \
rorq $32, RAB1; \
bswapq RAB1; \
op ## q RAB1, 12*2(RIO);
/* Six two-block decryption rounds using subkeys i+7 .. i+2 in descending
 * order. */
#define dec_rounds2(i) \
roundsm2(RAB, i + 7, RCD); \
roundsm2(RCD, i + 6, RAB); \
roundsm2(RAB, i + 5, RCD); \
roundsm2(RCD, i + 4, RAB); \
roundsm2(RAB, i + 3, RCD); \
roundsm2(RCD, i + 2, RAB);
/* Two-block mixing layer for decryption: subkey i+1 for RAB, i for RCD. */
#define dec_fls2(i) \
fls2(RAB, RCD, i + 1, i + 0);
/*
 * Load two consecutive 16-byte blocks from RIO as enc_inpack2() does, but
 * XOR key_table entry number `max` (a register, 8-byte entries) into RAB0
 * and RAB1 instead of the first entry.
 */
#define dec_inpack2(max) \
movq (RIO), RAB0; \
bswapq RAB0; \
rorq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rolq $32, RCD0; \
xorq key_table(CTX, max, 8), RAB0; \
\
movq 8*2(RIO), RAB1; \
bswapq RAB1; \
rorq $32, RAB1; \
movq 12*2(RIO), RCD1; \
bswapq RCD1; \
rolq $32, RCD1; \
xorq key_table(CTX, max, 8), RAB1;
/*
 * Finish two-block decryption: XOR the first key_table entry into RCD0 and
 * RCD1, undo rotation and byte swap, and store RCD then RAB for each block
 * (always overwriting the output).
 */
#define dec_outunpack2() \
xorq key_table(CTX), RCD0; \
rolq $32, RCD0; \
bswapq RCD0; \
movq RCD0, (RIO); \
rorq $32, RAB0; \
bswapq RAB0; \
movq RAB0, 4*2(RIO); \
\
xorq key_table(CTX), RCD1; \
rolq $32, RCD1; \
bswapq RCD1; \
movq RCD1, 8*2(RIO); \
rorq $32, RAB1; \
bswapq RAB1; \
movq RAB1, 12*2(RIO);
.global __camellia_enc_blk_2way;
.type __camellia_enc_blk_2way,@function;
__camellia_enc_blk_2way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool xor
*/
/* %rbx is RAB1: saved on the stack and restored before returning */
pushq %rbx;
/* %rbp (RT1), %rcx (RCD0) and %rsi (RT0) are reused: move them aside */
movq %rbp, RRBP;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
enc_inpack2();
enc_rounds2(0);
enc_fls2(8);
enc_rounds2(8);
enc_fls2(16);
enc_rounds2(16);
movl $24, RT2d; /* max */
/* key_length == 16: stop here, the final subkey is entry 24 */
cmpb $16, key_length(CTX);
je __enc2_done;
/* longer keys: one more mixing layer and six rounds, final entry 32 */
enc_fls2(24);
enc_rounds2(24);
movl $32, RT2d; /* max */
__enc2_done:
/* movq does not alter flags, so jnz still sees the test result */
test RXORbl, RXORbl;
movq RDST, RIO;
jnz __enc2_xor;
enc_outunpack2(mov, RT2);
movq RRBP, %rbp;
popq %rbx;
ret;
__enc2_xor:
enc_outunpack2(xor, RT2);
movq RRBP, %rbp;
popq %rbx;
ret;
.global camellia_dec_blk_2way;
.type camellia_dec_blk_2way,@function;
camellia_dec_blk_2way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
/* RT2 = index of the last subkey: 24 when key_length == 16, else 32;
 * RXOR is only a temporary for the cmov here */
cmpl $16, key_length(CTX);
movl $32, RT2d;
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
/* there is no xor flag in this function, so RXOR holds the caller's
 * %rbx (RAB1) instead of using the stack */
movq %rbx, RXOR;
movq %rbp, RRBP;
movq %rsi, RDST;
movq %rdx, RIO;
dec_inpack2(RT2);
/* key_length == 16: skip the extra rounds used for longer keys */
cmpb $24, RT2bl;
je __dec2_rounds16;
dec_rounds2(24);
dec_fls2(24);
__dec2_rounds16:
dec_rounds2(16);
dec_fls2(16);
dec_rounds2(8);
dec_fls2(8);
dec_rounds2(0);
movq RDST, RIO;
dec_outunpack2();
movq RRBP, %rbp;
movq RXOR, %rbx;
ret;
File diff suppressed because it is too large Load Diff
+203
View File
@@ -0,0 +1,203 @@
/*
* Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
* CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
* CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2A: Instruction Set Reference, A-M
*
* Copyright (C) 2008 Intel Corporation
* Authors: Austin Zhang <austin_zhang@linux.intel.com>
* Kent Liu <kent.liu@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
/* the algorithm accepts input at byte granularity and yields a 4-byte CRC */
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
/* number of bytes consumed per word-sized CRC step */
#define SCALE_F sizeof(unsigned long)
/*
 * Extra prefix byte spliced into the hand-encoded word-sized instruction on
 * 64-bit builds; empty on 32-bit builds.
 * NOTE(review): 0x48 is presumably the REX.W prefix selecting a 64-bit
 * source operand -- confirm against the instruction reference.
 */
#ifdef CONFIG_X86_64
#define REX_PRE "0x48, "
#else
#define REX_PRE
#endif
/*
 * crc32c_intel_le_hw_byte() - fold a buffer into a CRC one byte at a time.
 * @crc:    running CRC value
 * @data:   bytes to process
 * @length: number of bytes at @data
 *
 * The instruction is emitted as raw opcode bytes, so the operands are fixed
 * by the encoding: the constraints pin the CRC to ESI (in and out) and the
 * data byte to ECX.
 *
 * Returns the updated CRC.
 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
	unsigned char const *const end = data + length;

	for (; data != end; data++) {
		__asm__ __volatile__(
			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
			:"=S"(crc)
			:"0"(crc), "c"(*data)
		);
	}

	return crc;
}
/*
 * crc32c_intel_le_hw() - fold a buffer into a running CRC.
 * @crc: running CRC value
 * @p:   data to process
 * @len: number of bytes at @p
 *
 * The bulk of the buffer is consumed one machine word (SCALE_F bytes) per
 * instruction; the len % SCALE_F trailing bytes are handed to
 * crc32c_intel_le_hw_byte().  As in the byte variant, the instruction is
 * emitted as raw opcode bytes with the CRC pinned to ESI and the data word
 * to (R/E)CX by the constraints.
 *
 * The word and remainder counts are kept in size_t so a large @len is never
 * narrowed, and the buffer stays const-qualified throughout because it is
 * only read.
 *
 * Returns the updated CRC.
 */
static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
{
	size_t iquotient = len / SCALE_F;
	size_t iremainder = len % SCALE_F;
	const unsigned long *ptmp = (const unsigned long *)p;

	while (iquotient--) {
		__asm__ __volatile__(
			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
			:"=S"(crc)
			:"0"(crc), "c"(*ptmp)
		);
		ptmp++;
	}

	/* ptmp now points just past the last whole word */
	if (iremainder)
		crc = crc32c_intel_le_hw_byte(crc, (const unsigned char *)ptmp,
				 iremainder);

	return crc;
}
/*
 * crc32c_intel_setkey() - set the CRC seed of a transform.
 * @hash:   transform whose context receives the seed
 * @key:    seed as a little-endian 32-bit value
 * @keylen: must be sizeof(u32)
 *
 * Setting the seed allows arbitrary accumulators and a flexible XOR policy.
 * If your algorithm starts with ~0, then XOR with ~0 before you set the
 * seed.
 *
 * Returns 0 on success; on a wrong key length it sets
 * CRYPTO_TFM_RES_BAD_KEY_LEN on @hash and returns -EINVAL.
 */
static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
			       unsigned int keylen)
{
	u32 *seed = crypto_shash_ctx(hash);

	if (keylen == sizeof(u32)) {
		*seed = le32_to_cpup((__le32 *)key);
		return 0;
	}

	crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
	return -EINVAL;
}
/*
 * Start a new computation: copy the transform's seed into the per-request
 * accumulator.  Always returns 0.
 */
static int crc32c_intel_init(struct shash_desc *desc)
{
	const u32 *seed = crypto_shash_ctx(desc->tfm);
	u32 *state = shash_desc_ctx(desc);

	*state = *seed;

	return 0;
}
/*
 * Fold @len bytes of @data into the per-request accumulator.
 * Always returns 0.
 */
static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	u32 *state = shash_desc_ctx(desc);
	u32 crc = crc32c_intel_le_hw(*state, data, len);

	*state = crc;

	return 0;
}
/*
 * Fold @len bytes of @data into the accumulator value read from @crcp and
 * store the bitwise complement of the result, in little-endian order, to
 * @out.  The value behind @crcp is not modified.  Always returns 0.
 */
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	u32 crc = crc32c_intel_le_hw(*crcp, data, len);
	__le32 *digest = (__le32 *)out;

	*digest = ~cpu_to_le32(crc);

	return 0;
}
/*
 * Final update: fold @data into the per-request accumulator and write the
 * digest to @out, by forwarding to __crc32c_intel_finup().
 */
static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
}
/*
 * Write the digest for the data folded so far: the bitwise complement of the
 * per-request accumulator, stored little-endian at @out.  Always returns 0.
 */
static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);
	__le32 le = cpu_to_le32p(state);

	*(__le32 *)out = ~le;

	return 0;
}
/*
 * One-shot digest of @data.  Starts directly from the transform's seed
 * (crypto_shash_ctx) rather than from the per-request accumulator.
 */
static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
/*
 * Transform constructor: preset the seed to all ones.  It stays that way
 * unless crc32c_intel_setkey() replaces it.  Always returns 0.
 */
static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
{
	u32 *seed = crypto_tfm_ctx(tfm);

	*seed = ~0;

	return 0;
}
/* shash descriptor registered by crc32c_intel_mod_init(). */
static struct shash_alg alg = {
	/* sizes: the per-request state is a single u32 accumulator */
	.digestsize	= CHKSUM_DIGEST_SIZE,
	.descsize	= sizeof(u32),

	/* operations */
	.init		= crc32c_intel_init,
	.setkey		= crc32c_intel_setkey,
	.update		= crc32c_intel_update,
	.finup		= crc32c_intel_finup,
	.final		= crc32c_intel_final,
	.digest		= crc32c_intel_digest,

	.base		= {
		.cra_name		= "crc32c",
		.cra_driver_name	= "crc32c-intel",
		.cra_priority		= 200,
		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
		/* the per-transform context holds the u32 seed */
		.cra_ctxsize		= sizeof(u32),
		.cra_init		= crc32c_intel_cra_init,
		.cra_module		= THIS_MODULE,
	}
};
/*
 * CPU match table: the module only registers its algorithm on CPUs that
 * report X86_FEATURE_XMM4_2.  The table is also exported as a module device
 * table.
 */
static const struct x86_cpu_id crc32c_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
/*
 * Module entry point: register the shash when the CPU matches
 * crc32c_cpu_id, otherwise fail with -ENODEV.
 */
static int __init crc32c_intel_mod_init(void)
{
	if (x86_match_cpu(crc32c_cpu_id))
		return crypto_register_shash(&alg);

	return -ENODEV;
}
/* Module exit point: unregister the shash registered at init time. */
static void __exit crc32c_intel_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
/* Hook the init/exit routines into the module loader. */
module_init(crc32c_intel_mod_init);
module_exit(crc32c_intel_mod_fini);
/* Module metadata. */
MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
MODULE_LICENSE("GPL");
/* Aliases matching the algorithm name and the driver name declared in alg. */
MODULE_ALIAS("crc32c");
MODULE_ALIAS("crc32c-intel");
+161
View File
@@ -0,0 +1,161 @@
/*
* FPU: Wrapper for blkcipher touching fpu
*
* Copyright (c) Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/algapi.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/i387.h>
struct crypto_fpu_ctx {
struct crypto_blkcipher *child;
};
/*
 * Forward a key to the wrapped cipher.
 *
 * The request flags of @parent are mirrored onto the child before the call,
 * and the child's result flags are copied back to @parent afterwards.
 * Returns whatever the child's setkey returned.
 */
static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
			     unsigned int keylen)
{
	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
	struct crypto_blkcipher *inner = ctx->child;
	int ret;

	/* parent -> child: request flags */
	crypto_blkcipher_clear_flags(inner, CRYPTO_TFM_REQ_MASK);
	crypto_blkcipher_set_flags(inner,
				   crypto_tfm_get_flags(parent) &
				   CRYPTO_TFM_REQ_MASK);

	ret = crypto_blkcipher_setkey(inner, key, keylen);

	/* child -> parent: result flags */
	crypto_tfm_set_flags(parent,
			     crypto_blkcipher_get_flags(inner) &
			     CRYPTO_TFM_RES_MASK);

	return ret;
}
static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
int err;
struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
struct crypto_blkcipher *child = ctx->child;
struct blkcipher_desc desc = {
.tfm = child,
.info = desc_in->info,
.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
};
kernel_fpu_begin();
err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
kernel_fpu_end();
return err;
}
static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
int err;
struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
struct crypto_blkcipher *child = ctx->child;
struct blkcipher_desc desc = {
.tfm = child,
.info = desc_in->info,
.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
};
kernel_fpu_begin();
err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
kernel_fpu_end();
return err;
}
/*
 * Transform constructor: instantiate the wrapped blkcipher from the
 * instance's spawn and remember it in the context.
 * Returns 0, or the error encoded in the spawn result.
 */
static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
{
	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
	struct crypto_spawn *spawn =
		crypto_instance_ctx(crypto_tfm_alg_instance(tfm));
	struct crypto_blkcipher *child = crypto_spawn_blkcipher(spawn);

	if (IS_ERR(child))
		return PTR_ERR(child);

	ctx->child = child;

	return 0;
}
/* Transform destructor: release the wrapped blkcipher stored in the context. */
static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
{
struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
crypto_free_blkcipher(ctx->child);
}
/*
 * Template ->alloc callback: build an "fpu" instance around the blkcipher
 * named in the attributes @tb.
 *
 * The instance copies the wrapped algorithm's flags, priority, block size,
 * alignment mask, type, IV size and key-size limits, and routes setkey /
 * encrypt / decrypt through the crypto_fpu_* wrappers.
 *
 * Returns the new instance, or an ERR_PTR on failure.  The reference taken on
 * the wrapped algorithm by the lookup is dropped before returning in either
 * case.
 */
static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
{
	struct crypto_instance *inst;
	struct crypto_alg *alg;
	int err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);

	if (err)
		return ERR_PTR(err);

	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
				  CRYPTO_ALG_TYPE_MASK);
	if (IS_ERR(alg))
		return ERR_CAST(alg);

	inst = crypto_alloc_instance("fpu", alg);
	if (!IS_ERR(inst)) {
		struct crypto_alg *wrapper = &inst->alg;

		/* properties inherited from the wrapped algorithm */
		wrapper->cra_flags = alg->cra_flags;
		wrapper->cra_priority = alg->cra_priority;
		wrapper->cra_blocksize = alg->cra_blocksize;
		wrapper->cra_alignmask = alg->cra_alignmask;
		wrapper->cra_type = alg->cra_type;
		wrapper->cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
		wrapper->cra_blkcipher.min_keysize =
			alg->cra_blkcipher.min_keysize;
		wrapper->cra_blkcipher.max_keysize =
			alg->cra_blkcipher.max_keysize;

		/* properties and callbacks owned by the wrapper */
		wrapper->cra_ctxsize = sizeof(struct crypto_fpu_ctx);
		wrapper->cra_init = crypto_fpu_init_tfm;
		wrapper->cra_exit = crypto_fpu_exit_tfm;
		wrapper->cra_blkcipher.setkey = crypto_fpu_setkey;
		wrapper->cra_blkcipher.encrypt = crypto_fpu_encrypt;
		wrapper->cra_blkcipher.decrypt = crypto_fpu_decrypt;
	}

	crypto_mod_put(alg);

	return inst;
}
/* Template ->free callback: drop the instance's spawn, then free the instance. */
static void crypto_fpu_free(struct crypto_instance *inst)
{
crypto_drop_spawn(crypto_instance_ctx(inst));
kfree(inst);
}
/* Descriptor of the "fpu" template, registered by crypto_fpu_init(). */
static struct crypto_template crypto_fpu_tmpl = {
.name = "fpu",
.alloc = crypto_fpu_alloc,
.free = crypto_fpu_free,
.module = THIS_MODULE,
};
/*
 * Register the "fpu" template; returns the registration result.
 * Not static: no caller appears in this file, so it is presumably invoked
 * from another translation unit (TODO confirm).
 */
int __init crypto_fpu_init(void)
{
return crypto_register_template(&crypto_fpu_tmpl);
}
/* Unregister the "fpu" template registered by crypto_fpu_init(). */
void __exit crypto_fpu_exit(void)
{
crypto_unregister_template(&crypto_fpu_tmpl);
}
@@ -0,0 +1,157 @@
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains accelerated part of ghash
* implementation. More information about PCLMULQDQ can be found at:
*
* http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Vinodh Gopal
* Erdinc Ozturk
* Deniz Karakoyunlu
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/inst.h>
.data
.align 16
/* PSHUFB control mask that reverses the order of the 16 bytes of a register */
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f
/* constant ANDed into the reduction term in clmul_ghash_setkey */
.Lpoly:
.octa 0xc2000000000000000000000000000001
/* comparison pattern used with pcmpeqd in clmul_ghash_setkey */
.Ltwo_one:
.octa 0x00000001000000000000000000000001
/* symbolic names for the XMM registers used by the routines in this file */
#define DATA %xmm0
#define SHASH %xmm1
#define T1 %xmm2
#define T2 %xmm3
#define T3 %xmm4
#define BSWAP %xmm5
#define IN1 %xmm6
.text
/*
 * __clmul_gf128mul_ble: internal ABI
 * input:
 * DATA: operand1
 * SHASH: operand2, hash_key << 1 mod poly
 * output:
 * DATA: operand1 * operand2 mod poly
 * changed:
 * T1
 * T2
 * T3
 *
 * The 128x128-bit carry-less product is built from three 64x64-bit
 * PCLMULQDQ products (low halves, high halves, and the XOR-sums of the
 * halves); the 256-bit result <T1:DATA> is then folded back to 128 bits in
 * two shift-and-XOR phases.
 */
__clmul_gf128mul_ble:
movaps DATA, T1
/* swap the 64-bit halves so that T2 = a1 + a0 and T3 = b1 + b0 below */
pshufd $0b01001110, DATA, T2
pshufd $0b01001110, SHASH, T3
pxor DATA, T2
pxor SHASH, T3
PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
pxor DATA, T2
pxor T1, T2 # T2 = a0 * b1 + a1 * b0
/* add the middle term, split across the low (DATA) and high (T1) halves */
movaps T2, T3
pslldq $8, T3
psrldq $8, T2
pxor T3, DATA
pxor T2, T1 # <T1:DATA> is result of
# carry-less multiplication
# first phase of the reduction
movaps DATA, T3
psllq $1, T3
pxor DATA, T3
psllq $5, T3
pxor DATA, T3
psllq $57, T3
movaps T3, T2
pslldq $8, T2
psrldq $8, T3
pxor T2, DATA
pxor T3, T1
# second phase of the reduction
movaps DATA, T2
psrlq $5, T2
pxor DATA, T2
psrlq $1, T2
pxor DATA, T2
psrlq $1, T2
pxor T2, T1
pxor T1, DATA
ret
/*
 * void clmul_ghash_mul(char *dst, const be128 *shash)
 *
 * Multiply the 16-byte block at dst (%rdi) by the prepared key at shash
 * (%rsi) and store the product back to dst.  The block is byte-reversed
 * with .Lbswap_mask before and after the multiplication.
 * Writes DATA, SHASH, BSWAP and the temporaries T1-T3.
 */
ENTRY(clmul_ghash_mul)
movups (%rdi), DATA
movups (%rsi), SHASH
movaps .Lbswap_mask, BSWAP
PSHUFB_XMM BSWAP DATA
call __clmul_gf128mul_ble
PSHUFB_XMM BSWAP DATA
movups DATA, (%rdi)
ret
/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 * const be128 *shash);
 *
 * For every complete 16-byte block of src (%rsi): XOR the byte-reversed
 * block into the running value loaded from dst (%rdi) and multiply by the
 * prepared key at shash (%rcx).  The result is stored back to dst.
 * Returns immediately, without touching dst, when srclen (%rdx) is below
 * 16; a trailing partial block is not consumed.
 *
 * NOTE(review): srclen is declared unsigned int in the C prototype but is
 * compared and decremented here as the full 64-bit %rdx, so this relies on
 * the caller passing it zero-extended - confirm.
 */
ENTRY(clmul_ghash_update)
cmp $16, %rdx
jb .Lupdate_just_ret # check length
movaps .Lbswap_mask, BSWAP
movups (%rdi), DATA
movups (%rcx), SHASH
PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
movups (%rsi), IN1
PSHUFB_XMM BSWAP IN1
pxor IN1, DATA
call __clmul_gf128mul_ble
/* advance to the next block; loop while at least 16 bytes remain */
sub $16, %rdx
add $16, %rsi
cmp $16, %rdx
jge .Lupdate_loop
PSHUFB_XMM BSWAP DATA
movups DATA, (%rdi)
.Lupdate_just_ret:
ret
/*
 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
 *
 * Calculate hash_key << 1 mod poly
 *
 * Reads the 16-byte key at %rsi, byte-reverses it, shifts it left by one
 * bit across the full 128 bits, conditionally XORs in .Lpoly, and stores the
 * result at %rdi.  Writes %xmm0-%xmm2 and BSWAP (%xmm5), so the caller has
 * to be in a context where the SSE registers may be used.
 */
ENTRY(clmul_ghash_setkey)
movaps .Lbswap_mask, BSWAP
movups (%rsi), %xmm0
PSHUFB_XMM BSWAP %xmm0
/* xmm0 = each 64-bit half << 1, xmm1 = the bit shifted out of each half */
movaps %xmm0, %xmm1
psllq $1, %xmm0
psrlq $63, %xmm1
movaps %xmm1, %xmm2
/* carry the bit shifted out of the low half into the high half */
pslldq $8, %xmm1
psrldq $8, %xmm2
por %xmm1, %xmm0
# reduction
pshufd $0b00100100, %xmm2, %xmm1
pcmpeqd .Ltwo_one, %xmm1
pand .Lpoly, %xmm1
pxor %xmm1, %xmm0
movups %xmm0, (%rdi)
ret
@@ -0,0 +1,338 @@
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains glue code.
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/cryptd.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
#include <asm/i387.h>
#include <asm/cpu_device_id.h>
/* Block and digest size, in bytes. */
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
/*
 * Prototypes of the routines called by this glue code; no definition appears
 * in this file, they are presumably provided by the companion assembly
 * source (TODO confirm).
 */
void clmul_ghash_mul(char *dst, const be128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
const be128 *shash);
void clmul_ghash_setkey(be128 *shash, const u8 *key);
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
/* Per-transform context of the synchronous "__ghash" implementation. */
struct ghash_ctx {
/* key material as written by clmul_ghash_setkey() */
be128 shash;
};
/* Per-request state of the synchronous "__ghash" implementation. */
struct ghash_desc_ctx {
/* running hash value; partial input is XORed into it in place */
u8 buffer[GHASH_BLOCK_SIZE];
/* bytes still missing to complete the current block; 0 when none is pending */
u32 bytes;
};
/*
 * Start a new hash: zero the running value and the pending-byte counter.
 * Always returns 0.
 */
static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *state = shash_desc_ctx(desc);

	memset(state, 0, sizeof(struct ghash_desc_ctx));

	return 0;
}
/*
 * Install the hash key.
 *
 * @key must be exactly GHASH_BLOCK_SIZE bytes; any other length sets
 * CRYPTO_TFM_RES_BAD_KEY_LEN on @tfm and returns -EINVAL.
 *
 * clmul_ghash_setkey() is written with SSE instructions and clobbers XMM
 * registers, so the call is bracketed with kernel_fpu_begin() /
 * kernel_fpu_end(), exactly as ghash_update() and ghash_flush() do for the
 * other assembly helpers.  Without the bracket the current task's FPU state
 * could be corrupted.
 */
static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *key, unsigned int keylen)
{
	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE) {
		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	kernel_fpu_begin();
	clmul_ghash_setkey(&ctx->shash, key);
	kernel_fpu_end();

	return 0;
}
/*
 * Absorb @srclen bytes from @src into the running hash.
 *
 * Three phases:
 *  1. if a block is pending from an earlier call, XOR input into the unfilled
 *     part of the buffer and multiply once it is complete;
 *  2. let clmul_ghash_update() process the remaining input (it is handed the
 *     full remaining length);
 *  3. XOR a trailing partial block (length not a multiple of 16) into the
 *     start of the buffer and record how many bytes are still missing.
 *
 * Phases 1 and 2 run inside a kernel_fpu_begin()/kernel_fpu_end() section.
 * Always returns 0.
 */
static int ghash_update(struct shash_desc *desc,
			const u8 *src, unsigned int srclen)
{
	struct ghash_desc_ctx *state = shash_desc_ctx(desc);
	struct ghash_ctx *key = crypto_shash_ctx(desc->tfm);
	u8 *acc = state->buffer;
	unsigned int tail;
	unsigned int i;

	kernel_fpu_begin();

	if (state->bytes) {
		unsigned int take = min(srclen, state->bytes);
		u8 *fill = acc + (GHASH_BLOCK_SIZE - state->bytes);

		state->bytes -= take;
		srclen -= take;
		for (i = 0; i < take; i++)
			fill[i] ^= src[i];
		src += take;

		if (!state->bytes)
			clmul_ghash_mul(acc, &key->shash);
	}

	clmul_ghash_update(acc, src, srclen, &key->shash);

	kernel_fpu_end();

	tail = srclen & 0xf;
	if (tail) {
		const u8 *rest = src + (srclen - tail);

		state->bytes = GHASH_BLOCK_SIZE - tail;
		for (i = 0; i < tail; i++)
			acc[i] ^= rest[i];
	}

	return 0;
}
/*
 * Complete a pending partial block, if any.
 *
 * The missing bytes of the block are treated as zeros.  Because partial
 * input is XORed into the buffer in place, zero padding leaves the buffer
 * unchanged, so the only work left is the multiplication by the key.  (The
 * previous byte-wise "^= 0" loop had no effect and is gone.)
 *
 * On return dctx->bytes is 0.
 */
static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
{
	if (dctx->bytes) {
		kernel_fpu_begin();
		clmul_ghash_mul(dctx->buffer, &ctx->shash);
		kernel_fpu_end();
	}

	dctx->bytes = 0;
}
/*
 * Finish the hash: flush any pending partial block, then copy the
 * GHASH_BLOCK_SIZE-byte running value to @dst.  Always returns 0.
 */
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_ctx *key = crypto_shash_ctx(desc->tfm);
	struct ghash_desc_ctx *state = shash_desc_ctx(desc);

	ghash_flush(key, state);
	memcpy(dst, state->buffer, GHASH_BLOCK_SIZE);

	return 0;
}
/*
 * Synchronous implementation.  It is registered under the internal name
 * "__ghash" with priority 0; ghash_async_init_tfm() looks it up by its driver
 * name.
 */
static struct shash_alg ghash_alg = {
	/* sizes */
	.digestsize	= GHASH_DIGEST_SIZE,
	.descsize	= sizeof(struct ghash_desc_ctx),

	/* operations */
	.setkey		= ghash_setkey,
	.init		= ghash_init,
	.update		= ghash_update,
	.final		= ghash_final,

	.base		= {
		.cra_name		= "__ghash",
		.cra_driver_name	= "__ghash-pclmulqdqni",
		.cra_priority		= 0,
		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize		= GHASH_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct ghash_ctx),
		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
		.cra_module		= THIS_MODULE,
	},
};
/*
 * ahash ->init.
 *
 * When irq_fpu_usable() is true the synchronous child is initialised
 * directly through the shash descriptor embedded in the request context.
 * Otherwise the request is copied into the request context, retargeted at
 * the cryptd transform, and submitted there.
 */
static int ghash_async_init(struct ahash_request *req)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
	struct ahash_request *cryptd_req = ahash_request_ctx(req);

	if (irq_fpu_usable()) {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);

		desc->tfm = cryptd_ahash_child(cryptd_tfm);
		desc->flags = req->base.flags;
		return crypto_shash_init(desc);
	}

	memcpy(cryptd_req, req, sizeof(*req));
	ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
	return crypto_ahash_init(cryptd_req);
}
/*
 * ahash ->update.
 *
 * When irq_fpu_usable() is true the data is fed straight to the shash
 * descriptor embedded in the request context.  Otherwise the request is
 * copied into the request context, retargeted at the cryptd transform, and
 * submitted there.
 */
static int ghash_async_update(struct ahash_request *req)
{
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct crypto_ahash *tfm;
	struct ghash_async_ctx *ctx;

	if (irq_fpu_usable())
		return shash_ahash_update(req, cryptd_shash_desc(cryptd_req));

	tfm = crypto_ahash_reqtfm(req);
	ctx = crypto_ahash_ctx(tfm);

	memcpy(cryptd_req, req, sizeof(*req));
	ahash_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
	return crypto_ahash_update(cryptd_req);
}
/*
 * ahash ->final.
 *
 * When irq_fpu_usable() is true the digest is produced directly from the
 * shash descriptor embedded in the request context and written to
 * req->result.  Otherwise the request is copied into the request context,
 * retargeted at the cryptd transform, and submitted there.
 */
static int ghash_async_final(struct ahash_request *req)
{
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct crypto_ahash *tfm;
	struct ghash_async_ctx *ctx;

	if (irq_fpu_usable())
		return crypto_shash_final(cryptd_shash_desc(cryptd_req),
					  req->result);

	tfm = crypto_ahash_reqtfm(req);
	ctx = crypto_ahash_ctx(tfm);

	memcpy(cryptd_req, req, sizeof(*req));
	ahash_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
	return crypto_ahash_final(cryptd_req);
}
/*
 * ahash ->digest (one-shot init + update + final).
 *
 * When irq_fpu_usable() is true the shash descriptor embedded in the request
 * context is bound to the synchronous child and the digest is computed in
 * place.  Otherwise the request is copied into the request context,
 * retargeted at the cryptd transform, and submitted there.
 */
static int ghash_async_digest(struct ahash_request *req)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
	struct ahash_request *cryptd_req = ahash_request_ctx(req);

	if (irq_fpu_usable()) {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);

		desc->tfm = cryptd_ahash_child(cryptd_tfm);
		desc->flags = req->base.flags;
		return shash_ahash_digest(req, desc);
	}

	memcpy(cryptd_req, req, sizeof(*req));
	ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
	return crypto_ahash_digest(cryptd_req);
}
/*
 * ahash ->setkey: forward the key to the cryptd transform.
 *
 * The request flags of @tfm are mirrored onto the cryptd transform before
 * the call, and its result flags are copied back to @tfm afterwards.
 * Returns whatever the inner setkey returned.
 */
static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct crypto_ahash *inner = &ctx->cryptd_tfm->base;
	int ret;

	/* outer -> inner: request flags */
	crypto_ahash_clear_flags(inner, CRYPTO_TFM_REQ_MASK);
	crypto_ahash_set_flags(inner,
			       crypto_ahash_get_flags(tfm) &
			       CRYPTO_TFM_REQ_MASK);

	ret = crypto_ahash_setkey(inner, key, keylen);

	/* inner -> outer: result flags */
	crypto_ahash_set_flags(tfm,
			       crypto_ahash_get_flags(inner) &
			       CRYPTO_TFM_RES_MASK);

	return ret;
}
/*
 * Transform constructor of the asynchronous front end.
 *
 * Allocates a cryptd ahash for "__ghash-pclmulqdqni", stores it in the
 * context, and sizes the request context so that it can hold a copy of an
 * ahash_request followed by the cryptd transform's own request context.
 * Returns 0, or the error encoded in the allocation result.
 */
static int ghash_async_init_tfm(struct crypto_tfm *tfm)
{
	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
	struct cryptd_ahash *inner;

	inner = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
	if (IS_ERR(inner))
		return PTR_ERR(inner);

	ctx->cryptd_tfm = inner;
	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
				 sizeof(struct ahash_request) +
				 crypto_ahash_reqsize(&inner->base));

	return 0;
}
/* Transform destructor: free the cryptd transform stored in the context. */
static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
{
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_free_ahash(ctx->cryptd_tfm);
}
/*
 * Asynchronous front end, registered under the public name "ghash" with
 * priority 400.
 */
static struct ahash_alg ghash_async_alg = {
	.setkey		= ghash_async_setkey,
	.init		= ghash_async_init,
	.update		= ghash_async_update,
	.final		= ghash_async_final,
	.digest		= ghash_async_digest,
	.halg = {
		.digestsize	= GHASH_DIGEST_SIZE,
		.base = {
			.cra_name		= "ghash",
			.cra_driver_name	= "ghash-clmulni",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
			.cra_blocksize		= GHASH_BLOCK_SIZE,
			.cra_type		= &crypto_ahash_type,
			.cra_init		= ghash_async_init_tfm,
			.cra_exit		= ghash_async_exit_tfm,
			.cra_list		= LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
			.cra_module		= THIS_MODULE,
		},
	},
};
/*
 * CPU match table: the module only registers its algorithms on CPUs that
 * report X86_FEATURE_PCLMULQDQ.  The table is also exported as a module
 * device table.
 */
static const struct x86_cpu_id pcmul_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
/*
 * Module entry point.
 *
 * Fails with -ENODEV when the CPU does not match pcmul_cpu_id.  Otherwise
 * registers the synchronous shash and then the asynchronous ahash; if the
 * second registration fails the first one is rolled back.  Returns 0 on
 * success or the error of the failing registration.
 */
static int __init ghash_pclmulqdqni_mod_init(void)
{
	int err;

	if (!x86_match_cpu(pcmul_cpu_id))
		return -ENODEV;

	err = crypto_register_shash(&ghash_alg);
	if (err)
		return err;

	err = crypto_register_ahash(&ghash_async_alg);
	if (err)
		crypto_unregister_shash(&ghash_alg);

	return err;
}
/* Module exit point: unregister both algorithms, in reverse registration order. */
static void __exit ghash_pclmulqdqni_mod_exit(void)
{
crypto_unregister_ahash(&ghash_async_alg);
crypto_unregister_shash(&ghash_alg);
}
/* Hook the init/exit routines into the module loader. */
module_init(ghash_pclmulqdqni_mod_init);
module_exit(ghash_pclmulqdqni_mod_exit);
/* Module metadata ("accelerated" was previously misspelled in the description). */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
		   "accelerated by PCLMULQDQ-NI");
/* Alias matching the public algorithm name declared in ghash_async_alg. */
MODULE_ALIAS("ghash");
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,920 @@
# enter ECRYPT_encrypt_bytes
# Arguments on entry: rdi = x (state), rsi = m (input), rdx = out, rcx = bytes.
# A scratch frame of 256 bytes plus (rsp & 31) is carved out below rsp, which
# leaves rsp 32-byte aligned; the adjustment is kept in r11 for the epilogue.
# Frame slots used by this routine (offsets from rsp):
#   0..48    saved r11, r12, r13, r14, r15, rbx, rbp
#   56..112  j0..j14, a copy of the 64-byte state as eight 64-bit words
#   120..152 x_backup, ctarget, out_backup, m_backup, bytes_backup
#   160..184 x5_stack, x10_stack, x15_stack, i_backup
#   192..255 tmp, staging buffer for a final block shorter than 64 bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
# x = arg1
mov %rdi,%r8
# m = arg2
mov %rsi,%rsi
# out = arg3
mov %rdx,%rdi
# bytes = arg4
mov %rcx,%rdx
# unsigned>? bytes - 0
cmp $0,%rdx
# comment:fp stack unchanged by jump
# goto done if !unsigned>
jbe ._done
# comment:fp stack unchanged by fallthrough
# start:
._start:
# r11_stack = r11
movq %r11,0(%rsp)
# r12_stack = r12
movq %r12,8(%rsp)
# r13_stack = r13
movq %r13,16(%rsp)
# r14_stack = r14
movq %r14,24(%rsp)
# r15_stack = r15
movq %r15,32(%rsp)
# rbx_stack = rbx
movq %rbx,40(%rsp)
# rbp_stack = rbp
movq %rbp,48(%rsp)
# in0 = *(uint64 *) (x + 0)
movq 0(%r8),%rcx
# in2 = *(uint64 *) (x + 8)
movq 8(%r8),%r9
# in4 = *(uint64 *) (x + 16)
movq 16(%r8),%rax
# in6 = *(uint64 *) (x + 24)
movq 24(%r8),%r10
# in8 = *(uint64 *) (x + 32)
movq 32(%r8),%r11
# in10 = *(uint64 *) (x + 40)
movq 40(%r8),%r12
# in12 = *(uint64 *) (x + 48)
movq 48(%r8),%r13
# in14 = *(uint64 *) (x + 56)
movq 56(%r8),%r14
# j0 = in0
movq %rcx,56(%rsp)
# j2 = in2
movq %r9,64(%rsp)
# j4 = in4
movq %rax,72(%rsp)
# j6 = in6
movq %r10,80(%rsp)
# j8 = in8
movq %r11,88(%rsp)
# j10 = in10
movq %r12,96(%rsp)
# j12 = in12
movq %r13,104(%rsp)
# j14 = in14
movq %r14,112(%rsp)
# x_backup = x
movq %r8,120(%rsp)
# bytesatleast1:
# Top of the per-block loop.  When fewer than 64 bytes remain, the real output
# pointer is parked in ctarget, the remaining input is copied into tmp, and
# both m and out are redirected to tmp so the block code below can always
# read and write a full 64 bytes.
._bytesatleast1:
# unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto nocopy if !unsigned<
jae ._nocopy
# ctarget = out
movq %rdi,128(%rsp)
# out = &tmp
leaq 192(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leaq 192(%rsp),%rdi
# m = &tmp
leaq 192(%rsp),%rsi
# comment:fp stack unchanged by fallthrough
# nocopy:
# Save out, m and bytes (their registers are reused as working words), then
# split each 64-bit j word into two 32-bit working words: the even-numbered
# word keeps the whole quadword in its register, the odd-numbered word is the
# quadword shifted right by 32.  x5, x10 and x15 live in stack slots.
._nocopy:
# out_backup = out
movq %rdi,136(%rsp)
# m_backup = m
movq %rsi,144(%rsp)
# bytes_backup = bytes
movq %rdx,152(%rsp)
# x1 = j0
movq 56(%rsp),%rdi
# x0 = x1
mov %rdi,%rdx
# (uint64) x1 >>= 32
shr $32,%rdi
# x3 = j2
movq 64(%rsp),%rsi
# x2 = x3
mov %rsi,%rcx
# (uint64) x3 >>= 32
shr $32,%rsi
# x5 = j4
movq 72(%rsp),%r8
# x4 = x5
mov %r8,%r9
# (uint64) x5 >>= 32
shr $32,%r8
# x5_stack = x5
movq %r8,160(%rsp)
# x7 = j6
movq 80(%rsp),%r8
# x6 = x7
mov %r8,%rax
# (uint64) x7 >>= 32
shr $32,%r8
# x9 = j8
movq 88(%rsp),%r10
# x8 = x9
mov %r10,%r11
# (uint64) x9 >>= 32
shr $32,%r10
# x11 = j10
movq 96(%rsp),%r12
# x10 = x11
mov %r12,%r13
# x10_stack = x10
movq %r13,168(%rsp)
# (uint64) x11 >>= 32
shr $32,%r12
# x13 = j12
movq 104(%rsp),%r13
# x12 = x13
mov %r13,%r14
# (uint64) x13 >>= 32
shr $32,%r13
# x15 = j14
movq 112(%rsp),%r15
# x14 = x15
mov %r15,%rbx
# (uint64) x15 >>= 32
shr $32,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = 20
mov $20,%r15
# mainloop:
# Each pass through the loop body performs four rounds and the counter i is
# decreased by 4 per pass, so starting from i = 20 the body runs five times.
# r15 serves both as the counter and as a working register, hence i_backup.
# First round of the pass: each update has the form
# x[t] ^= (x[u] + x[v]) <<< r with r = 7, 9, 13, 18, applied to the groups
# (x0,x4,x8,x12), (x5,x9,x13,x1), (x10,x14,x2,x6) and (x15,x3,x7,x11).
._mainloop:
# i_backup = i
movq %r15,184(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# Second round of the pass: the same add / rotate (7, 9, 13, 18) / xor steps,
# now applied to the groups (x0,x1,x2,x3), (x5,x6,x7,x4), (x10,x11,x8,x9) and
# (x15,x12,x13,x14).
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# Third round of the pass: same instruction sequence as the first round
# (groups (x0,x4,x8,x12), (x5,x9,x13,x1), (x10,x14,x2,x6), (x15,x3,x7,x11)).
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# Fourth round of the pass: same instruction sequence as the second round
# (groups (x0,x1,x2,x3), (x5,x6,x7,x4), (x10,x11,x8,x9), (x15,x12,x13,x14)),
# followed by the loop control: i is reloaded, decreased by 4, and the body
# repeats while the result is still above zero.
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = i_backup
movq 184(%rsp),%r15
# unsigned>? i -= 4
sub $4,%r15
# comment:fp stack unchanged by jump
# goto mainloop if unsigned>
ja ._mainloop
# Feed-forward: add the saved input words j0..j14 to the working words and
# re-pack each even/odd pair into one 64-bit register.  For a pair the even
# word is added with a 32-bit addl (which also clears the upper half of its
# register); the odd word is moved to the upper half, added to the whole
# 64-bit j word, and the shr/shl pair then discards the lower half so only
# the odd sum is left in bits 32..63 before the two halves are combined.
# (uint32) x2 += j2
addl 64(%rsp),%ecx
# x3 <<= 32
shl $32,%rsi
# x3 += j2
addq 64(%rsp),%rsi
# (uint64) x3 >>= 32
shr $32,%rsi
# x3 <<= 32
shl $32,%rsi
# x2 += x3
add %rsi,%rcx
# (uint32) x6 += j6
addl 80(%rsp),%eax
# x7 <<= 32
shl $32,%r8
# x7 += j6
addq 80(%rsp),%r8
# (uint64) x7 >>= 32
shr $32,%r8
# x7 <<= 32
shl $32,%r8
# x6 += x7
add %r8,%rax
# (uint32) x8 += j8
addl 88(%rsp),%r11d
# x9 <<= 32
shl $32,%r10
# x9 += j8
addq 88(%rsp),%r10
# (uint64) x9 >>= 32
shr $32,%r10
# x9 <<= 32
shl $32,%r10
# x8 += x9
add %r10,%r11
# (uint32) x12 += j12
addl 104(%rsp),%r14d
# x13 <<= 32
shl $32,%r13
# x13 += j12
addq 104(%rsp),%r13
# (uint64) x13 >>= 32
shr $32,%r13
# x13 <<= 32
shl $32,%r13
# x12 += x13
add %r13,%r14
# (uint32) x0 += j0
addl 56(%rsp),%edx
# x1 <<= 32
shl $32,%rdi
# x1 += j0
addq 56(%rsp),%rdi
# (uint64) x1 >>= 32
shr $32,%rdi
# x1 <<= 32
shl $32,%rdi
# x0 += x1
add %rdi,%rdx
# x5 = x5_stack
movq 160(%rsp),%rdi
# (uint32) x4 += j4
addl 72(%rsp),%r9d
# x5 <<= 32
shl $32,%rdi
# x5 += j4
addq 72(%rsp),%rdi
# (uint64) x5 >>= 32
shr $32,%rdi
# x5 <<= 32
shl $32,%rdi
# x4 += x5
add %rdi,%r9
# x10 = x10_stack
movq 168(%rsp),%r8
# (uint32) x10 += j10
addl 96(%rsp),%r8d
# x11 <<= 32
shl $32,%r12
# x11 += j10
addq 96(%rsp),%r12
# (uint64) x11 >>= 32
shr $32,%r12
# x11 <<= 32
shl $32,%r12
# x10 += x11
add %r12,%r8
# x15 = x15_stack
movq 176(%rsp),%rdi
# (uint32) x14 += j14
addl 112(%rsp),%ebx
# x15 <<= 32
shl $32,%rdi
# x15 += j14
addq 112(%rsp),%rdi
# (uint64) x15 >>= 32
shr $32,%rdi
# x15 <<= 32
shl $32,%rdi
# x14 += x15
add %rdi,%rbx
# Output stage: XOR the eight packed 64-bit words with 64 bytes of input and
# store them to out, then add 1 to the 64-bit word j8.  If more than 64 bytes
# were pending, continue with the next block; if exactly 64, go straight to
# the epilogue; otherwise (short final block, which was processed in tmp)
# copy the remaining byte count from tmp to the real destination ctarget.
# out = out_backup
movq 136(%rsp),%rdi
# m = m_backup
movq 144(%rsp),%rsi
# x0 ^= *(uint64 *) (m + 0)
xorq 0(%rsi),%rdx
# *(uint64 *) (out + 0) = x0
movq %rdx,0(%rdi)
# x2 ^= *(uint64 *) (m + 8)
xorq 8(%rsi),%rcx
# *(uint64 *) (out + 8) = x2
movq %rcx,8(%rdi)
# x4 ^= *(uint64 *) (m + 16)
xorq 16(%rsi),%r9
# *(uint64 *) (out + 16) = x4
movq %r9,16(%rdi)
# x6 ^= *(uint64 *) (m + 24)
xorq 24(%rsi),%rax
# *(uint64 *) (out + 24) = x6
movq %rax,24(%rdi)
# x8 ^= *(uint64 *) (m + 32)
xorq 32(%rsi),%r11
# *(uint64 *) (out + 32) = x8
movq %r11,32(%rdi)
# x10 ^= *(uint64 *) (m + 40)
xorq 40(%rsi),%r8
# *(uint64 *) (out + 40) = x10
movq %r8,40(%rdi)
# x12 ^= *(uint64 *) (m + 48)
xorq 48(%rsi),%r14
# *(uint64 *) (out + 48) = x12
movq %r14,48(%rdi)
# x14 ^= *(uint64 *) (m + 56)
xorq 56(%rsi),%rbx
# *(uint64 *) (out + 56) = x14
movq %rbx,56(%rdi)
# bytes = bytes_backup
movq 152(%rsp),%rdx
# in8 = j8
movq 88(%rsp),%rcx
# in8 += 1
add $1,%rcx
# j8 = in8
movq %rcx,88(%rsp)
# unsigned>? unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# comment:fp stack unchanged by jump
# goto bytesatleast64 if !unsigned<
jae ._bytesatleast64
# m = out
mov %rdi,%rsi
# out = ctarget
movq 128(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
# comment:fp stack unchanged by fallthrough
# bytesatleast64:
# Epilogue: write the advanced word j8 back to the caller's state at x + 32,
# restore the registers saved at ._start, and release the scratch frame.
# The zero-length early exit joins at ._done, where r11 still holds the
# frame adjustment computed on entry.  rdi and rsi are copied to rax and rdx
# before returning.
._bytesatleast64:
# x = x_backup
movq 120(%rsp),%rdi
# in8 = j8
movq 88(%rsp),%rsi
# *(uint64 *) (x + 32) = in8
movq %rsi,32(%rdi)
# r11 = r11_stack
movq 0(%rsp),%r11
# r12 = r12_stack
movq 8(%rsp),%r12
# r13 = r13_stack
movq 16(%rsp),%r13
# r14 = r14_stack
movq 24(%rsp),%r14
# r15 = r15_stack
movq 32(%rsp),%r15
# rbx = rbx_stack
movq 40(%rsp),%rbx
# rbp = rbp_stack
movq 48(%rsp),%rbp
# comment:fp stack unchanged by fallthrough
# done:
._done:
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
# bytesatleast65:
# More input is pending: step bytes, out and m past the 64-byte block that
# was just written and go round the per-block loop again.
._bytesatleast65:
# bytes -= 64
sub $64,%rdx
# out += 64
add $64,%rdi
# m += 64
add $64,%rsi
# comment:fp stack unchanged by jump
# goto bytesatleast1
jmp ._bytesatleast1
# enter ECRYPT_keysetup
# Arguments on entry: rdi = x (state), rsi = k (key bytes), rdx = kbits.
# The first 16 key bytes are stored at x + 4 .. x + 19.  For kbits >= 256 the
# next 16 key bytes go to x + 44 .. x + 59; for smaller kbits the first 16
# bytes are stored there a second time.  Four 32-bit constants are then
# written at x + 0, x + 20, x + 40 and x + 60; read as little-endian ASCII
# they spell "expand 32-byte k" in the 256-bit branch and "expand 16-byte k"
# in the other branch.
# The same scratch frame as in ECRYPT_encrypt_bytes is reserved and released,
# but nothing is stored in it here.
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
# k = arg2
mov %rsi,%rsi
# kbits = arg3
mov %rdx,%rdx
# x = arg1
mov %rdi,%rdi
# in0 = *(uint64 *) (k + 0)
movq 0(%rsi),%r8
# in2 = *(uint64 *) (k + 8)
movq 8(%rsi),%r9
# *(uint64 *) (x + 4) = in0
movq %r8,4(%rdi)
# *(uint64 *) (x + 12) = in2
movq %r9,12(%rdi)
# unsigned<? kbits - 256
cmp $256,%rdx
# comment:fp stack unchanged by jump
# goto kbits128 if unsigned<
jb ._kbits128
# kbits256:
._kbits256:
# in10 = *(uint64 *) (k + 16)
movq 16(%rsi),%rdx
# in12 = *(uint64 *) (k + 24)
movq 24(%rsi),%rsi
# *(uint64 *) (x + 44) = in10
movq %rdx,44(%rdi)
# *(uint64 *) (x + 52) = in12
movq %rsi,52(%rdi)
# in0 = 1634760805
mov $1634760805,%rsi
# in4 = 857760878
mov $857760878,%rdx
# in10 = 2036477234
mov $2036477234,%rcx
# in14 = 1797285236
mov $1797285236,%r8
# *(uint32 *) (x + 0) = in0
movl %esi,0(%rdi)
# *(uint32 *) (x + 20) = in4
movl %edx,20(%rdi)
# *(uint32 *) (x + 40) = in10
movl %ecx,40(%rdi)
# *(uint32 *) (x + 60) = in14
movl %r8d,60(%rdi)
# comment:fp stack unchanged by jump
# goto keysetupdone
jmp ._keysetupdone
# kbits128:
._kbits128:
# in10 = *(uint64 *) (k + 0)
movq 0(%rsi),%rdx
# in12 = *(uint64 *) (k + 8)
movq 8(%rsi),%rsi
# *(uint64 *) (x + 44) = in10
movq %rdx,44(%rdi)
# *(uint64 *) (x + 52) = in12
movq %rsi,52(%rdi)
# in0 = 1634760805
mov $1634760805,%rsi
# in4 = 824206446
mov $824206446,%rdx
# in10 = 2036477238
mov $2036477238,%rcx
# in14 = 1797285236
mov $1797285236,%r8
# *(uint32 *) (x + 0) = in0
movl %esi,0(%rdi)
# *(uint32 *) (x + 20) = in4
movl %edx,20(%rdi)
# *(uint32 *) (x + 40) = in10
movl %ecx,40(%rdi)
# *(uint32 *) (x + 60) = in14
movl %r8d,60(%rdi)
# keysetupdone:
._keysetupdone:
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
# enter ECRYPT_ivsetup
# Arguments on entry: rdi = x (state), rsi = iv.
# Copies 8 bytes from iv to x + 24 and zeroes the 8 bytes at x + 32, which is
# the word ECRYPT_encrypt_bytes advances by one per 64-byte block.
# The same scratch frame as in ECRYPT_encrypt_bytes is reserved and released,
# but nothing is stored in it here.
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
# iv = arg2
mov %rsi,%rsi
# x = arg1
mov %rdi,%rdi
# in6 = *(uint64 *) (iv + 0)
movq 0(%rsi),%rsi
# in8 = 0
mov $0,%r8
# *(uint64 *) (x + 24) = in6
movq %rsi,24(%rdi)
# *(uint64 *) (x + 32) = in8
movq %r8,32(%rdi)
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
+129
View File
@@ -0,0 +1,129 @@
/*
* Glue code for optimized assembly version of Salsa20.
*
* Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
*
* The assembly code is public-domain code written by Daniel J. Bernstein
* <djb@cr.yp.to>. It has been modified to add indentation and to remove
* extraneous comments and functions that are not needed.
* - i586 version, renamed as salsa20-i586-asm_32.S
* available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
* - x86-64 version, renamed as salsa20-x86_64-asm_64.S
* available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/algapi.h>
#include <linux/module.h>
#include <linux/crypto.h>
/* IV and key sizes in bytes; setkey() multiplies by 8 to pass bit counts. */
#define SALSA20_IV_SIZE 8U
#define SALSA20_MIN_KEY_SIZE 16U
#define SALSA20_MAX_KEY_SIZE 32U
/*
 * Map the salsa20_* names used in this file onto the ECRYPT_* symbols
 * provided by the assembly implementation.
 */
#define salsa20_keysetup ECRYPT_keysetup
#define salsa20_ivsetup ECRYPT_ivsetup
#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
/*
 * Per-transform cipher state passed to the assembly routines:
 * sixteen 32-bit words (64 bytes).
 */
struct salsa20_ctx
{
u32 input[16];
};
/*
 * Entry points implemented in assembly.
 *
 * salsa20_keysetup() receives the key and IV lengths in bits: setkey()
 * below passes keysize * 8 and SALSA20_IV_SIZE * 8.
 * salsa20_ivsetup() loads the 8-byte IV at @iv into @ctx.
 * salsa20_encrypt_bytes() processes @bytes bytes from @src into @dst.
 */
asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
u32 keysize, u32 ivsize);
asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
const u8 *src, u8 *dst, u32 bytes);
static int setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keysize)
{
struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
return 0;
}
/*
 * Encrypt or decrypt @nbytes bytes from @src into @dst.  The algorithm
 * descriptor registers this routine for both directions.
 *
 * The data is processed chunk by chunk through the blkcipher_walk API.
 * While a chunk holds at least 64 bytes, only the largest multiple of 64
 * is processed and the remainder is handed back to the walk; a final
 * chunk shorter than 64 bytes is processed as is.
 * NOTE(review): 64 is presumably the Salsa20 keystream block size.
 *
 * blkcipher_walk_done() is only called after the walk has handed out data.
 * The earlier single-chunk shortcut called it whenever walk.nbytes equalled
 * @nbytes, including for zero-length requests where nothing had been
 * handed out.
 *
 * Returns the status reported by the blkcipher walk helpers.
 */
static int encrypt(struct blkcipher_desc *desc,
		   struct scatterlist *dst, struct scatterlist *src,
		   unsigned int nbytes)
{
	struct blkcipher_walk walk;
	struct crypto_blkcipher *tfm = desc->tfm;
	struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, 64);

	/*
	 * Walk setup failed or there is nothing to process: do not touch
	 * walk.iv and do not call blkcipher_walk_done().
	 */
	if (err || !walk.nbytes)
		return err;

	salsa20_ivsetup(ctx, walk.iv);

	while (walk.nbytes >= 64) {
		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
				      walk.dst.virt.addr,
				      walk.nbytes - (walk.nbytes % 64));
		err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
	}

	if (walk.nbytes) {
		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
				      walk.dst.virt.addr, walk.nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}
/*
 * Algorithm descriptor registered with the crypto API by init():
 * "salsa20" implemented by the "salsa20-asm" driver as a blkcipher.
 */
static struct crypto_alg alg = {
.cra_name = "salsa20",
.cra_driver_name = "salsa20-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_type = &crypto_blkcipher_type,
/* NOTE(review): block size 1 presumably marks a stream cipher - confirm. */
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct salsa20_ctx),
/* NOTE(review): mask 3 presumably requests 4-byte aligned buffers. */
.cra_alignmask = 3,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(alg.cra_list),
.cra_u = {
.blkcipher = {
.setkey = setkey,
/* The same routine is used for both directions. */
.encrypt = encrypt,
.decrypt = encrypt,
.min_keysize = SALSA20_MIN_KEY_SIZE,
.max_keysize = SALSA20_MAX_KEY_SIZE,
.ivsize = SALSA20_IV_SIZE,
}
}
};
/* Module entry point: register the algorithm and report the result. */
static int __init init(void)
{
	int ret = crypto_register_alg(&alg);

	return ret;
}
/* Module exit point: unregister the algorithm registered by init(). */
static void __exit fini(void)
{
crypto_unregister_alg(&alg);
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
MODULE_ALIAS("salsa20");
MODULE_ALIAS("salsa20-asm");
@@ -0,0 +1,635 @@
/*
* Serpent Cipher 4-way parallel algorithm (i586/SSE2)
*
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* Based on crypto/serpent.c by
* Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
* 2003 Herbert Valerio Riedel <hvr@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
.file "serpent-sse2-i586-asm_32.S"
.text
/* Byte offsets of the stack-passed arguments from %esp at function entry. */
#define arg_ctx 4
#define arg_dst 8
#define arg_src 12
#define arg_xor 16
/**********************************************************************
4-way SSE2 serpent
**********************************************************************/
/* CTX: base address from which get_key fetches round-key words. */
#define CTX %edx
/* RA..RE: the five working registers that the rounds permute among. */
#define RA %xmm0
#define RB %xmm1
#define RC %xmm2
#define RD %xmm3
#define RE %xmm4
/* RT0, RT1: extra temporaries for key loads and block input/output. */
#define RT0 %xmm5
#define RT1 %xmm6
/* RNOT: set to all ones at function entry; pxor with it is a bitwise NOT. */
#define RNOT %xmm7
/*
 * Load the 32-bit word at byte offset (4*i + j)*4 from CTX (word j of
 * round key i) and broadcast it to all four 32-bit lanes of t.
 */
#define get_key(i, j, t) \
movd (4*(i)+(j))*4(CTX), t; \
pshufd $0, t, t;
/*
 * XOR round key i into x0..x3 (word j of the key into xj).
 * x4, RT0 and RT1 are overwritten as temporaries.
 */
#define K(x0, x1, x2, x3, x4, i) \
get_key(i, 0, x4); \
get_key(i, 1, RT0); \
get_key(i, 2, RT1); \
pxor x4, x0; \
pxor RT0, x1; \
pxor RT1, x2; \
get_key(i, 3, x4); \
pxor x4, x3;
/*
 * Linear mixing of x0..x3 followed by XOR with round key i, as used
 * between the encryption S-boxes.  Each rotate-left is built from a
 * left shift, a right shift and an OR (rotations by 13, 3, 1, 7, 5 and
 * 22 bits).  The key XORs for x1/x3 and x0/x2 are interleaved with the
 * last rotations.  x4 and RT0 are overwritten as temporaries.
 */
#define LK(x0, x1, x2, x3, x4, i) \
movdqa x0, x4; \
pslld $13, x0; \
psrld $(32 - 13), x4; \
por x4, x0; \
pxor x0, x1; \
movdqa x2, x4; \
pslld $3, x2; \
psrld $(32 - 3), x4; \
por x4, x2; \
pxor x2, x1; \
movdqa x1, x4; \
pslld $1, x1; \
psrld $(32 - 1), x4; \
por x4, x1; \
movdqa x0, x4; \
pslld $3, x4; \
pxor x2, x3; \
pxor x4, x3; \
movdqa x3, x4; \
pslld $7, x3; \
psrld $(32 - 7), x4; \
por x4, x3; \
movdqa x1, x4; \
pslld $7, x4; \
pxor x1, x0; \
pxor x3, x0; \
pxor x3, x2; \
pxor x4, x2; \
movdqa x0, x4; \
get_key(i, 1, RT0); \
pxor RT0, x1; \
get_key(i, 3, RT0); \
pxor RT0, x3; \
pslld $5, x0; \
psrld $(32 - 5), x4; \
por x4, x0; \
movdqa x2, x4; \
pslld $22, x2; \
psrld $(32 - 22), x4; \
por x4, x2; \
get_key(i, 0, RT0); \
pxor RT0, x0; \
get_key(i, 2, RT0); \
pxor RT0, x2;
/*
 * XOR round key i into x0..x3 (via K), then apply the linear mixing in
 * the reverse direction, as used between the decryption S-boxes.  Each
 * rotate-right is built from a right shift, a left shift and an OR
 * (rotations by 5, 22, 1, 7, 13 and 3 bits).  x4, RT0 and RT1 are
 * overwritten as temporaries.
 */
#define KL(x0, x1, x2, x3, x4, i) \
K(x0, x1, x2, x3, x4, i); \
movdqa x0, x4; \
psrld $5, x0; \
pslld $(32 - 5), x4; \
por x4, x0; \
movdqa x2, x4; \
psrld $22, x2; \
pslld $(32 - 22), x4; \
por x4, x2; \
pxor x3, x2; \
pxor x3, x0; \
movdqa x1, x4; \
pslld $7, x4; \
pxor x1, x0; \
pxor x4, x2; \
movdqa x1, x4; \
psrld $1, x1; \
pslld $(32 - 1), x4; \
por x4, x1; \
movdqa x3, x4; \
psrld $7, x3; \
pslld $(32 - 7), x4; \
por x4, x3; \
pxor x0, x1; \
movdqa x0, x4; \
pslld $3, x4; \
pxor x4, x3; \
movdqa x0, x4; \
psrld $13, x0; \
pslld $(32 - 13), x4; \
por x4, x0; \
pxor x2, x1; \
pxor x2, x3; \
movdqa x2, x4; \
psrld $3, x2; \
pslld $(32 - 3), x4; \
por x4, x2;
/*
 * Encryption S-box 0 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S0(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
por x0, x3; \
pxor x4, x0; \
pxor x2, x4; \
pxor RNOT, x4; \
pxor x1, x3; \
pand x0, x1; \
pxor x4, x1; \
pxor x0, x2; \
pxor x3, x0; \
por x0, x4; \
pxor x2, x0; \
pand x1, x2; \
pxor x2, x3; \
pxor RNOT, x1; \
pxor x4, x2; \
pxor x2, x1;
/*
 * Encryption S-box 1 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S1(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
pxor x0, x1; \
pxor x3, x0; \
pxor RNOT, x3; \
pand x1, x4; \
por x1, x0; \
pxor x2, x3; \
pxor x3, x0; \
pxor x3, x1; \
pxor x4, x3; \
por x4, x1; \
pxor x2, x4; \
pand x0, x2; \
pxor x1, x2; \
por x0, x1; \
pxor RNOT, x0; \
pxor x2, x0; \
pxor x1, x4;
/*
 * Encryption S-box 2 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S2(x0, x1, x2, x3, x4) \
pxor RNOT, x3; \
pxor x0, x1; \
movdqa x0, x4; \
pand x2, x0; \
pxor x3, x0; \
por x4, x3; \
pxor x1, x2; \
pxor x1, x3; \
pand x0, x1; \
pxor x2, x0; \
pand x3, x2; \
por x1, x3; \
pxor RNOT, x0; \
pxor x0, x3; \
pxor x0, x4; \
pxor x2, x0; \
por x2, x1;
/*
 * Encryption S-box 3 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S3(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
pxor x3, x1; \
por x0, x3; \
pand x0, x4; \
pxor x2, x0; \
pxor x1, x2; \
pand x3, x1; \
pxor x3, x2; \
por x4, x0; \
pxor x3, x4; \
pxor x0, x1; \
pand x3, x0; \
pand x4, x3; \
pxor x2, x3; \
por x1, x4; \
pand x1, x2; \
pxor x3, x4; \
pxor x3, x0; \
pxor x2, x3;
/*
 * Encryption S-box 4 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S4(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
pand x0, x3; \
pxor x4, x0; \
pxor x2, x3; \
por x4, x2; \
pxor x1, x0; \
pxor x3, x4; \
por x0, x2; \
pxor x1, x2; \
pand x0, x1; \
pxor x4, x1; \
pand x2, x4; \
pxor x3, x2; \
pxor x0, x4; \
por x1, x3; \
pxor RNOT, x1; \
pxor x0, x3;
/*
 * Encryption S-box 5 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S5(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
por x0, x1; \
pxor x1, x2; \
pxor RNOT, x3; \
pxor x0, x4; \
pxor x2, x0; \
pand x4, x1; \
por x3, x4; \
pxor x0, x4; \
pand x3, x0; \
pxor x3, x1; \
pxor x2, x3; \
pxor x1, x0; \
pand x4, x2; \
pxor x2, x1; \
pand x0, x2; \
pxor x2, x3;
/*
 * Encryption S-box 6 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S6(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
pxor x0, x3; \
pxor x2, x1; \
pxor x0, x2; \
pand x3, x0; \
por x3, x1; \
pxor RNOT, x4; \
pxor x1, x0; \
pxor x2, x1; \
pxor x4, x3; \
pxor x0, x4; \
pand x0, x2; \
pxor x1, x4; \
pxor x3, x2; \
pand x1, x3; \
pxor x0, x3; \
pxor x2, x1;
/*
 * Encryption S-box 7 as a sequence of logic operations on x0..x3.
 * x4 is written before it is read.  Results are spread over x0..x4;
 * the call sites pass them on in permuted register order.
 */
#define S7(x0, x1, x2, x3, x4) \
pxor RNOT, x1; \
movdqa x1, x4; \
pxor RNOT, x0; \
pand x2, x1; \
pxor x3, x1; \
por x4, x3; \
pxor x2, x4; \
pxor x3, x2; \
pxor x0, x3; \
por x1, x0; \
pand x0, x2; \
pxor x4, x0; \
pxor x3, x4; \
pand x0, x3; \
pxor x1, x4; \
pxor x4, x2; \
pxor x1, x3; \
por x0, x4; \
pxor x1, x4;
/*
 * Inverse S-box 0 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI0(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
pxor x0, x1; \
por x1, x3; \
pxor x1, x4; \
pxor RNOT, x0; \
pxor x3, x2; \
pxor x0, x3; \
pand x1, x0; \
pxor x2, x0; \
pand x3, x2; \
pxor x4, x3; \
pxor x3, x2; \
pxor x3, x1; \
pand x0, x3; \
pxor x0, x1; \
pxor x2, x0; \
pxor x3, x4;
/*
 * Inverse S-box 1 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI1(x0, x1, x2, x3, x4) \
pxor x3, x1; \
movdqa x0, x4; \
pxor x2, x0; \
pxor RNOT, x2; \
por x1, x4; \
pxor x3, x4; \
pand x1, x3; \
pxor x2, x1; \
pand x4, x2; \
pxor x1, x4; \
por x3, x1; \
pxor x0, x3; \
pxor x0, x2; \
por x4, x0; \
pxor x4, x2; \
pxor x0, x1; \
pxor x1, x4;
/*
 * Inverse S-box 2 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI2(x0, x1, x2, x3, x4) \
pxor x1, x2; \
movdqa x3, x4; \
pxor RNOT, x3; \
por x2, x3; \
pxor x4, x2; \
pxor x0, x4; \
pxor x1, x3; \
por x2, x1; \
pxor x0, x2; \
pxor x4, x1; \
por x3, x4; \
pxor x3, x2; \
pxor x2, x4; \
pand x1, x2; \
pxor x3, x2; \
pxor x4, x3; \
pxor x0, x4;
/*
 * Inverse S-box 3 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI3(x0, x1, x2, x3, x4) \
pxor x1, x2; \
movdqa x1, x4; \
pand x2, x1; \
pxor x0, x1; \
por x4, x0; \
pxor x3, x4; \
pxor x3, x0; \
por x1, x3; \
pxor x2, x1; \
pxor x3, x1; \
pxor x2, x0; \
pxor x3, x2; \
pand x1, x3; \
pxor x0, x1; \
pand x2, x0; \
pxor x3, x4; \
pxor x0, x3; \
pxor x1, x0;
/*
 * Inverse S-box 4 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI4(x0, x1, x2, x3, x4) \
pxor x3, x2; \
movdqa x0, x4; \
pand x1, x0; \
pxor x2, x0; \
por x3, x2; \
pxor RNOT, x4; \
pxor x0, x1; \
pxor x2, x0; \
pand x4, x2; \
pxor x0, x2; \
por x4, x0; \
pxor x3, x0; \
pand x2, x3; \
pxor x3, x4; \
pxor x1, x3; \
pand x0, x1; \
pxor x1, x4; \
pxor x3, x0;
/*
 * Inverse S-box 5 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI5(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
por x2, x1; \
pxor x4, x2; \
pxor x3, x1; \
pand x4, x3; \
pxor x3, x2; \
por x0, x3; \
pxor RNOT, x0; \
pxor x2, x3; \
por x0, x2; \
pxor x1, x4; \
pxor x4, x2; \
pand x0, x4; \
pxor x1, x0; \
pxor x3, x1; \
pand x2, x0; \
pxor x3, x2; \
pxor x2, x0; \
pxor x4, x2; \
pxor x3, x4;
/*
 * Inverse S-box 6 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI6(x0, x1, x2, x3, x4) \
pxor x2, x0; \
movdqa x0, x4; \
pand x3, x0; \
pxor x3, x2; \
pxor x2, x0; \
pxor x1, x3; \
por x4, x2; \
pxor x3, x2; \
pand x0, x3; \
pxor RNOT, x0; \
pxor x1, x3; \
pand x2, x1; \
pxor x0, x4; \
pxor x4, x3; \
pxor x2, x4; \
pxor x1, x0; \
pxor x0, x2;
/*
 * Inverse S-box 7 (decryption) as a sequence of logic operations on
 * x0..x3.  x4 is written before it is read.  Results are spread over
 * x0..x4; the call sites pass them on in permuted register order.
 */
#define SI7(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
pand x0, x3; \
pxor x2, x0; \
por x4, x2; \
pxor x1, x4; \
pxor RNOT, x0; \
por x3, x1; \
pxor x0, x4; \
pand x2, x0; \
pxor x1, x0; \
pand x2, x1; \
pxor x2, x3; \
pxor x3, x4; \
pand x3, x2; \
por x0, x3; \
pxor x4, x1; \
pxor x4, x3; \
pand x0, x4; \
pxor x2, x4;
/*
 * Transpose the 4x4 matrix of 32-bit words held in x0..x3 (one row per
 * register) in place.  t1 and t2 are overwritten as temporaries; t0 is
 * accepted but not used.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
movdqa x0, t2; \
punpckldq x1, x0; \
punpckhdq x1, t2; \
movdqa x2, t1; \
punpckhdq x3, x2; \
punpckldq x3, t1; \
movdqa x0, x1; \
punpcklqdq t1, x0; \
punpckhqdq t1, x1; \
movdqa t2, x3; \
punpcklqdq x2, t2; \
punpckhqdq x2, x3; \
movdqa t2, x2;
/*
 * Load four consecutive 16-byte blocks from in (unaligned loads) and
 * transpose them, so that xN ends up holding 32-bit word N of each of
 * the four blocks.
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
movdqu (0*4*4)(in), x0; \
movdqu (1*4*4)(in), x1; \
movdqu (2*4*4)(in), x2; \
movdqu (3*4*4)(in), x3; \
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
/*
 * Transpose x0..x3 back to one block per register and store the four
 * 16-byte blocks to out (unaligned stores).
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
movdqu x0, (0*4*4)(out); \
movdqu x1, (1*4*4)(out); \
movdqu x2, (2*4*4)(out); \
movdqu x3, (3*4*4)(out);
/*
 * Transpose x0..x3 back to one block per register, XOR each block with
 * the 16 bytes already stored at the matching position in out, and
 * store the result there.  t0 is overwritten as a temporary.
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
movdqu (0*4*4)(out), t0; \
pxor t0, x0; \
movdqu x0, (0*4*4)(out); \
movdqu (1*4*4)(out), t0; \
pxor t0, x1; \
movdqu x1, (1*4*4)(out); \
movdqu (2*4*4)(out), t0; \
pxor t0, x2; \
movdqu x2, (2*4*4)(out); \
movdqu (3*4*4)(out), t0; \
pxor t0, x3; \
movdqu x3, (3*4*4)(out);
/*
 * __serpent_enc_blk_4way: encrypt four 16-byte blocks in parallel.
 *
 * Loads 64 bytes from src, XORs in round key 0, then applies 32 S-box
 * steps.  The first 31 are each followed by LK with round keys 1..31;
 * the final S7 is followed by a plain XOR with round key 32.  The
 * result is either stored to dst or XORed into the 64 bytes already at
 * dst, depending on the low byte of the xor argument.
 *
 * Overwrites %eax, %edx and %xmm0-%xmm7.
 */
.align 8
.global __serpent_enc_blk_4way
.type __serpent_enc_blk_4way,@function;
__serpent_enc_blk_4way:
/* input:
* arg_ctx(%esp): ctx, CTX
* arg_dst(%esp): dst
* arg_src(%esp): src
* arg_xor(%esp): bool, if true: xor output
*/
/* RNOT = all ones (every lane compares equal to itself) */
pcmpeqd RNOT, RNOT;
movl arg_ctx(%esp), CTX;
movl arg_src(%esp), %eax;
read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
K(RA, RB, RC, RD, RE, 0);
S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1);
S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2);
S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3);
S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4);
S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5);
S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6);
S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7);
S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8);
S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9);
S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10);
S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11);
S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12);
S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13);
S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14);
S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15);
S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16);
S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17);
S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18);
S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19);
S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20);
S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21);
S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22);
S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23);
S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24);
S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25);
S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26);
S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27);
S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28);
S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29);
S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30);
S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31);
S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32);
movl arg_dst(%esp), %eax;
/* a non-zero low byte of the xor argument selects the XOR-into-dst path */
cmpb $0, arg_xor(%esp);
jnz __enc_xor4;
write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
ret;
__enc_xor4:
xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
ret;
/*
 * serpent_dec_blk_4way: decrypt four 16-byte blocks in parallel.
 *
 * Loads 64 bytes from src, XORs in round key 32, then applies the
 * inverse S-boxes SI7..SI0 four times over.  Every inverse S-box except
 * the last is followed by KL with round keys 31 down to 1; the final
 * SI0 is followed by a plain XOR with round key 0.  The result, held in
 * RC, RD, RB, RE, is stored to dst.
 *
 * Overwrites %eax, %edx and %xmm0-%xmm7.
 */
.align 8
.global serpent_dec_blk_4way
.type serpent_dec_blk_4way,@function;
serpent_dec_blk_4way:
/* input:
* arg_ctx(%esp): ctx, CTX
* arg_dst(%esp): dst
* arg_src(%esp): src
*/
/* RNOT = all ones (every lane compares equal to itself) */
pcmpeqd RNOT, RNOT;
movl arg_ctx(%esp), CTX;
movl arg_src(%esp), %eax;
read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
K(RA, RB, RC, RD, RE, 32);
SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31);
SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30);
SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29);
SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28);
SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27);
SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26);
SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25);
SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24);
SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23);
SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22);
SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21);
SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20);
SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19);
SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18);
SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17);
SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16);
SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15);
SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14);
SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13);
SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12);
SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11);
SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10);
SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9);
SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8);
SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7);
SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6);
SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5);
SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4);
SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3);
SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2);
SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1);
SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0);
movl arg_dst(%esp), %eax;
write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
ret;
@@ -0,0 +1,758 @@
/*
* Serpent Cipher 8-way parallel algorithm (x86_64/SSE2)
*
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* Based on crypto/serpent.c by
* Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
* 2003 Herbert Valerio Riedel <hvr@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
.file "serpent-sse2-x86_64-asm_64.S"
.text
/* CTX: base address from which get_key fetches round-key words. */
#define CTX %rdi
/**********************************************************************
8-way SSE2 serpent
**********************************************************************/
/* RA1..RE1: working registers for the first group of four blocks. */
#define RA1 %xmm0
#define RB1 %xmm1
#define RC1 %xmm2
#define RD1 %xmm3
#define RE1 %xmm4
/* RA2..RE2: working registers for the second group of four blocks. */
#define RA2 %xmm5
#define RB2 %xmm6
#define RC2 %xmm7
#define RD2 %xmm8
#define RE2 %xmm9
/* RNOT: set to all ones at function entry; pxor with it is a bitwise NOT. */
#define RNOT %xmm10
/*
 * RK0..RK3: hold the four broadcast words of a round key.  RK0..RK2 are
 * also passed as temporaries to the block read/write macros.
 */
#define RK0 %xmm11
#define RK1 %xmm12
#define RK2 %xmm13
#define RK3 %xmm14
/*
 * Encryption S-box 0, split into two halves (S0_1 then S0_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S0_1(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
por x0, x3; \
pxor x4, x0; \
pxor x2, x4; \
pxor RNOT, x4; \
pxor x1, x3; \
pand x0, x1; \
pxor x4, x1; \
pxor x0, x2;
#define S0_2(x0, x1, x2, x3, x4) \
pxor x3, x0; \
por x0, x4; \
pxor x2, x0; \
pand x1, x2; \
pxor x2, x3; \
pxor RNOT, x1; \
pxor x4, x2; \
pxor x2, x1;
/*
 * Encryption S-box 1, split into two halves (S1_1 then S1_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S1_1(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
pxor x0, x1; \
pxor x3, x0; \
pxor RNOT, x3; \
pand x1, x4; \
por x1, x0; \
pxor x2, x3; \
pxor x3, x0; \
pxor x3, x1;
#define S1_2(x0, x1, x2, x3, x4) \
pxor x4, x3; \
por x4, x1; \
pxor x2, x4; \
pand x0, x2; \
pxor x1, x2; \
por x0, x1; \
pxor RNOT, x0; \
pxor x2, x0; \
pxor x1, x4;
/*
 * Encryption S-box 2, split into two halves (S2_1 then S2_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S2_1(x0, x1, x2, x3, x4) \
pxor RNOT, x3; \
pxor x0, x1; \
movdqa x0, x4; \
pand x2, x0; \
pxor x3, x0; \
por x4, x3; \
pxor x1, x2; \
pxor x1, x3; \
pand x0, x1;
#define S2_2(x0, x1, x2, x3, x4) \
pxor x2, x0; \
pand x3, x2; \
por x1, x3; \
pxor RNOT, x0; \
pxor x0, x3; \
pxor x0, x4; \
pxor x2, x0; \
por x2, x1;
/*
 * Encryption S-box 3, split into two halves (S3_1 then S3_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S3_1(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
pxor x3, x1; \
por x0, x3; \
pand x0, x4; \
pxor x2, x0; \
pxor x1, x2; \
pand x3, x1; \
pxor x3, x2; \
por x4, x0; \
pxor x3, x4;
#define S3_2(x0, x1, x2, x3, x4) \
pxor x0, x1; \
pand x3, x0; \
pand x4, x3; \
pxor x2, x3; \
por x1, x4; \
pand x1, x2; \
pxor x3, x4; \
pxor x3, x0; \
pxor x2, x3;
/*
 * Encryption S-box 4, split into two halves (S4_1 then S4_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S4_1(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
pand x0, x3; \
pxor x4, x0; \
pxor x2, x3; \
por x4, x2; \
pxor x1, x0; \
pxor x3, x4; \
por x0, x2; \
pxor x1, x2;
#define S4_2(x0, x1, x2, x3, x4) \
pand x0, x1; \
pxor x4, x1; \
pand x2, x4; \
pxor x3, x2; \
pxor x0, x4; \
por x1, x3; \
pxor RNOT, x1; \
pxor x0, x3;
/*
 * Encryption S-box 5, split into two halves (S5_1 then S5_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S5_1(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
por x0, x1; \
pxor x1, x2; \
pxor RNOT, x3; \
pxor x0, x4; \
pxor x2, x0; \
pand x4, x1; \
por x3, x4; \
pxor x0, x4;
#define S5_2(x0, x1, x2, x3, x4) \
pand x3, x0; \
pxor x3, x1; \
pxor x2, x3; \
pxor x1, x0; \
pand x4, x2; \
pxor x2, x1; \
pand x0, x2; \
pxor x2, x3;
/*
 * Encryption S-box 6, split into two halves (S6_1 then S6_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S6_1(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
pxor x0, x3; \
pxor x2, x1; \
pxor x0, x2; \
pand x3, x0; \
por x3, x1; \
pxor RNOT, x4; \
pxor x1, x0; \
pxor x2, x1;
#define S6_2(x0, x1, x2, x3, x4) \
pxor x4, x3; \
pxor x0, x4; \
pand x0, x2; \
pxor x1, x4; \
pxor x3, x2; \
pand x1, x3; \
pxor x0, x3; \
pxor x2, x1;
/*
 * Encryption S-box 7, split into two halves (S7_1 then S7_2) so that
 * other instructions can be interleaved between them.  x4 is written
 * before it is read; results are spread over x0..x4.
 */
#define S7_1(x0, x1, x2, x3, x4) \
pxor RNOT, x1; \
movdqa x1, x4; \
pxor RNOT, x0; \
pand x2, x1; \
pxor x3, x1; \
por x4, x3; \
pxor x2, x4; \
pxor x3, x2; \
pxor x0, x3; \
por x1, x0;
#define S7_2(x0, x1, x2, x3, x4) \
pand x0, x2; \
pxor x4, x0; \
pxor x3, x4; \
pand x0, x3; \
pxor x1, x4; \
pxor x4, x2; \
pxor x1, x3; \
por x0, x4; \
pxor x1, x4;
/*
 * Inverse S-box 0 (decryption), split into two halves (SI0_1 then
 * SI0_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI0_1(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
pxor x0, x1; \
por x1, x3; \
pxor x1, x4; \
pxor RNOT, x0; \
pxor x3, x2; \
pxor x0, x3; \
pand x1, x0; \
pxor x2, x0;
#define SI0_2(x0, x1, x2, x3, x4) \
pand x3, x2; \
pxor x4, x3; \
pxor x3, x2; \
pxor x3, x1; \
pand x0, x3; \
pxor x0, x1; \
pxor x2, x0; \
pxor x3, x4;
/*
 * Inverse S-box 1 (decryption), split into two halves (SI1_1 then
 * SI1_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI1_1(x0, x1, x2, x3, x4) \
pxor x3, x1; \
movdqa x0, x4; \
pxor x2, x0; \
pxor RNOT, x2; \
por x1, x4; \
pxor x3, x4; \
pand x1, x3; \
pxor x2, x1; \
pand x4, x2;
#define SI1_2(x0, x1, x2, x3, x4) \
pxor x1, x4; \
por x3, x1; \
pxor x0, x3; \
pxor x0, x2; \
por x4, x0; \
pxor x4, x2; \
pxor x0, x1; \
pxor x1, x4;
/*
 * Inverse S-box 2 (decryption), split into two halves (SI2_1 then
 * SI2_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI2_1(x0, x1, x2, x3, x4) \
pxor x1, x2; \
movdqa x3, x4; \
pxor RNOT, x3; \
por x2, x3; \
pxor x4, x2; \
pxor x0, x4; \
pxor x1, x3; \
por x2, x1; \
pxor x0, x2;
#define SI2_2(x0, x1, x2, x3, x4) \
pxor x4, x1; \
por x3, x4; \
pxor x3, x2; \
pxor x2, x4; \
pand x1, x2; \
pxor x3, x2; \
pxor x4, x3; \
pxor x0, x4;
/*
 * Inverse S-box 3 (decryption), split into two halves (SI3_1 then
 * SI3_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI3_1(x0, x1, x2, x3, x4) \
pxor x1, x2; \
movdqa x1, x4; \
pand x2, x1; \
pxor x0, x1; \
por x4, x0; \
pxor x3, x4; \
pxor x3, x0; \
por x1, x3; \
pxor x2, x1;
#define SI3_2(x0, x1, x2, x3, x4) \
pxor x3, x1; \
pxor x2, x0; \
pxor x3, x2; \
pand x1, x3; \
pxor x0, x1; \
pand x2, x0; \
pxor x3, x4; \
pxor x0, x3; \
pxor x1, x0;
/*
 * Inverse S-box 4 (decryption), split into two halves (SI4_1 then
 * SI4_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI4_1(x0, x1, x2, x3, x4) \
pxor x3, x2; \
movdqa x0, x4; \
pand x1, x0; \
pxor x2, x0; \
por x3, x2; \
pxor RNOT, x4; \
pxor x0, x1; \
pxor x2, x0; \
pand x4, x2;
#define SI4_2(x0, x1, x2, x3, x4) \
pxor x0, x2; \
por x4, x0; \
pxor x3, x0; \
pand x2, x3; \
pxor x3, x4; \
pxor x1, x3; \
pand x0, x1; \
pxor x1, x4; \
pxor x3, x0;
/*
 * Inverse S-box 5 (decryption), split into two halves (SI5_1 then
 * SI5_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI5_1(x0, x1, x2, x3, x4) \
movdqa x1, x4; \
por x2, x1; \
pxor x4, x2; \
pxor x3, x1; \
pand x4, x3; \
pxor x3, x2; \
por x0, x3; \
pxor RNOT, x0; \
pxor x2, x3; \
por x0, x2;
#define SI5_2(x0, x1, x2, x3, x4) \
pxor x1, x4; \
pxor x4, x2; \
pand x0, x4; \
pxor x1, x0; \
pxor x3, x1; \
pand x2, x0; \
pxor x3, x2; \
pxor x2, x0; \
pxor x4, x2; \
pxor x3, x4;
/*
 * Inverse S-box 6 (decryption), split into two halves (SI6_1 then
 * SI6_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI6_1(x0, x1, x2, x3, x4) \
pxor x2, x0; \
movdqa x0, x4; \
pand x3, x0; \
pxor x3, x2; \
pxor x2, x0; \
pxor x1, x3; \
por x4, x2; \
pxor x3, x2; \
pand x0, x3;
#define SI6_2(x0, x1, x2, x3, x4) \
pxor RNOT, x0; \
pxor x1, x3; \
pand x2, x1; \
pxor x0, x4; \
pxor x4, x3; \
pxor x2, x4; \
pxor x1, x0; \
pxor x0, x2;
/*
 * Inverse S-box 7 (decryption), split into two halves (SI7_1 then
 * SI7_2) so that round-key loads can be interleaved between them.
 * x4 is written before it is read; results are spread over x0..x4.
 */
#define SI7_1(x0, x1, x2, x3, x4) \
movdqa x3, x4; \
pand x0, x3; \
pxor x2, x0; \
por x4, x2; \
pxor x1, x4; \
pxor RNOT, x0; \
por x3, x1; \
pxor x0, x4; \
pand x2, x0; \
pxor x1, x0;
#define SI7_2(x0, x1, x2, x3, x4) \
pand x2, x1; \
pxor x2, x3; \
pxor x3, x4; \
pand x3, x2; \
por x0, x3; \
pxor x4, x1; \
pxor x4, x3; \
pand x0, x4; \
pxor x2, x4;
/*
 * Load the 32-bit word at byte offset (4*i + j)*4 from CTX (word j of
 * round key i) and broadcast it to all four 32-bit lanes of t.
 */
#define get_key(i, j, t) \
movd (4*(i)+(j))*4(CTX), t; \
pshufd $0, t, t;
/*
 * Load round key i into RK0..RK3 and XOR it into both register groups.
 * The register names are passed without their 1/2 suffix; the suffix is
 * pasted on here.  x4 is accepted but not used.
 */
#define K2(x0, x1, x2, x3, x4, i) \
get_key(i, 0, RK0); \
get_key(i, 1, RK1); \
get_key(i, 2, RK2); \
get_key(i, 3, RK3); \
pxor RK0, x0 ## 1; \
pxor RK1, x1 ## 1; \
pxor RK2, x2 ## 1; \
pxor RK3, x3 ## 1; \
pxor RK0, x0 ## 2; \
pxor RK1, x1 ## 2; \
pxor RK2, x2 ## 2; \
pxor RK3, x3 ## 2;
/*
 * Linear mixing followed by XOR with round key i, applied to both
 * register groups (suffix 1 and suffix 2), as used between the
 * encryption S-boxes.  Each rotate-left is built from a left shift, a
 * right shift and an OR.  The work on the two groups alternates in
 * stages, and the four round-key words are loaded into RK0..RK3 between
 * stages.  x4 of each group is overwritten as a temporary.
 */
#define LK2(x0, x1, x2, x3, x4, i) \
movdqa x0 ## 1, x4 ## 1; \
pslld $13, x0 ## 1; \
psrld $(32 - 13), x4 ## 1; \
por x4 ## 1, x0 ## 1; \
pxor x0 ## 1, x1 ## 1; \
movdqa x2 ## 1, x4 ## 1; \
pslld $3, x2 ## 1; \
psrld $(32 - 3), x4 ## 1; \
por x4 ## 1, x2 ## 1; \
pxor x2 ## 1, x1 ## 1; \
movdqa x0 ## 2, x4 ## 2; \
pslld $13, x0 ## 2; \
psrld $(32 - 13), x4 ## 2; \
por x4 ## 2, x0 ## 2; \
pxor x0 ## 2, x1 ## 2; \
movdqa x2 ## 2, x4 ## 2; \
pslld $3, x2 ## 2; \
psrld $(32 - 3), x4 ## 2; \
por x4 ## 2, x2 ## 2; \
pxor x2 ## 2, x1 ## 2; \
movdqa x1 ## 1, x4 ## 1; \
pslld $1, x1 ## 1; \
psrld $(32 - 1), x4 ## 1; \
por x4 ## 1, x1 ## 1; \
movdqa x0 ## 1, x4 ## 1; \
pslld $3, x4 ## 1; \
pxor x2 ## 1, x3 ## 1; \
pxor x4 ## 1, x3 ## 1; \
movdqa x3 ## 1, x4 ## 1; \
get_key(i, 1, RK1); \
movdqa x1 ## 2, x4 ## 2; \
pslld $1, x1 ## 2; \
psrld $(32 - 1), x4 ## 2; \
por x4 ## 2, x1 ## 2; \
movdqa x0 ## 2, x4 ## 2; \
pslld $3, x4 ## 2; \
pxor x2 ## 2, x3 ## 2; \
pxor x4 ## 2, x3 ## 2; \
movdqa x3 ## 2, x4 ## 2; \
get_key(i, 3, RK3); \
pslld $7, x3 ## 1; \
psrld $(32 - 7), x4 ## 1; \
por x4 ## 1, x3 ## 1; \
movdqa x1 ## 1, x4 ## 1; \
pslld $7, x4 ## 1; \
pxor x1 ## 1, x0 ## 1; \
pxor x3 ## 1, x0 ## 1; \
pxor x3 ## 1, x2 ## 1; \
pxor x4 ## 1, x2 ## 1; \
get_key(i, 0, RK0); \
pslld $7, x3 ## 2; \
psrld $(32 - 7), x4 ## 2; \
por x4 ## 2, x3 ## 2; \
movdqa x1 ## 2, x4 ## 2; \
pslld $7, x4 ## 2; \
pxor x1 ## 2, x0 ## 2; \
pxor x3 ## 2, x0 ## 2; \
pxor x3 ## 2, x2 ## 2; \
pxor x4 ## 2, x2 ## 2; \
get_key(i, 2, RK2); \
pxor RK1, x1 ## 1; \
pxor RK3, x3 ## 1; \
movdqa x0 ## 1, x4 ## 1; \
pslld $5, x0 ## 1; \
psrld $(32 - 5), x4 ## 1; \
por x4 ## 1, x0 ## 1; \
movdqa x2 ## 1, x4 ## 1; \
pslld $22, x2 ## 1; \
psrld $(32 - 22), x4 ## 1; \
por x4 ## 1, x2 ## 1; \
pxor RK0, x0 ## 1; \
pxor RK2, x2 ## 1; \
pxor RK1, x1 ## 2; \
pxor RK3, x3 ## 2; \
movdqa x0 ## 2, x4 ## 2; \
pslld $5, x0 ## 2; \
psrld $(32 - 5), x4 ## 2; \
por x4 ## 2, x0 ## 2; \
movdqa x2 ## 2, x4 ## 2; \
pslld $22, x2 ## 2; \
psrld $(32 - 22), x4 ## 2; \
por x4 ## 2, x2 ## 2; \
pxor RK0, x0 ## 2; \
pxor RK2, x2 ## 2;
/*
 * XOR with the round key held in RK0..RK3, then the linear mixing in
 * the reverse direction, applied to both register groups (suffix 1 and
 * suffix 2), as used between the decryption S-boxes.
 *
 * This macro does not load the key itself and does not use i: RK0..RK3
 * must already hold round key i.  The call sites place an SP() with the
 * same round index immediately before it.  Each rotate-right is built
 * from a right shift, a left shift and an OR.  x4 of each group is
 * overwritten as a temporary.
 */
#define KL2(x0, x1, x2, x3, x4, i) \
pxor RK0, x0 ## 1; \
pxor RK2, x2 ## 1; \
movdqa x0 ## 1, x4 ## 1; \
psrld $5, x0 ## 1; \
pslld $(32 - 5), x4 ## 1; \
por x4 ## 1, x0 ## 1; \
pxor RK3, x3 ## 1; \
pxor RK1, x1 ## 1; \
movdqa x2 ## 1, x4 ## 1; \
psrld $22, x2 ## 1; \
pslld $(32 - 22), x4 ## 1; \
por x4 ## 1, x2 ## 1; \
pxor x3 ## 1, x2 ## 1; \
pxor RK0, x0 ## 2; \
pxor RK2, x2 ## 2; \
movdqa x0 ## 2, x4 ## 2; \
psrld $5, x0 ## 2; \
pslld $(32 - 5), x4 ## 2; \
por x4 ## 2, x0 ## 2; \
pxor RK3, x3 ## 2; \
pxor RK1, x1 ## 2; \
movdqa x2 ## 2, x4 ## 2; \
psrld $22, x2 ## 2; \
pslld $(32 - 22), x4 ## 2; \
por x4 ## 2, x2 ## 2; \
pxor x3 ## 2, x2 ## 2; \
pxor x3 ## 1, x0 ## 1; \
movdqa x1 ## 1, x4 ## 1; \
pslld $7, x4 ## 1; \
pxor x1 ## 1, x0 ## 1; \
pxor x4 ## 1, x2 ## 1; \
movdqa x1 ## 1, x4 ## 1; \
psrld $1, x1 ## 1; \
pslld $(32 - 1), x4 ## 1; \
por x4 ## 1, x1 ## 1; \
pxor x3 ## 2, x0 ## 2; \
movdqa x1 ## 2, x4 ## 2; \
pslld $7, x4 ## 2; \
pxor x1 ## 2, x0 ## 2; \
pxor x4 ## 2, x2 ## 2; \
movdqa x1 ## 2, x4 ## 2; \
psrld $1, x1 ## 2; \
pslld $(32 - 1), x4 ## 2; \
por x4 ## 2, x1 ## 2; \
movdqa x3 ## 1, x4 ## 1; \
psrld $7, x3 ## 1; \
pslld $(32 - 7), x4 ## 1; \
por x4 ## 1, x3 ## 1; \
pxor x0 ## 1, x1 ## 1; \
movdqa x0 ## 1, x4 ## 1; \
pslld $3, x4 ## 1; \
pxor x4 ## 1, x3 ## 1; \
movdqa x0 ## 1, x4 ## 1; \
movdqa x3 ## 2, x4 ## 2; \
psrld $7, x3 ## 2; \
pslld $(32 - 7), x4 ## 2; \
por x4 ## 2, x3 ## 2; \
pxor x0 ## 2, x1 ## 2; \
movdqa x0 ## 2, x4 ## 2; \
pslld $3, x4 ## 2; \
pxor x4 ## 2, x3 ## 2; \
movdqa x0 ## 2, x4 ## 2; \
psrld $13, x0 ## 1; \
pslld $(32 - 13), x4 ## 1; \
por x4 ## 1, x0 ## 1; \
pxor x2 ## 1, x1 ## 1; \
pxor x2 ## 1, x3 ## 1; \
movdqa x2 ## 1, x4 ## 1; \
psrld $3, x2 ## 1; \
pslld $(32 - 3), x4 ## 1; \
por x4 ## 1, x2 ## 1; \
psrld $13, x0 ## 2; \
pslld $(32 - 13), x4 ## 2; \
por x4 ## 2, x0 ## 2; \
pxor x2 ## 2, x1 ## 2; \
pxor x2 ## 2, x3 ## 2; \
movdqa x2 ## 2, x4 ## 2; \
psrld $3, x2 ## 2; \
pslld $(32 - 3), x4 ## 2; \
por x4 ## 2, x2 ## 2;
/*
 * Apply S-box SBOX (both halves, _1 then _2) to register group 1 and
 * then to register group 2.  Register names are passed without their
 * 1/2 suffix.
 */
#define S(SBOX, x0, x1, x2, x3, x4) \
SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
/*
 * Apply S-box SBOX to both register groups while preloading round key i
 * into RK0..RK3.  One key word is loaded ahead of each S-box half so
 * that the loads are spread between the logic operations.  Register
 * names are passed without their 1/2 suffix.
 *
 * The last line must not end in a backslash: a trailing continuation
 * would splice whatever line follows the macro into its body.
 */
#define SP(SBOX, x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 2, RK2); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	get_key(i, 3, RK3); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 1, RK1); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
/*
 * Transpose the 4x4 matrix of 32-bit words held in x0..x3 (one row per
 * register) in place.  t1 and t2 are overwritten as temporaries; t0 is
 * accepted but not used.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
movdqa x0, t2; \
punpckldq x1, x0; \
punpckhdq x1, t2; \
movdqa x2, t1; \
punpckhdq x3, x2; \
punpckldq x3, t1; \
movdqa x0, x1; \
punpcklqdq t1, x0; \
punpckhqdq t1, x1; \
movdqa t2, x3; \
punpcklqdq x2, t2; \
punpckhqdq x2, x3; \
movdqa t2, x2;
/*
 * Load four consecutive 16-byte blocks from in (unaligned loads) and
 * transpose them, so that xN ends up holding 32-bit word N of each of
 * the four blocks.
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
movdqu (0*4*4)(in), x0; \
movdqu (1*4*4)(in), x1; \
movdqu (2*4*4)(in), x2; \
movdqu (3*4*4)(in), x3; \
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
/*
 * Transpose x0..x3 back to one block per register and store the four
 * 16-byte blocks to out (unaligned stores).
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
movdqu x0, (0*4*4)(out); \
movdqu x1, (1*4*4)(out); \
movdqu x2, (2*4*4)(out); \
movdqu x3, (3*4*4)(out);
/*
 * Transpose x0..x3 back to one block per register, XOR each block with
 * the 16 bytes already stored at the matching position in out, and
 * store the result there.  t0 is overwritten as a temporary.
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
movdqu (0*4*4)(out), t0; \
pxor t0, x0; \
movdqu x0, (0*4*4)(out); \
movdqu (1*4*4)(out), t0; \
pxor t0, x1; \
movdqu x1, (1*4*4)(out); \
movdqu (2*4*4)(out), t0; \
pxor t0, x2; \
movdqu x2, (2*4*4)(out); \
movdqu (3*4*4)(out), t0; \
pxor t0, x3; \
movdqu x3, (3*4*4)(out);
/*
 * __serpent_enc_blk_8way: encrypt eight 16-byte blocks in parallel.
 *
 * The blocks are handled as two groups of four: group 1 comes from src
 * and group 2 from src + 64.  After XOR with round key 0, 32 S-box
 * steps are applied.  The first 31 are each followed by LK2 with round
 * keys 1..31; the final S7 is followed by a plain XOR with round key
 * 32.  The result is either stored to dst / dst + 64 or XORed into the
 * data already there, depending on the low byte of %rcx.
 *
 * Overwrites %rax and %xmm0-%xmm14.
 */
.align 8
.global __serpent_enc_blk_8way
.type __serpent_enc_blk_8way,@function;
__serpent_enc_blk_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
/* RNOT = all ones (every lane compares equal to itself) */
pcmpeqd RNOT, RNOT;
/* %rax = address of the second group of four blocks */
leaq (4*4*4)(%rdx), %rax;
read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 0);
S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
/* %rax = address of the second group in the destination */
leaq (4*4*4)(%rsi), %rax;
/* a non-zero low byte of %rcx selects the XOR-into-dst path */
testb %cl, %cl;
jnz __enc_xor8;
write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
__enc_xor8:
xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
/*
 * serpent_dec_blk_8way: decrypt eight 16-byte blocks in parallel.
 *
 * The blocks are handled as two groups of four: group 1 comes from src
 * and group 2 from src + 64.  After XOR with round key 32, the inverse
 * S-boxes SI7..SI0 are applied four times over.  Every step except the
 * last uses SP, which also preloads the round key consumed by the KL2
 * that follows (round keys 31 down to 1).  The final SI0 uses S and is
 * followed by a plain XOR with round key 0.  The result, held in the
 * RC, RD, RB, RE registers of each group, is stored to dst and
 * dst + 64.
 *
 * Overwrites %rax and %xmm0-%xmm14.
 */
.align 8
.global serpent_dec_blk_8way
.type serpent_dec_blk_8way,@function;
serpent_dec_blk_8way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
/* RNOT = all ones (every lane compares equal to itself) */
pcmpeqd RNOT, RNOT;
/* %rax = address of the second group of four blocks */
leaq (4*4*4)(%rdx), %rax;
read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 32);
SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
/* %rax = address of the second group in the destination */
leaq (4*4*4)(%rsi), %rax;
write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
ret;
+944
View File
@@ -0,0 +1,944 @@
/*
* Glue Code for SSE2 assembler versions of Serpent Cipher
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* Glue code based on aesni-intel_glue.c by:
* Copyright (C) 2008, Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/serpent.h>
#include <crypto/cryptd.h>
#include <crypto/b128ops.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/i387.h>
#include <asm/serpent.h>
#include <crypto/scatterwalk.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
/*
 * Transform context of the asynchronous wrapper algorithms ("ecb(serpent)",
 * "cbc(serpent)", ...): it only holds the cryptd handle allocated by
 * ablk_init() and released by ablk_exit().
 */
struct async_serpent_ctx {
struct cryptd_ablkcipher *cryptd_tfm;
};
/*
 * Lazily claim the FPU for the SSE2 code paths.
 *
 * @fpu_enabled: whether an earlier call in this request already claimed it
 * @nbytes:      size of the chunk about to be processed
 *
 * Returns the new "FPU is claimed" state. The FPU is only claimed once a
 * chunk is big enough to feed the parallel routine (a full batch of
 * SERPENT_PARALLEL_BLOCKS blocks); smaller chunks are handled without it.
 */
static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	if (!fpu_enabled) {
		/* Too small for the parallel path: leave the FPU alone. */
		if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
			return false;

		kernel_fpu_begin();
	}

	return true;
}
/*
 * Counterpart of serpent_fpu_begin(): give the FPU back, but only when
 * @fpu_enabled says it was actually claimed.
 */
static inline void serpent_fpu_end(bool fpu_enabled)
{
	if (!fpu_enabled)
		return;

	kernel_fpu_end();
}
/*
 * Common ECB worker shared by ecb_encrypt() and ecb_decrypt().
 *
 * @desc: cipher descriptor; its tfm context is the serpent key schedule
 * @walk: walk already initialised by the caller with blkcipher_walk_init()
 * @enc:  true to encrypt, false to decrypt
 *
 * Every chunk handed out by the walk is processed in two phases: full
 * batches of SERPENT_PARALLEL_BLOCKS blocks go through the parallel
 * routine, the remaining whole blocks through the one-block routine.
 * Returns the status of the last blkcipher_walk_virt()/blkcipher_walk_done()
 * call.
 */
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
bool enc)
{
bool fpu_enabled = false;
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = SERPENT_BLOCK_SIZE;
unsigned int nbytes;
int err;
err = blkcipher_walk_virt(desc, walk);
/* NOTE(review): presumably the walk must not sleep once the FPU has been
 * claimed below - confirm against the blkcipher walk implementation.
 */
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
/* claims the FPU the first time a chunk is large enough for a batch */
fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
do {
if (enc)
serpent_enc_blk_xway(ctx, wdst, wsrc);
else
serpent_dec_blk_xway(ctx, wdst, wsrc);
wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
wdst += bsize * SERPENT_PARALLEL_BLOCKS;
nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
/* nothing but a partial block left: skip the one-block loop */
if (nbytes < bsize)
goto done;
}
/* Handle leftovers, one block at a time */
do {
if (enc)
__serpent_encrypt(ctx, wdst, wsrc);
else
__serpent_decrypt(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
/* nbytes is now the unprocessed remainder (less than one block) */
err = blkcipher_walk_done(desc, walk, nbytes);
}
serpent_fpu_end(fpu_enabled);
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, true);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, false);
}
/*
 * CBC-encrypt all whole blocks of the current walk chunk.
 *
 * @desc: cipher descriptor; its tfm context is the serpent key schedule
 * @walk: walk positioned on the chunk to process; walk->iv holds the
 *        chaining value on entry and is updated on return
 *
 * Each plaintext block is XORed with the previous ciphertext block (the IV
 * for the first one) and encrypted in place in the destination buffer.
 * Returns the number of trailing bytes that were not processed (less than
 * one block).
 */
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = SERPENT_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 *iv = (u128 *)walk->iv;

	do {
		u128_xor(dst, src, iv);
		__serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		/* the block just produced chains into the next one */
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	/*
	 * The chaining value for the next chunk (and the IV handed back to
	 * the caller) is the last ciphertext block itself. XORing it into
	 * the old IV, as was done before, yields IV ^ C_last and breaks
	 * every request that spans more than one walk chunk.
	 */
	*(u128 *)walk->iv = *iv;

	return nbytes;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
/*
 * CBC-decrypt all whole blocks of the current walk chunk.
 *
 * @desc: cipher descriptor; its tfm context is the serpent key schedule
 * @walk: walk positioned on the chunk to process; walk->iv holds the
 *        chaining value on entry and receives the last ciphertext block
 *
 * The chunk is processed from its last block towards its first. Each
 * decrypted block is XORed with the ciphertext block in front of it, which
 * at that point has not been overwritten yet, so the routine also works
 * when src and dst are the same buffer.
 * Returns the number of trailing bytes that were not processed (less than
 * one block).
 */
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = SERPENT_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
/* copies of the ciphertext blocks that chain inside one parallel batch */
u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
u128 last_iv;
int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
/* saved now because decrypting in place would destroy it */
last_iv = *src;
/* Process multi-block batch */
if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
do {
/* step back so src/dst point at the first block of the batch */
nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
src -= SERPENT_PARALLEL_BLOCKS - 1;
dst -= SERPENT_PARALLEL_BLOCKS - 1;
/* keep the ciphertext of blocks 0..N-2; it is the IV of blocks 1..N-1 */
for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
ivs[i] = src[i];
serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
nbytes -= bsize;
/* first block of the chunk reached: it chains to walk->iv at done */
if (nbytes < bsize)
goto done;
/* first block of the batch chains to the ciphertext block before it */
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers, one block at a time, still moving backwards */
for (;;) {
__serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
}
done:
/* dst is the first block of the chunk: chain it to the incoming IV */
u128_xor(dst, dst, (u128 *)walk->iv);
/* the last ciphertext block is the chaining value for the next chunk */
*(u128 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes)) {
fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
serpent_fpu_end(fpu_enabled);
return err;
}
/* Store the CPU-endian 128-bit value @src into @dst as two big-endian halves. */
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
	/* the two halves are independent of each other */
	dst->b = cpu_to_be64(src->b);
	dst->a = cpu_to_be64(src->a);
}
/* Load the two big-endian halves of @src into the CPU-endian value @dst. */
static inline void be128_to_u128(u128 *dst, const be128 *src)
{
	/* the two halves are independent of each other */
	dst->b = be64_to_cpu(src->b);
	dst->a = be64_to_cpu(src->a);
}
/* Add one to the 128-bit counter @i; 'b' is the low half, 'a' the high half. */
static inline void u128_inc(u128 *i)
{
	/* low half wrapped around to zero: carry into the high half */
	if (++i->b == 0)
		i->a++;
}
/*
 * Handle the final, partial block of a CTR request.
 *
 * The counter block in walk->iv is encrypted into a local keystream buffer,
 * the walk->nbytes remaining source bytes are XORed into it, and exactly
 * that many bytes are copied to the destination. The counter is then
 * advanced by one.
 */
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	u8 stream[SERPENT_BLOCK_SIZE];
	const unsigned int tail = walk->nbytes;
	u8 *counter = walk->iv;

	__serpent_encrypt(crypto_blkcipher_ctx(desc->tfm), stream, counter);

	crypto_xor(stream, walk->src.virt.addr, tail);
	memcpy(walk->dst.virt.addr, stream, tail);

	crypto_inc(counter, SERPENT_BLOCK_SIZE);
}
/*
 * CTR-process all whole blocks of the current walk chunk.
 *
 * @desc: cipher descriptor; its tfm context is the serpent key schedule
 * @walk: walk positioned on the chunk to process; walk->iv holds the
 *        big-endian counter block and is updated on return
 *
 * The counter is kept in CPU-endian form (ctrblk) while it is being
 * incremented and converted to big-endian for every block that is
 * encrypted. Returns the number of trailing bytes that were not processed
 * (less than one block).
 */
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = SERPENT_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk;
be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
int i;
be128_to_u128(&ctrblk, (be128 *)walk->iv);
/* Process multi-block batch */
if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
do {
/* create ctrblks for parallel encrypt */
for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
/* not in place: bring the input into dst first.
 * NOTE(review): presumably the _xor routine below XORs the encrypted
 * counter blocks into dst - confirm against its definition.
 */
if (dst != src)
dst[i] = src[i];
u128_to_be128(&ctrblocks[i], &ctrblk);
u128_inc(&ctrblk);
}
serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += SERPENT_PARALLEL_BLOCKS;
dst += SERPENT_PARALLEL_BLOCKS;
nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers, one block at a time */
do {
if (dst != src)
*dst = *src;
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
/* encrypt the counter block in place, then XOR it into the data */
__serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
u128_xor(dst, dst, (u128 *)ctrblocks);
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
/* write the advanced counter back in big-endian form */
u128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
serpent_fpu_end(fpu_enabled);
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
}
/*
 * State handed to encrypt_callback()/decrypt_callback() through the LRW and
 * XTS request structures.
 */
struct crypt_priv {
/* key schedule used for the data blocks */
struct serpent_ctx *ctx;
/* set by the callbacks once the FPU was claimed; the caller releases it */
bool fpu_enabled;
};
/*
 * Block-encryption callback used by the LRW and XTS helpers.
 *
 * @priv:   struct crypt_priv with the key schedule and the FPU state
 * @srcdst: buffer that is encrypted in place
 * @nbytes: length of the buffer
 *
 * A buffer holding exactly one full batch goes through the parallel
 * routine; any other length is processed one block at a time.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = SERPENT_BLOCK_SIZE;
	struct crypt_priv *state = priv;
	unsigned int left;

	state->fpu_enabled = serpent_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
		serpent_enc_blk_xway(state->ctx, srcdst, srcdst);
	} else {
		for (left = nbytes / bsize; left; left--, srcdst += bsize)
			__serpent_encrypt(state->ctx, srcdst, srcdst);
	}
}
/*
 * Block-decryption callback used by the LRW and XTS helpers.
 *
 * @priv:   struct crypt_priv with the key schedule and the FPU state
 * @srcdst: buffer that is decrypted in place
 * @nbytes: length of the buffer
 *
 * A buffer holding exactly one full batch goes through the parallel
 * routine; any other length is processed one block at a time.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = SERPENT_BLOCK_SIZE;
	struct crypt_priv *state = priv;
	unsigned int left;

	state->fpu_enabled = serpent_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
		serpent_dec_blk_xway(state->ctx, srcdst, srcdst);
	} else {
		for (left = nbytes / bsize; left; left--, srcdst += bsize)
			__serpent_decrypt(state->ctx, srcdst, srcdst);
	}
}
/*
 * Transform context of the inner LRW algorithm: the tweak table set up by
 * lrw_init_table() plus the serpent key schedule, both filled in by
 * lrw_serpent_setkey().
 */
struct serpent_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct serpent_ctx serpent_ctx;
};
/*
 * Set the LRW key. The last SERPENT_BLOCK_SIZE bytes of @key initialise the
 * tweak table; everything in front of them is the serpent cipher key.
 * Returns 0 on success or the error of the step that failed.
 */
static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct serpent_lrw_ctx *lrw = crypto_tfm_ctx(tfm);
	const u8 *tweak_key = key + keylen - SERPENT_BLOCK_SIZE;
	int ret;

	ret = __serpent_setkey(&lrw->serpent_ctx, key,
			       keylen - SERPENT_BLOCK_SIZE);
	if (!ret)
		ret = lrw_init_table(&lrw->lrw_table, tweak_key);

	return ret;
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->serpent_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->serpent_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
/* .cra_exit hook of the inner LRW algorithm: release the tweak table. */
static void lrw_exit_tfm(struct crypto_tfm *tfm)
{
	struct serpent_lrw_ctx *lrw = crypto_tfm_ctx(tfm);

	lrw_free_table(&lrw->lrw_table);
}
/*
 * Transform context of the inner XTS algorithm: two independent serpent key
 * schedules, filled in by xts_serpent_setkey() from the two halves of the
 * XTS key.
 */
struct serpent_xts_ctx {
/* key schedule for the tweak (second half of the key) */
struct serpent_ctx tweak_ctx;
/* key schedule for the data blocks (first half of the key) */
struct serpent_ctx crypt_ctx;
};
/*
 * Set the XTS key, which is two serpent keys of equal size concatenated:
 * the first half keys the data cipher, the second half keys the tweak
 * cipher. An odd @keylen is rejected with -EINVAL and
 * CRYPTO_TFM_RES_BAD_KEY_LEN set in the tfm flags. Otherwise returns 0 or
 * the error of the key setup that failed.
 */
static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct serpent_xts_ctx *xts = crypto_tfm_ctx(tfm);
	const unsigned int half = keylen / 2;
	int ret;

	/* two equally sized keys cannot add up to an odd length */
	if (keylen % 2) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	/* first half: data cipher key */
	ret = __serpent_setkey(&xts->crypt_ctx, key, half);
	if (ret)
		return ret;

	/* second half: tweak cipher key */
	return __serpent_setkey(&xts->tweak_ctx, key + half, half);
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
serpent_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
/*
 * .setkey hook of the async wrappers: forward the key to the cryptd child.
 * The request flags of @tfm are copied to the child before the call and the
 * child's result flags are copied back afterwards.
 * Returns the result of the child's setkey.
 */
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
			unsigned int key_len)
{
	struct async_serpent_ctx *actx = crypto_ablkcipher_ctx(tfm);
	struct crypto_ablkcipher *inner = &actx->cryptd_tfm->base;
	int ret;

	/* make the child see the same request flags as the wrapper */
	crypto_ablkcipher_clear_flags(inner, CRYPTO_TFM_REQ_MASK);
	crypto_ablkcipher_set_flags(inner,
				    crypto_ablkcipher_get_flags(tfm) &
				    CRYPTO_TFM_REQ_MASK);

	ret = crypto_ablkcipher_setkey(inner, key, key_len);

	/* report the child's result flags through the wrapper */
	crypto_ablkcipher_set_flags(tfm,
				    crypto_ablkcipher_get_flags(inner) &
				    CRYPTO_TFM_RES_MASK);

	return ret;
}
static int __ablk_encrypt(struct ablkcipher_request *req)
{
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
struct blkcipher_desc desc;
desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
desc.info = req->info;
desc.flags = 0;
return crypto_blkcipher_crt(desc.tfm)->encrypt(
&desc, req->dst, req->src, req->nbytes);
}
/*
 * .encrypt hook of the async wrappers. When the FPU may be used in the
 * current context the request is encrypted right away; otherwise a copy of
 * the request, retargeted at the cryptd transform, is queued there.
 */
static int ablk_encrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_serpent_ctx *actx = crypto_ablkcipher_ctx(tfm);
	struct ablkcipher_request *deferred;

	if (irq_fpu_usable())
		return __ablk_encrypt(req);

	/* the request context area holds the copy handed to cryptd */
	deferred = ablkcipher_request_ctx(req);
	memcpy(deferred, req, sizeof(*req));
	ablkcipher_request_set_tfm(deferred, &actx->cryptd_tfm->base);

	return crypto_ablkcipher_encrypt(deferred);
}
static int ablk_decrypt(struct ablkcipher_request *req)
{
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
if (!irq_fpu_usable()) {
struct ablkcipher_request *cryptd_req =
ablkcipher_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_ablkcipher_decrypt(cryptd_req);
} else {
struct blkcipher_desc desc;
desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
desc.info = req->info;
desc.flags = 0;
return crypto_blkcipher_crt(desc.tfm)->decrypt(
&desc, req->dst, req->src, req->nbytes);
}
}
/* .cra_exit hook of the async wrappers: drop the cryptd transform. */
static void ablk_exit(struct crypto_tfm *tfm)
{
	struct async_serpent_ctx *actx = crypto_tfm_ctx(tfm);

	cryptd_free_ablkcipher(actx->cryptd_tfm);
}
/*
 * .cra_init hook of the async wrappers. Allocates a cryptd transform for
 * the algorithm whose driver name is this wrapper's driver name prefixed
 * with "__driver-", stores it in the context, and reserves request space
 * for one extra ablkcipher_request plus whatever the cryptd transform
 * itself needs. Returns 0, or the error from the cryptd allocation.
 */
static int ablk_init(struct crypto_tfm *tfm)
{
	struct async_serpent_ctx *actx = crypto_tfm_ctx(tfm);
	char inner_name[CRYPTO_MAX_ALG_NAME];
	struct cryptd_ablkcipher *cryptd;

	snprintf(inner_name, sizeof(inner_name), "__driver-%s",
		 crypto_tfm_alg_driver_name(tfm));

	cryptd = cryptd_alloc_ablkcipher(inner_name, 0, 0);
	if (IS_ERR(cryptd))
		return PTR_ERR(cryptd);

	actx->cryptd_tfm = cryptd;
	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
				      crypto_ablkcipher_reqsize(&cryptd->base);

	return 0;
}
/*
 * Algorithms registered by this module.
 *
 * Entries 0-4 are the synchronous blkcipher implementations. They carry
 * priority 0 and "__"-prefixed names, and are the ones that call into the
 * SSE2 code. Entries 5-9 are the asynchronous wrappers registered under the
 * regular "mode(serpent)" names with priority 400; ablk_init() binds each
 * of them to the inner algorithm named "__driver-" + its own driver name.
 */
static struct crypto_alg serpent_algs[10] = { {
/* inner ECB */
.cra_name = "__ecb-serpent-sse2",
.cra_driver_name = "__driver-ecb-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.setkey = serpent_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
/* inner CBC */
.cra_name = "__cbc-serpent-sse2",
.cra_driver_name = "__driver-cbc-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.setkey = serpent_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
/* inner CTR: block size 1, same routine for both directions */
.cra_name = "__ctr-serpent-sse2",
.cra_driver_name = "__driver-ctr-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = serpent_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
/* inner LRW: key is a serpent key followed by one block of tweak key */
.cra_name = "__lrw-serpent-sse2",
.cra_driver_name = "__driver-lrw-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
.cra_exit = lrw_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = lrw_serpent_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
/* inner XTS: key is two serpent keys of equal size */
.cra_name = "__xts-serpent-sse2",
.cra_driver_name = "__driver-xts-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE * 2,
.max_keysize = SERPENT_MAX_KEY_SIZE * 2,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = xts_serpent_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
/* async wrapper around the inner ECB */
.cra_name = "ecb(serpent)",
.cra_driver_name = "ecb-serpent-sse2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/*
 * async wrapper around the inner CBC. Encryption uses __ablk_encrypt
 * directly, without the irq_fpu_usable() check: cbc_encrypt() in this
 * file never claims the FPU.
 */
.cra_name = "cbc(serpent)",
.cra_driver_name = "cbc-serpent-sse2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/* async wrapper around the inner CTR: decrypt is the encrypt routine */
.cra_name = "ctr(serpent)",
.cra_driver_name = "ctr-serpent-sse2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
/* async wrapper around the inner LRW */
.cra_name = "lrw(serpent)",
.cra_driver_name = "lrw-serpent-sse2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE +
SERPENT_BLOCK_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
/* async wrapper around the inner XTS */
.cra_name = "xts(serpent)",
.cra_driver_name = "xts-serpent-sse2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_serpent_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = SERPENT_MIN_KEY_SIZE * 2,
.max_keysize = SERPENT_MAX_KEY_SIZE * 2,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
/*
 * Module init: register every entry of serpent_algs, provided the CPU
 * reports SSE2. Without SSE2 a notice is logged and -ENODEV is returned.
 */
static int __init serpent_sse2_init(void)
{
	if (cpu_has_xmm2)
		return crypto_register_algs(serpent_algs,
					    ARRAY_SIZE(serpent_algs));

	printk(KERN_INFO "SSE2 instructions are not detected.\n");
	return -ENODEV;
}
/* Module exit: unregister every algorithm registered by serpent_sse2_init(). */
static void __exit serpent_sse2_exit(void)
{
crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
}
/* Module entry and exit points. */
module_init(serpent_sse2_init);
module_exit(serpent_sse2_exit);
/* Module metadata. */
MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized");
MODULE_LICENSE("GPL");
/* NOTE(review): presumably lets a request for "serpent" load this module - confirm */
MODULE_ALIAS("serpent");
+558
View File
@@ -0,0 +1,558 @@
/*
* This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental
* SSE3 instruction set extensions introduced in Intel Core Microarchitecture
* processors. CPUs supporting Intel(R) AVX extensions will get an additional
* boost.
*
* This work was inspired by the vectorized implementation of Dean Gaudet.
* Additional information on it can be found at:
* http://www.arctic.org/~dean/crypto/sha1.html
*
* It was improved upon with more efficient vectorization of the message
* scheduling. This implementation has also been optimized for all current and
* several future generations of Intel CPUs.
*
* See this article for more information about the implementation details:
* http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/
*
* Copyright (C) 2010, Intel Corp.
* Authors: Maxim Locktyukhin <maxim.locktyukhin@intel.com>
* Ronen Zohar <ronen.zohar@intel.com>
*
* Converted to AT&T syntax and adapted for inclusion in the Linux kernel:
* Author: Mathias Krause <minipli@googlemail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
/* Incoming function arguments. */
#define CTX %rdi // arg1
#define BUF %rsi // arg2
#define CNT %rdx // arg3
/*
 * 32-bit registers initially bound to the working variables A..E and the
 * temporaries T1/T2 (see INIT_REGALLOC). REG_B, REG_C and REG_E are the low
 * halves of BUF, CTX and CNT, which is why the prologue copies the
 * arguments into HASH_PTR, BUFFER_PTR and BUFFER_END before the main body.
 */
#define REG_A %ecx
#define REG_B %esi
#define REG_C %edi
#define REG_D %ebp
#define REG_E %edx
#define REG_T1 %eax
#define REG_T2 %ebx
/* Pointers kept live for the whole function. */
#define K_BASE %r8
#define HASH_PTR %r9
#define BUFFER_PTR %r10
#define BUFFER_END %r11
/* Scratch vector registers for the message schedule. */
#define W_TMP1 %xmm0
#define W_TMP2 %xmm9
/* Eight vector registers that are rotated through the W / W_minus_NN names. */
#define W0 %xmm1
#define W4 %xmm2
#define W8 %xmm3
#define W12 %xmm4
#define W16 %xmm5
#define W20 %xmm6
#define W24 %xmm7
#define W28 %xmm8
/* Holds the byte-swap shuffle mask loaded from BSWAP_SHUFB_CTL. */
#define XMM_SHUFB_BSWAP %xmm10
/*
 * We keep a window of 16 pre-calculated w[i]+K dwords (64 bytes) in a
 * circular buffer on the stack; WK(t) addresses the slot for round t.
 */
#define WK(t) (((t) & 15) * 4)(%rsp)
/* How many rounds ahead of the scalar code the schedule is pre-computed. */
#define W_PRECALC_AHEAD 16
/*
 * This macro emits a complete SHA-1 transform function.
 * param: function's name
 *
 * The generated function takes CTX (arg1), BUF (arg2) and CNT (arg3). CNT is
 * multiplied by 64 and added to BUF to form the end-of-input pointer, so it
 * is a count of 64-byte blocks. The W_PRECALC_00_15/16_31/32_79 macros that
 * are defined at the point of expansion select the vector flavour.
 */
.macro SHA1_VECTOR_ASM name
.global \name
.type \name, @function
.align 32
\name:
/* these three are restored before ret */
push %rbx
push %rbp
push %r12
/* %r12 remembers the unaligned stack pointer */
mov %rsp, %r12
sub $64, %rsp # allocate workspace
and $~15, %rsp # align stack
/* the argument registers are reused as working variables below */
mov CTX, HASH_PTR
mov BUF, BUFFER_PTR
shl $6, CNT # multiply by 64
add BUF, CNT
mov CNT, BUFFER_END
lea K_XMM_AR(%rip), K_BASE
xmm_mov BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP
SHA1_PIPELINED_MAIN_BODY
# cleanup workspace
/* 8 quadwords = the 64-byte WK buffer, overwritten with zeroes */
mov $8, %ecx
mov %rsp, %rdi
xor %rax, %rax
rep stosq
mov %r12, %rsp # deallocate workspace
pop %r12
pop %rbp
pop %rbx
ret
.size \name, .-\name
.endm
/*
 * This macro implements the 80 rounds of SHA-1 and loops over all 64-byte
 * blocks between BUFFER_PTR and BUFFER_END.
 *
 * The first 16 schedule steps are computed up front. Inside the loop every
 * RR invocation performs two rounds and two further schedule steps, so the
 * schedule always runs W_PRECALC_AHEAD rounds ahead and, towards the end of
 * a block, already works on the next block.
 */
.macro SHA1_PIPELINED_MAIN_BODY
INIT_REGALLOC
/* load the five state words */
mov (HASH_PTR), A
mov 4(HASH_PTR), B
mov 8(HASH_PTR), C
mov 12(HASH_PTR), D
mov 16(HASH_PTR), E
/* prime the schedule: steps 0..15 of the first block */
.set i, 0
.rept W_PRECALC_AHEAD
W_PRECALC i
.set i, (i+1)
.endr
.align 4
1:
/* rounds 0-19 */
RR F1,A,B,C,D,E,0
RR F1,D,E,A,B,C,2
RR F1,B,C,D,E,A,4
RR F1,E,A,B,C,D,6
RR F1,C,D,E,A,B,8
RR F1,A,B,C,D,E,10
RR F1,D,E,A,B,C,12
RR F1,B,C,D,E,A,14
RR F1,E,A,B,C,D,16
RR F1,C,D,E,A,B,18
/* rounds 20-39 */
RR F2,A,B,C,D,E,20
RR F2,D,E,A,B,C,22
RR F2,B,C,D,E,A,24
RR F2,E,A,B,C,D,26
RR F2,C,D,E,A,B,28
RR F2,A,B,C,D,E,30
RR F2,D,E,A,B,C,32
RR F2,B,C,D,E,A,34
RR F2,E,A,B,C,D,36
RR F2,C,D,E,A,B,38
/* rounds 40-59 */
RR F3,A,B,C,D,E,40
RR F3,D,E,A,B,C,42
RR F3,B,C,D,E,A,44
RR F3,E,A,B,C,D,46
RR F3,C,D,E,A,B,48
RR F3,A,B,C,D,E,50
RR F3,D,E,A,B,C,52
RR F3,B,C,D,E,A,54
RR F3,E,A,B,C,D,56
RR F3,C,D,E,A,B,58
/*
 * From here on the schedule steps (rounds 80 and up) read the next block,
 * so BUFFER_PTR has to be advanced before round 60.
 */
add $64, BUFFER_PTR # move to the next 64-byte block
cmp BUFFER_END, BUFFER_PTR # if the current is the last one use
cmovae K_BASE, BUFFER_PTR # dummy source to avoid buffer overrun
/* rounds 60-79 */
RR F4,A,B,C,D,E,60
RR F4,D,E,A,B,C,62
RR F4,B,C,D,E,A,64
RR F4,E,A,B,C,D,66
RR F4,C,D,E,A,B,68
RR F4,A,B,C,D,E,70
RR F4,D,E,A,B,C,72
RR F4,B,C,D,E,A,74
RR F4,E,A,B,C,D,76
RR F4,C,D,E,A,B,78
/* add the working variables into the state and keep the sums in registers */
UPDATE_HASH (HASH_PTR), A
UPDATE_HASH 4(HASH_PTR), B
UPDATE_HASH 8(HASH_PTR), C
UPDATE_HASH 12(HASH_PTR), D
UPDATE_HASH 16(HASH_PTR), E
RESTORE_RENAMED_REGS
cmp K_BASE, BUFFER_PTR # K_BASE means, we reached the end
jne 1b
.endm
/*
 * Bind the assembly-time symbols A..E, T1 and T2 to their initial
 * registers. SWAP_REG_NAMES later changes these bindings while the round
 * code is being assembled.
 */
.macro INIT_REGALLOC
.set A, REG_A
.set B, REG_B
.set C, REG_C
.set D, REG_D
.set E, REG_E
.set T1, REG_T1
.set T2, REG_T2
.endm
/*
 * Emit the moves that bring the values currently named A, B, D and E back
 * into the registers they were bound to by INIT_REGALLOC. The loop in
 * SHA1_PIPELINED_MAIN_BODY jumps back to code that was assembled with those
 * initial bindings.
 */
.macro RESTORE_RENAMED_REGS
# order is important (REG_C is where it should be)
mov B, REG_B
mov D, REG_D
mov A, REG_A
mov E, REG_E
.endm
/*
 * Exchange the registers that the two assembly-time symbols refer to.
 * No instruction is emitted; _T is a scratch symbol.
 */
.macro SWAP_REG_NAMES a, b
.set _T, \a
.set \a, \b
.set \b, _T
.endm
/*
 * T1 = d ^ (b & (c ^ d))
 * c is copied into the T1 register first and the two names are swapped, so
 * the symbol passed as c keeps its value in what used to be the T1 register.
 */
.macro F1 b, c, d
mov \c, T1
SWAP_REG_NAMES \c, T1
xor \d, T1
and \b, T1
xor \d, T1
.endm
/*
 * T1 = b ^ c ^ d
 * d is copied into the T1 register first and the two names are swapped, so
 * the symbol passed as d keeps its value in what used to be the T1 register.
 */
.macro F2 b, c, d
mov \d, T1
SWAP_REG_NAMES \d, T1
xor \c, T1
xor \b, T1
.endm
/*
 * T1 = (b & c) | ((b | c) & d)
 * Uses T2 as a second temporary. c is copied into the T1 register first and
 * the two names are swapped, so the symbol passed as c keeps its value.
 */
.macro F3 b, c ,d
mov \c, T1
SWAP_REG_NAMES \c, T1
mov \b, T2
or \b, T1
and \c, T2
and \d, T1
or T2, T1
.endm
/* F4 is the same function as F2. */
.macro F4 b, c, d
F2 \b, \c, \d
.endm
/*
 * val += hash; hash = val
 * The sum ends up both in the memory operand and in the register.
 */
.macro UPDATE_HASH hash, val
add \hash, \val
mov \val, \hash
.endm
/*
 * RR does two rounds of SHA-1 back to back with W[] pre-calc
 * t1 = F(b, c, d); e += w(i)
 * e += t1; b <<= 30; d += w(i+1);
 * t1 = F(a, b, c);
 * d += t1; a <<= 5;
 * e += a;
 * t1 = e; a >>= 7;
 * t1 <<= 5;
 * d += t1;
 *
 * F is one of F1..F4, a..e are the assembly-time register symbols, and
 * round is the first of the two rounds handled. Two schedule steps for
 * rounds round + W_PRECALC_AHEAD and round + W_PRECALC_AHEAD + 1 are
 * interleaved with the scalar code. Both the F macro and the final
 * SWAP_REG_NAMES change which registers the symbols refer to.
 */
.macro RR F, a, b, c, d, e, round
add WK(\round), \e
\F \b, \c, \d # t1 = F(b, c, d);
W_PRECALC (\round + W_PRECALC_AHEAD)
rol $30, \b
add T1, \e
add WK(\round + 1), \d
\F \a, \b, \c
W_PRECALC (\round + W_PRECALC_AHEAD + 1)
rol $5, \a
add \a, \e
add T1, \d
ror $7, \a # (a <<r 5) >>r 7) => a <<r 30)
mov \e, T1
SWAP_REG_NAMES \e, T1
rol $5, T1
add T1, \d
# write: \a, \b
# rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c
.endm
/*
 * Emit one step of the message schedule pre-computation for round r.
 *
 * K_XMM is set to the byte offset of the round constant inside K_XMM_AR
 * (one 16-byte row per group of 20 rounds). Rounds 80 and up wrap around to
 * rounds 0..15 of the next block; when that wrap reaches round 0 the
 * W / W_minus_NN names are reset. The 00_15 step addresses (K_BASE)
 * directly and does not use K_XMM.
 */
.macro W_PRECALC r
.set i, \r
.if (i < 20)
.set K_XMM, 0
.elseif (i < 40)
.set K_XMM, 16
.elseif (i < 60)
.set K_XMM, 32
.elseif (i < 80)
.set K_XMM, 48
.endif
.if ((i < 16) || ((i >= 80) && (i < (80 + W_PRECALC_AHEAD))))
.set i, ((\r) % 80) # pre-compute for the next iteration
.if (i == 0)
W_PRECALC_RESET
.endif
W_PRECALC_00_15
.elseif (i<32)
W_PRECALC_16_31
.elseif (i < 80) // rounds 32-79
W_PRECALC_32_79
.endif
.endm
/*
 * Bind W and the W_minus_NN symbols to their initial vector registers.
 * W and W_minus_32 name the same register.
 */
.macro W_PRECALC_RESET
.set W, W0
.set W_minus_04, W4
.set W_minus_08, W8
.set W_minus_12, W12
.set W_minus_16, W16
.set W_minus_20, W20
.set W_minus_24, W24
.set W_minus_28, W28
.set W_minus_32, W
.endm
/*
 * Advance the schedule window by one vector: every W_minus_NN name moves to
 * the register of its younger neighbour, the vector just finished becomes
 * W_minus_04, and W is rebound to the register of the oldest vector. No
 * instruction is emitted.
 */
.macro W_PRECALC_ROTATE
.set W_minus_32, W_minus_28
.set W_minus_28, W_minus_24
.set W_minus_24, W_minus_20
.set W_minus_20, W_minus_16
.set W_minus_16, W_minus_12
.set W_minus_12, W_minus_08
.set W_minus_08, W_minus_04
.set W_minus_04, W
.set W, W_minus_32
.endm
/*
 * Expanding this macro defines the SSSE3 flavour of the schedule steps: the
 * three dispatch macros W_PRECALC_00_15/16_31/32_79 used by W_PRECALC, and
 * the *_SSSE3 macros they forward to.
 *
 * Each step macro emits one quarter of a 4-wide vector computation,
 * selected by (i & 3), so that four consecutive calls finish four w values,
 * store w+K into the WK buffer and rotate the register names.
 */
.macro W_PRECALC_SSSE3
.macro W_PRECALC_00_15
W_PRECALC_00_15_SSSE3
.endm
.macro W_PRECALC_16_31
W_PRECALC_16_31_SSSE3
.endm
.macro W_PRECALC_32_79
W_PRECALC_32_79_SSSE3
.endm
/* message scheduling pre-compute for rounds 0-15 */
.macro W_PRECALC_00_15_SSSE3
.if ((i & 3) == 0)
/* load four message words from the input, unaligned */
movdqu (i*4)(BUFFER_PTR), W_TMP1
.elseif ((i & 3) == 1)
/* byte-swap each word and keep the result as W */
pshufb XMM_SHUFB_BSWAP, W_TMP1
movdqa W_TMP1, W
.elseif ((i & 3) == 2)
/* add the constant of the first row of K_XMM_AR */
paddd (K_BASE), W_TMP1
.elseif ((i & 3) == 3)
movdqa W_TMP1, WK(i&~3)
W_PRECALC_ROTATE
.endif
.endm
/* message scheduling pre-compute for rounds 16-31
*
* - calculating last 32 w[i] values in 8 XMM registers
* - pre-calculate K+w[i] values and store to mem, for later load by ALU add
* instruction
*
* some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3]
* dependency, but improves for 32-79
*/
.macro W_PRECALC_16_31_SSSE3
# blended scheduling of vector and scalar instruction streams, one 4-wide
# vector iteration / 4 scalar rounds
.if ((i & 3) == 0)
movdqa W_minus_12, W
palignr $8, W_minus_16, W # w[i-14]
movdqa W_minus_04, W_TMP1
psrldq $4, W_TMP1 # w[i-3]
pxor W_minus_08, W
.elseif ((i & 3) == 1)
pxor W_minus_16, W_TMP1
pxor W_TMP1, W
movdqa W, W_TMP2
movdqa W, W_TMP1
pslldq $12, W_TMP2
.elseif ((i & 3) == 2)
psrld $31, W
pslld $1, W_TMP1
por W, W_TMP1
movdqa W_TMP2, W
psrld $30, W_TMP2
pslld $2, W
.elseif ((i & 3) == 3)
pxor W, W_TMP1
pxor W_TMP2, W_TMP1
movdqa W_TMP1, W
paddd K_XMM(K_BASE), W_TMP1
movdqa W_TMP1, WK(i&~3)
W_PRECALC_ROTATE
.endif
.endm
/* message scheduling pre-compute for rounds 32-79
*
* in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
* instead we do equal: w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
* allows more efficient vectorization since w[i]=>w[i-3] dependency is broken
*/
.macro W_PRECALC_32_79_SSSE3
.if ((i & 3) == 0)
movdqa W_minus_04, W_TMP1
pxor W_minus_28, W # W is W_minus_32 before xor
palignr $8, W_minus_08, W_TMP1
.elseif ((i & 3) == 1)
pxor W_minus_16, W
pxor W_TMP1, W
movdqa W, W_TMP1
.elseif ((i & 3) == 2)
/* rotate left by 2: (W >> 30) | (W << 2) */
psrld $30, W
pslld $2, W_TMP1
por W, W_TMP1
.elseif ((i & 3) == 3)
movdqa W_TMP1, W
paddd K_XMM(K_BASE), W_TMP1
movdqa W_TMP1, WK(i&~3)
W_PRECALC_ROTATE
.endif
.endm
.endm // W_PRECALC_SSSE3
/* SHA-1 round constants, one per group of 20 rounds. */
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6
.section .rodata
.align 16
/*
 * One 16-byte row per constant, each holding four copies, so that a single
 * paddd adds the constant to four schedule words. Rows are selected with
 * the K_XMM offset set in W_PRECALC.
 */
K_XMM_AR:
.long K1, K1, K1, K1
.long K2, K2, K2, K2
.long K3, K3, K3, K3
.long K4, K4, K4, K4
/* pshufb control mask that reverses the byte order inside each 32-bit word */
BSWAP_SHUFB_CTL:
.long 0x00010203
.long 0x04050607
.long 0x08090a0b
.long 0x0c0d0e0f
.section .text
W_PRECALC_SSSE3
/* Vector move used by the shared code; SSE flavour (unaligned movdqu) */
.macro xmm_mov a, b
movdqu \a,\b
.endm
/* SSSE3 optimized implementation:
* extern "C" void sha1_transform_ssse3(u32 *digest, const char *data,
* unsigned int rounds);
*/
SHA1_VECTOR_ASM sha1_transform_ssse3
#ifdef SHA1_ENABLE_AVX_SUPPORT
/*
 * Expanding W_PRECALC_AVX replaces the three W_PRECALC_* dispatch macros with
 * versions that forward to the AVX variants defined below.
 */
.macro W_PRECALC_AVX
.purgem W_PRECALC_00_15
.macro W_PRECALC_00_15
W_PRECALC_00_15_AVX
.endm
.purgem W_PRECALC_16_31
.macro W_PRECALC_16_31
W_PRECALC_16_31_AVX
.endm
.purgem W_PRECALC_32_79
.macro W_PRECALC_32_79
W_PRECALC_32_79_AVX
.endm
/*
 * Rounds 0-15: load 16 bytes of input, byte-swap each dword, add the round
 * constant and store K+W. One slice per value of (i & 3).
 */
.macro W_PRECALC_00_15_AVX
.if ((i & 3) == 0)
vmovdqu (i*4)(BUFFER_PTR), W_TMP1
.elseif ((i & 3) == 1)
vpshufb XMM_SHUFB_BSWAP, W_TMP1, W
.elseif ((i & 3) == 2)
vpaddd (K_BASE), W, W_TMP1
.elseif ((i & 3) == 3)
vmovdqa W_TMP1, WK(i&~3)
W_PRECALC_ROTATE
.endif
.endm
/*
 * Rounds 16-31: same computation as the SSSE3 variant, using three-operand
 * AVX forms so the register copies are not needed.
 */
.macro W_PRECALC_16_31_AVX
.if ((i & 3) == 0)
/* slice 0: gather operands and start both XOR chains */
vpalignr $8, W_minus_16, W_minus_12, W # w[i-14]
vpsrldq $4, W_minus_04, W_TMP1 # w[i-3]
vpxor W_minus_08, W, W
vpxor W_minus_16, W_TMP1, W_TMP1
.elseif ((i & 3) == 1)
/* slice 1: finish the XOR; W_TMP2 = lowest dword moved to the top lane */
vpxor W_TMP1, W, W
vpslldq $12, W, W_TMP2
vpslld $1, W, W_TMP1
.elseif ((i & 3) == 2)
/* slice 2: complete rotate-left-1 in W_TMP1, split rotate-left-2 halves */
vpsrld $31, W, W
vpor W, W_TMP1, W_TMP1
vpslld $2, W_TMP2, W
vpsrld $30, W_TMP2, W_TMP2
.elseif ((i & 3) == 3)
/* slice 3: fold the rotate-by-2 halves in, then store K+W */
vpxor W, W_TMP1, W_TMP1
vpxor W_TMP2, W_TMP1, W
vpaddd K_XMM(K_BASE), W, W_TMP1
vmovdqu W_TMP1, WK(i&~3)
W_PRECALC_ROTATE
.endif
.endm
/*
 * Rounds 32-79: XOR of four earlier vectors rotated left by 2, then K+W is
 * stored. One slice per value of (i & 3).
 */
.macro W_PRECALC_32_79_AVX
.if ((i & 3) == 0)
vpalignr $8, W_minus_08, W_minus_04, W_TMP1
vpxor W_minus_28, W, W # W is W_minus_32 before xor
.elseif ((i & 3) == 1)
vpxor W_minus_16, W_TMP1, W_TMP1
vpxor W_TMP1, W, W
.elseif ((i & 3) == 2)
/* rotate left by 2: shift pair + or */
vpslld $2, W, W_TMP1
vpsrld $30, W, W
vpor W, W_TMP1, W
.elseif ((i & 3) == 3)
vpaddd K_XMM(K_BASE), W, W_TMP1
vmovdqu W_TMP1, WK(i&~3)
W_PRECALC_ROTATE
.endif
.endm
.endm // W_PRECALC_AVX
W_PRECALC_AVX
/* Replace the SSE xmm_mov with the AVX-encoded unaligned move */
.purgem xmm_mov
.macro xmm_mov a, b
vmovdqu \a,\b
.endm
/* AVX optimized implementation:
* extern "C" void sha1_transform_avx(u32 *digest, const char *data,
* unsigned int rounds);
*/
SHA1_VECTOR_ASM sha1_transform_avx
#endif
+240
View File
@@ -0,0 +1,240 @@
/*
* Cryptographic API.
*
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
* Supplemental SSE3 instructions.
*
* This file is based on sha1_generic.c
*
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
/*
 * Assembler block transforms. Each takes the five-word digest state, a
 * pointer to the input data and the number of SHA1_BLOCK_SIZE-byte blocks
 * to process ("rounds").
 */
asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
unsigned int rounds);
#ifdef SHA1_ENABLE_AVX_SUPPORT
asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
unsigned int rounds);
#endif
/* Transform selected by sha1_ssse3_mod_init(); NULL until one is chosen */
static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
static int sha1_ssse3_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
/*
 * Feed @len bytes at @data into the hash using the selected assembler
 * transform.
 *
 * @partial is the number of bytes already sitting in sctx->buffer. The
 * caller must pass enough data to complete that buffered block when
 * @partial is non-zero. Any tail shorter than one block is left in
 * sctx->buffer. Always returns 0.
 */
static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len, unsigned int partial)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	const u8 *src = data;
	unsigned int remaining = len;
	unsigned int blocks;

	sctx->count += len;

	/* Top up the buffered block first and hash it on its own. */
	if (partial) {
		const unsigned int fill = SHA1_BLOCK_SIZE - partial;

		memcpy(sctx->buffer + partial, src, fill);
		sha1_transform_asm(sctx->state, sctx->buffer, 1);
		src += fill;
		remaining -= fill;
	}

	/* Hash every whole block straight from the caller's data. */
	blocks = remaining / SHA1_BLOCK_SIZE;
	if (blocks) {
		sha1_transform_asm(sctx->state, src, blocks);
		src += blocks * SHA1_BLOCK_SIZE;
		remaining -= blocks * SHA1_BLOCK_SIZE;
	}

	/* Keep the leftover bytes for the next call. */
	memcpy(sctx->buffer, src, remaining);

	return 0;
}
/*
 * shash ->update(): absorb @len bytes.
 *
 * Input that does not complete a block is only buffered. Otherwise the
 * assembler path is used inside a kernel_fpu_begin()/kernel_fpu_end()
 * section when irq_fpu_usable() allows it, and crypto_sha1_update() is
 * used when it does not.
 */
static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
			     unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	const unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
	int res;

	if (partial + len >= SHA1_BLOCK_SIZE) {
		if (irq_fpu_usable()) {
			kernel_fpu_begin();
			res = __sha1_ssse3_update(desc, data, len, partial);
			kernel_fpu_end();
			return res;
		}

		return crypto_sha1_update(desc, data, len);
	}

	/* Fast case: nothing to hash yet, just append to the buffer. */
	memcpy(sctx->buffer + partial, data, len);
	sctx->count += len;

	return 0;
}
/*
 * shash ->final(): append the SHA-1 padding and the 64-bit big-endian bit
 * length, write the digest to @out as five big-endian words, and wipe the
 * context. Always returns 0.
 */
static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
	struct sha1_state *sctx = shash_desc_ctx(desc);
	const unsigned int index = sctx->count % SHA1_BLOCK_SIZE;
	const __be64 bits = cpu_to_be64(sctx->count << 3);
	__be32 *dst = (__be32 *)out;
	unsigned int padlen, i;

	/* Pad so the message length becomes 56 mod 64; the length follows. */
	if (index < 56)
		padlen = 56 - index;
	else
		padlen = (SHA1_BLOCK_SIZE + 56) - index;

	if (irq_fpu_usable()) {
		kernel_fpu_begin();
		if (padlen > 56) {
			/* The padding crosses a block boundary: hash it. */
			__sha1_ssse3_update(desc, padding, padlen, index);
		} else {
			/*
			 * The padding stays inside the current block, so only
			 * buffer it; the length bytes below complete the block.
			 */
			memcpy(sctx->buffer + index, padding, padlen);
			sctx->count += padlen;
		}
		__sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
		kernel_fpu_end();
	} else {
		crypto_sha1_update(desc, padding, padlen);
		crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
	}

	/* Emit the state words in big-endian order. */
	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Do not leave hash state behind. */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
/*
 * shash ->export(): copy the whole struct sha1_state of @desc into @out.
 * Always returns 0.
 */
static int sha1_ssse3_export(struct shash_desc *desc, void *out)
{
	const struct sha1_state *state = shash_desc_ctx(desc);

	memcpy(out, state, sizeof(struct sha1_state));

	return 0;
}
/*
 * shash ->import(): overwrite the struct sha1_state of @desc with the
 * image at @in. Always returns 0.
 */
static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
{
	struct sha1_state *state = shash_desc_ctx(desc);

	memcpy(state, in, sizeof(struct sha1_state));

	return 0;
}
/*
 * shash descriptor registered by this module: algorithm "sha1", driver name
 * "sha1-ssse3", priority 150. Both the per-request context and the exported
 * state are a plain struct sha1_state.
 */
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_ssse3_init,
.update = sha1_ssse3_update,
.final = sha1_ssse3_final,
.export = sha1_ssse3_export,
.import = sha1_ssse3_import,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
#ifdef SHA1_ENABLE_AVX_SUPPORT
/*
 * Report whether the AVX transform may be used: the CPU must advertise AVX
 * and OSXSAVE, and XCR0 must have both the SSE and YMM state bits set.
 * Logs a message when AVX is present but those bits are not both set.
 */
static bool __init avx_usable(void)
{
	const u64 required = XSTATE_SSE | XSTATE_YMM;
	u64 xcr0;

	if (!(cpu_has_avx && cpu_has_osxsave))
		return false;

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & required) == required)
		return true;

	pr_info("AVX detected but unusable.\n");
	return false;
}
#endif
/*
 * Module init: choose the assembler transform and register the algorithm.
 * Returns -ENODEV when neither SSSE3 nor (if built in) AVX can be used,
 * otherwise the result of crypto_register_shash().
 */
static int __init sha1_ssse3_mod_init(void)
{
	/* SSSE3 is the baseline implementation. */
	if (cpu_has_ssse3)
		sha1_transform_asm = sha1_transform_ssse3;

#ifdef SHA1_ENABLE_AVX_SUPPORT
	/* AVX takes precedence over SSSE3, it's a little faster. */
	if (avx_usable())
		sha1_transform_asm = sha1_transform_avx;
#endif

	if (!sha1_transform_asm) {
		pr_info("Neither AVX nor SSSE3 is available/usable.\n");
		return -ENODEV;
	}

	pr_info("Using %s optimized SHA-1 implementation\n",
		sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
							   : "AVX");

	return crypto_register_shash(&alg);
}
/* Module exit: unregister the algorithm registered by sha1_ssse3_mod_init() */
static void __exit sha1_ssse3_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(sha1_ssse3_mod_init);
module_exit(sha1_ssse3_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha1");
@@ -0,0 +1,335 @@
/***************************************************************************
* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
.file "twofish-i586-asm.S"
.text
#include <asm/asm-offsets.h>
/*
 * Stack offsets of the arguments relative to %esp on entry (return address
 * at 0). The functions push four registers first and therefore add 16.
 */
#define in_blk 12 /* input byte array address parameter*/
#define out_blk 8 /* output byte array address parameter*/
#define ctx 4 /* Twofish context structure */
/* Byte offsets of the four 32-bit words a, b, c, d inside a block */
#define a_offset 0
#define b_offset 4
#define c_offset 8
#define d_offset 12
/* Structure of the crypto context struct*/
#define s0 0 /* S0 Array 256 Words each */
#define s1 1024 /* S1 Array */
#define s2 2048 /* S2 Array */
#define s3 3072 /* S3 Array */
#define w 4096 /* 8 whitening keys (word) */
#define k 4128 /* key 1-32 ( word ) */
/*
 * define a few register aliases to allow macro substitution:
 * RnD is the 32-bit register, RnB its low byte, RnH its second byte
 */
#define R0D %eax
#define R0B %al
#define R0H %ah
#define R1D %ebx
#define R1B %bl
#define R1H %bh
#define R2D %ecx
#define R2B %cl
#define R2H %ch
#define R3D %edx
#define R3B %dl
#define R3H %dh
/*
 * performs input whitening: XORs src with the whitening key word at byte
 * offset "offset" inside the w array of the context
 */
#define input_whitening(src,context,offset)\
xor w+offset(context), src;
/*
 * performs output whitening: like input_whitening, but uses the key words
 * that start 16 bytes into the w array
 */
#define output_whitening(src,context,offset)\
xor w+16+offset(context), src;
/*
 * One encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's key pair inside the k array
 *
 * %ebp must hold the context address. d is saved on the stack so that its
 * register can collect the table lookups indexed by the bytes of b, while
 * %esi collects the lookups indexed by the bytes of a; the saved d is then
 * popped into %edi. The two sums are mixed (esi += d; d += esi), the keys at
 * k+round and k+4+round are added, and the results are XORed into c and the
 * saved d. %esi and %edi are clobbered.
 * operations on a and b are interleaved to increase performance
 */
#define encrypt_round(a,b,c,d,round)\
push d ## D;\
movzx b ## B, %edi;\
mov s1(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
mov s2(%ebp,%edi,4),%esi;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor s2(%ebp,%edi,4),d ## D;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%ebp,%edi,4),%esi;\
movzx b ## B, %edi;\
xor s3(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
xor (%ebp,%edi,4), %esi;\
movzx b ## H, %edi;\
ror $15, b ## D;\
xor (%ebp,%edi,4), d ## D;\
movzx a ## H, %edi;\
xor s1(%ebp,%edi,4),%esi;\
pop %edi;\
add d ## D, %esi;\
add %esi, d ## D;\
add k+round(%ebp), %esi;\
xor %esi, c ## D;\
rol $15, c ## D;\
add k+4+round(%ebp),d ## D;\
xor %edi, d ## D;
/*
 * Final encryption round. Same computation as encrypt_round, but b is
 * rotated by 16 (not 15) the second time and c is rotated right by 1 after
 * the XOR instead of left by 15.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's key pair inside the k array
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 * %ebp must hold the context address; %esi and %edi are clobbered.
 */
#define encrypt_last_round(a,b,c,d,round)\
push d ## D;\
movzx b ## B, %edi;\
mov s1(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
mov s2(%ebp,%edi,4),%esi;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor s2(%ebp,%edi,4),d ## D;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%ebp,%edi,4),%esi;\
movzx b ## B, %edi;\
xor s3(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
xor (%ebp,%edi,4), %esi;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%ebp,%edi,4), d ## D;\
movzx a ## H, %edi;\
xor s1(%ebp,%edi,4),%esi;\
pop %edi;\
add d ## D, %esi;\
add %esi, d ## D;\
add k+round(%ebp), %esi;\
xor %esi, c ## D;\
ror $1, c ## D;\
add k+4+round(%ebp),d ## D;\
xor %edi, d ## D;
/*
 * One decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's key pair inside the k array
 *
 * %ebp must hold the context address. c is saved on the stack so that its
 * register can collect the table lookups indexed by the bytes of a, while
 * %esi collects the lookups indexed by the bytes of b; the saved c is then
 * popped into %edi. The two sums are mixed (c += esi; esi += c), the keys at
 * k+round and k+4+round are added, and the results are XORed with the saved
 * c and into d; d is then rotated left by 15. %esi and %edi are clobbered.
 * operations on a and b are interleaved to increase performance
 */
#define decrypt_round(a,b,c,d,round)\
push c ## D;\
movzx a ## B, %edi;\
mov (%ebp,%edi,4), c ## D;\
movzx b ## B, %edi;\
mov s3(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s1(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%ebp,%edi,4), %esi;\
movzx a ## B, %edi;\
xor s2(%ebp,%edi,4),c ## D;\
movzx b ## B, %edi;\
xor s1(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $15, a ## D;\
xor s3(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
xor s2(%ebp,%edi,4),%esi;\
pop %edi;\
add %esi, c ## D;\
add c ## D, %esi;\
add k+round(%ebp), c ## D;\
xor %edi, c ## D;\
add k+4+round(%ebp),%esi;\
xor %esi, d ## D;\
rol $15, d ## D;
/*
 * Final decryption round. Same computation as decrypt_round, but a is
 * rotated by 16 (not 15) the second time and d is rotated right by 1 after
 * the XOR instead of left by 15.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's key pair inside the k array
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 * %ebp must hold the context address; %esi and %edi are clobbered.
 */
#define decrypt_last_round(a,b,c,d,round)\
push c ## D;\
movzx a ## B, %edi;\
mov (%ebp,%edi,4), c ## D;\
movzx b ## B, %edi;\
mov s3(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s1(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%ebp,%edi,4), %esi;\
movzx a ## B, %edi;\
xor s2(%ebp,%edi,4),c ## D;\
movzx b ## B, %edi;\
xor s1(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
xor s2(%ebp,%edi,4),%esi;\
pop %edi;\
add %esi, c ## D;\
add c ## D, %esi;\
add k+round(%ebp), c ## D;\
xor %edi, c ## D;\
add k+4+round(%ebp),%esi;\
xor %esi, d ## D;\
ror $1, d ## D;
.align 4
.global twofish_enc_blk
.global twofish_dec_blk
/*
 * Encrypt one 16-byte block.
 * Stack arguments: ctx, out_blk, in_blk (see the offset defines above).
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax is set to 1 on
 * return.
 */
twofish_enc_blk:
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
push %edi
mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base
* pointer to the ctx address */
mov in_blk+16(%esp),%edi /* input address in edi */
/* load the four input words a, b, c, d */
mov (%edi), %eax
mov b_offset(%edi), %ebx
mov c_offset(%edi), %ecx
mov d_offset(%edi), %edx
/* whiten the input and apply the rotations the round macros expect */
input_whitening(%eax,%ebp,a_offset)
ror $16, %eax
input_whitening(%ebx,%ebp,b_offset)
input_whitening(%ecx,%ebp,c_offset)
input_whitening(%edx,%ebp,d_offset)
rol $1, %edx
/* 16 rounds; the register pairs swap roles every round */
encrypt_round(R0,R1,R2,R3,0);
encrypt_round(R2,R3,R0,R1,8);
encrypt_round(R0,R1,R2,R3,2*8);
encrypt_round(R2,R3,R0,R1,3*8);
encrypt_round(R0,R1,R2,R3,4*8);
encrypt_round(R2,R3,R0,R1,5*8);
encrypt_round(R0,R1,R2,R3,6*8);
encrypt_round(R2,R3,R0,R1,7*8);
encrypt_round(R0,R1,R2,R3,8*8);
encrypt_round(R2,R3,R0,R1,9*8);
encrypt_round(R0,R1,R2,R3,10*8);
encrypt_round(R2,R3,R0,R1,11*8);
encrypt_round(R0,R1,R2,R3,12*8);
encrypt_round(R2,R3,R0,R1,13*8);
encrypt_round(R0,R1,R2,R3,14*8);
encrypt_last_round(R2,R3,R0,R1,15*8);
/* output whitening; note the halves are stored swapped (eax/ebx -> c/d) */
output_whitening(%eax,%ebp,c_offset)
output_whitening(%ebx,%ebp,d_offset)
output_whitening(%ecx,%ebp,a_offset)
output_whitening(%edx,%ebp,b_offset)
mov out_blk+16(%esp),%edi;
mov %eax, c_offset(%edi)
mov %ebx, d_offset(%edi)
mov %ecx, (%edi)
mov %edx, b_offset(%edi)
/* restore saved registers in reverse order */
pop %edi
pop %esi
pop %ebx
pop %ebp
mov $1, %eax
ret
/*
 * Decrypt one 16-byte block.
 * Stack arguments: ctx, out_blk, in_blk (see the offset defines above).
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax is set to 1 on
 * return.
 */
twofish_dec_blk:
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
push %edi
mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base
* pointer to the ctx address */
mov in_blk+16(%esp),%edi /* input address in edi */
/* load the four input words */
mov (%edi), %eax
mov b_offset(%edi), %ebx
mov c_offset(%edi), %ecx
mov d_offset(%edi), %edx
/* undo the output whitening and apply the rotations the rounds expect */
output_whitening(%eax,%ebp,a_offset)
output_whitening(%ebx,%ebp,b_offset)
ror $16, %ebx
output_whitening(%ecx,%ebp,c_offset)
output_whitening(%edx,%ebp,d_offset)
rol $1, %ecx
/* 16 rounds with the round keys in reverse order */
decrypt_round(R0,R1,R2,R3,15*8);
decrypt_round(R2,R3,R0,R1,14*8);
decrypt_round(R0,R1,R2,R3,13*8);
decrypt_round(R2,R3,R0,R1,12*8);
decrypt_round(R0,R1,R2,R3,11*8);
decrypt_round(R2,R3,R0,R1,10*8);
decrypt_round(R0,R1,R2,R3,9*8);
decrypt_round(R2,R3,R0,R1,8*8);
decrypt_round(R0,R1,R2,R3,7*8);
decrypt_round(R2,R3,R0,R1,6*8);
decrypt_round(R0,R1,R2,R3,5*8);
decrypt_round(R2,R3,R0,R1,4*8);
decrypt_round(R0,R1,R2,R3,3*8);
decrypt_round(R2,R3,R0,R1,2*8);
decrypt_round(R0,R1,R2,R3,1*8);
decrypt_last_round(R2,R3,R0,R1,0);
/* undo the input whitening; the halves are stored swapped (eax/ebx -> c/d) */
input_whitening(%eax,%ebp,c_offset)
input_whitening(%ebx,%ebp,d_offset)
input_whitening(%ecx,%ebp,a_offset)
input_whitening(%edx,%ebp,b_offset)
mov out_blk+16(%esp),%edi;
mov %eax, c_offset(%edi)
mov %ebx, d_offset(%edi)
mov %ecx, (%edi)
mov %edx, b_offset(%edi)
/* restore saved registers in reverse order */
pop %edi
pop %esi
pop %ebx
pop %ebp
mov $1, %eax
ret
@@ -0,0 +1,316 @@
/*
* Twofish Cipher 3-way parallel algorithm (x86_64)
*
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
.file "twofish-x86_64-asm-3way.S"
.text
/* structure of crypto context: byte offsets of the tables and key arrays */
#define s0 0
#define s1 1024
#define s2 2048
#define s3 3072
#define w 4096
#define k 4128
/**********************************************************************
3-way twofish
**********************************************************************/
/* context pointer and source/destination pointer */
#define CTX %rdi
#define RIO %rdx
/* one 64-bit register per block for the a:b pair, plus sub-register names */
#define RAB0 %rax
#define RAB1 %rbx
#define RAB2 %rcx
#define RAB0d %eax
#define RAB1d %ebx
#define RAB2d %ecx
#define RAB0bh %ah
#define RAB1bh %bh
#define RAB2bh %ch
#define RAB0bl %al
#define RAB1bl %bl
#define RAB2bl %cl
/* one 64-bit register per block for the c:d pair */
#define RCD0 %r8
#define RCD1 %r9
#define RCD2 %r10
#define RCD0d %r8d
#define RCD1d %r9d
#define RCD2d %r10d
/* per-block accumulators for the two g-function results */
#define RX0 %rbp
#define RX1 %r11
#define RX2 %r12
#define RX0d %ebp
#define RX1d %r11d
#define RX2d %r12d
#define RY0 %r13
#define RY1 %r14
#define RY2 %r15
#define RY0d %r13d
#define RY1d %r14d
#define RY2d %r15d
/* scratch index registers; note RT0 is the same register as RIO (%rdx) */
#define RT0 %rdx
#define RT1 %rsi
#define RT0d %edx
#define RT1d %esi
/*
 * Use the two lowest bytes of ab as indices: the low byte into table T0
 * (combined into dst with op1) and the second byte into table T1 (combined
 * with op2). ab is rotated right by rot bits after the bytes are extracted.
 * tmp1 and tmp2 are clobbered.
 */
#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
movzbl ab ## bl, tmp2 ## d; \
movzbl ab ## bh, tmp1 ## d; \
rorq $(rot), ab; \
op1##l T0(CTX, tmp2, 4), dst ## d; \
op2##l T1(CTX, tmp1, 4), dst ## d;
/*
 * Combined G1 & G2 function. Reordered with help of rotates to have moves
 * at beginning.
 *
 * Runs the table lookups for all three blocks, leaving the two results of
 * block N in x##N and y##N, and finally exchanges ab##N with cd##N.
 * Tx0..Tx3 / Ty0..Ty3 select which context table each byte position uses.
 */
#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
/* G1,1 && G2,1 */ \
do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
\
do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
\
do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
\
/* G1,2 && G2,2 */ \
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
xchgq cd ## 0, ab ## 0; \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
xchgq cd ## 1, ab ## 1; \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
xchgq cd ## 2, ab ## 2;
/*
 * Finish encryption round n for one block: mix x and y (x += y; y += x), add
 * round keys 2n and 2n+1, XOR x with the low half of ab and y with the high
 * half (rotated left by 1 first), rotate x right by 1 and repack both
 * halves into ab.
 */
#define enc_round_end(ab, x, y, n) \
addl y ## d, x ## d; \
addl x ## d, y ## d; \
addl k+4*(2*(n))(CTX), x ## d; \
xorl ab ## d, x ## d; \
addl k+4*(2*(n)+1)(CTX), y ## d; \
shrq $32, ab; \
roll $1, ab ## d; \
xorl y ## d, ab ## d; \
shlq $32, ab; \
rorl $1, x ## d; \
orq x, ab;
/*
 * Finish decryption round n for one block: mix x and y (x += y; y += x), add
 * round keys 2n and 2n+1, XOR y with the low half of ba and x with the high
 * half (rotated left by 1 first), rotate y right by 1 and repack both
 * halves into ba.
 */
#define dec_round_end(ba, x, y, n) \
addl y ## d, x ## d; \
addl x ## d, y ## d; \
addl k+4*(2*(n))(CTX), x ## d; \
addl k+4*(2*(n)+1)(CTX), y ## d; \
xorl ba ## d, y ## d; \
shrq $32, ba; \
roll $1, ba ## d; \
xorl x ## d, ba ## d; \
shlq $32, ba; \
rorl $1, y ## d; \
orq y, ba;
/* Encryption round n for all three blocks: table lookups, then round end */
#define encrypt_round3(ab, cd, n) \
g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
\
enc_round_end(ab ## 0, RX0, RY0, n); \
enc_round_end(ab ## 1, RX1, RY1, n); \
enc_round_end(ab ## 2, RX2, RY2, n);
/*
 * Decryption round n for all three blocks. Compared with encrypt_round3 the
 * table arguments are permuted and RX/RY are passed to g1g2_3 swapped.
 */
#define decrypt_round3(ba, dc, n) \
g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
\
dec_round_end(ba ## 0, RX0, RY0, n); \
dec_round_end(ba ## 1, RX1, RY1, n); \
dec_round_end(ba ## 2, RX2, RY2, n);
/* Cycle n = encryption rounds 2n and 2n+1, in that order */
#define encrypt_cycle3(ab, cd, n) \
encrypt_round3(ab, cd, n*2); \
encrypt_round3(ab, cd, (n*2)+1);
/* Cycle n = decryption rounds 2n+1 and 2n, in that (reverse) order */
#define decrypt_cycle3(ba, dc, n) \
decrypt_round3(ba, dc, (n*2)+1); \
decrypt_round3(ba, dc, (n*2));
/*
 * Load the 64-bit word pair starting at word n of three consecutive 16-byte
 * blocks at "in" into xy0..xy2 and XOR each with the whitening key pair
 * starting at word m of the w array.
 */
#define inpack3(in, n, xy, m) \
movq 4*(n)(in), xy ## 0; \
xorq w+4*m(CTX), xy ## 0; \
\
movq 4*(4+(n))(in), xy ## 1; \
xorq w+4*m(CTX), xy ## 1; \
\
movq 4*(8+(n))(in), xy ## 2; \
xorq w+4*m(CTX), xy ## 2;
/*
 * XOR xy0..xy2 with the whitening key pair starting at word m of the w array
 * and write each to word n of three consecutive 16-byte blocks at "out".
 * op selects the store: mov overwrites, xor combines with the destination.
 */
#define outunpack3(op, out, n, xy, m) \
xorq w+4*m(CTX), xy ## 0; \
op ## q xy ## 0, 4*(n)(out); \
\
xorq w+4*m(CTX), xy ## 1; \
op ## q xy ## 1, 4*(4+(n))(out); \
\
xorq w+4*m(CTX), xy ## 2; \
op ## q xy ## 2, 4*(8+(n))(out);
/* Load three blocks for encryption, whitening with key words 0-3 */
#define inpack_enc3() \
inpack3(RIO, 0, RAB, 0); \
inpack3(RIO, 2, RCD, 2);
/*
 * Store three encrypted blocks, whitening with key words 4-7. RAB goes to
 * the second half of each block and RCD to the first half.
 */
#define outunpack_enc3(op) \
outunpack3(op, RIO, 2, RAB, 6); \
outunpack3(op, RIO, 0, RCD, 4);
/*
 * Load three blocks for decryption, whitening with key words 4-7, and swap
 * the 32-bit halves of every loaded register.
 */
#define inpack_dec3() \
inpack3(RIO, 0, RAB, 4); \
rorq $32, RAB0; \
rorq $32, RAB1; \
rorq $32, RAB2; \
inpack3(RIO, 2, RCD, 6); \
rorq $32, RCD0; \
rorq $32, RCD1; \
rorq $32, RCD2;
/*
 * Swap the 32-bit halves of every register back, then store three decrypted
 * blocks, whitening with key words 0-3. RCD goes to the first half of each
 * block and RAB to the second half.
 */
#define outunpack_dec3() \
rorq $32, RCD0; \
rorq $32, RCD1; \
rorq $32, RCD2; \
outunpack3(mov, RIO, 0, RCD, 0); \
rorq $32, RAB0; \
rorq $32, RAB1; \
rorq $32, RAB2; \
outunpack3(mov, RIO, 2, RAB, 2);
.align 8
.global __twofish_enc_blk_3way
.type __twofish_enc_blk_3way,@function;
/* Encrypt three consecutive 16-byte blocks; optionally XOR into dst */
__twofish_enc_blk_3way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src, RIO
* %rcx: bool, if true: xor output
*/
pushq %r15;
pushq %r14;
pushq %r13;
pushq %r12;
pushq %rbp;
pushq %rbx;
/* %rcx and %rsi are reused as RAB2 and RT1 below, so park them */
pushq %rcx; /* bool xor */
pushq %rsi; /* dst */
inpack_enc3();
encrypt_cycle3(RAB, RCD, 0);
encrypt_cycle3(RAB, RCD, 1);
encrypt_cycle3(RAB, RCD, 2);
encrypt_cycle3(RAB, RCD, 3);
encrypt_cycle3(RAB, RCD, 4);
encrypt_cycle3(RAB, RCD, 5);
encrypt_cycle3(RAB, RCD, 6);
encrypt_cycle3(RAB, RCD, 7);
popq RIO; /* dst */
popq %rbp; /* bool xor */
/* only the low byte of the flag is examined */
testb %bpl, %bpl;
jnz __enc_xor3;
/* plain store */
outunpack_enc3(mov);
popq %rbx;
popq %rbp;
popq %r12;
popq %r13;
popq %r14;
popq %r15;
ret;
__enc_xor3:
/* XOR the result into the destination */
outunpack_enc3(xor);
popq %rbx;
popq %rbp;
popq %r12;
popq %r13;
popq %r14;
popq %r15;
ret;
.global twofish_dec_blk_3way
.type twofish_dec_blk_3way,@function;
/* Decrypt three consecutive 16-byte blocks from src into dst */
twofish_dec_blk_3way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src, RIO
*/
pushq %r15;
pushq %r14;
pushq %r13;
pushq %r12;
pushq %rbp;
pushq %rbx;
/* %rsi is reused as RT1 below, so park dst on the stack */
pushq %rsi; /* dst */
inpack_dec3();
decrypt_cycle3(RAB, RCD, 7);
decrypt_cycle3(RAB, RCD, 6);
decrypt_cycle3(RAB, RCD, 5);
decrypt_cycle3(RAB, RCD, 4);
decrypt_cycle3(RAB, RCD, 3);
decrypt_cycle3(RAB, RCD, 2);
decrypt_cycle3(RAB, RCD, 1);
decrypt_cycle3(RAB, RCD, 0);
popq RIO; /* dst */
outunpack_dec3();
popq %rbx;
popq %rbp;
popq %r12;
popq %r13;
popq %r14;
popq %r15;
ret;
@@ -0,0 +1,322 @@
/***************************************************************************
* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
.file "twofish-x86_64-asm.S"
.text
#include <asm/asm-offsets.h>
/* Byte offsets of the four 32-bit words a, b, c, d inside a block */
#define a_offset 0
#define b_offset 4
#define c_offset 8
#define d_offset 12
/* Structure of the crypto context struct*/
#define s0 0 /* S0 Array 256 Words each */
#define s1 1024 /* S1 Array */
#define s2 2048 /* S2 Array */
#define s3 3072 /* S3 Array */
#define w 4096 /* 8 whitening keys (word) */
#define k 4128 /* key 1-32 ( word ) */
/*
 * define a few register aliases to allow macro substitution:
 * Rn is the 64-bit register, RnD its low 32 bits, RnB its low byte and
 * RnH its second byte
 */
#define R0 %rax
#define R0D %eax
#define R0B %al
#define R0H %ah
#define R1 %rbx
#define R1D %ebx
#define R1B %bl
#define R1H %bh
#define R2 %rcx
#define R2D %ecx
#define R2B %cl
#define R2H %ch
#define R3 %rdx
#define R3D %edx
#define R3B %dl
#define R3H %dh
/*
 * performs input whitening: XORs src with the whitening key data at byte
 * offset "offset" inside the w array of the context
 */
#define input_whitening(src,context,offset)\
xor w+offset(context), src;
/*
 * performs output whitening: like input_whitening, but uses the key data
 * that starts 16 bytes into the w array
 */
#define output_whitening(src,context,offset)\
xor w+16+offset(context), src;
/*
 * One encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's key pair inside the k array
 *
 * %r11 must hold the context address. %r8d collects the table lookups
 * indexed by the bytes of b and %r9d those indexed by the bytes of a. The
 * sums are mixed (r9 += r8; r8 += r9), the keys at k+round and k+4+round are
 * added, and the results are XORed into c (then rotated left by 15) and d.
 * %rdi, %r8 and %r9 are clobbered.
 * operations on a and b are interleaved to increase performance
 */
#define encrypt_round(a,b,c,d,round)\
movzx b ## B, %edi;\
mov s1(%r11,%rdi,4),%r8d;\
movzx a ## B, %edi;\
mov s2(%r11,%rdi,4),%r9d;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor s2(%r11,%rdi,4),%r8d;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%r11,%rdi,4),%r9d;\
movzx b ## B, %edi;\
xor s3(%r11,%rdi,4),%r8d;\
movzx a ## B, %edi;\
xor (%r11,%rdi,4), %r9d;\
movzx b ## H, %edi;\
ror $15, b ## D;\
xor (%r11,%rdi,4), %r8d;\
movzx a ## H, %edi;\
xor s1(%r11,%rdi,4),%r9d;\
add %r8d, %r9d;\
add %r9d, %r8d;\
add k+round(%r11), %r9d;\
xor %r9d, c ## D;\
rol $15, c ## D;\
add k+4+round(%r11),%r8d;\
xor %r8d, d ## D;
/*
 * Final encryption round.
 *
 * a input register containing a(rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's key pair inside the k array
 * operations on a and b are interleaved to increase performance
 * during the round a and b are prepared for the output whitening:
 * %r10 is assembled as (b << 32) ^ a, taking b before its rotation and a
 * after its ror $16. c is rotated right by 1 instead of left by 15.
 * %r11 must hold the context address; %rdi, %r8, %r9, %r10 are clobbered.
 */
#define encrypt_last_round(a,b,c,d,round)\
mov b ## D, %r10d;\
shl $32, %r10;\
movzx b ## B, %edi;\
mov s1(%r11,%rdi,4),%r8d;\
movzx a ## B, %edi;\
mov s2(%r11,%rdi,4),%r9d;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor s2(%r11,%rdi,4),%r8d;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%r11,%rdi,4),%r9d;\
movzx b ## B, %edi;\
xor s3(%r11,%rdi,4),%r8d;\
movzx a ## B, %edi;\
xor (%r11,%rdi,4), %r9d;\
xor a, %r10;\
movzx b ## H, %edi;\
xor (%r11,%rdi,4), %r8d;\
movzx a ## H, %edi;\
xor s1(%r11,%rdi,4),%r9d;\
add %r8d, %r9d;\
add %r9d, %r8d;\
add k+round(%r11), %r9d;\
xor %r9d, c ## D;\
ror $1, c ## D;\
add k+4+round(%r11),%r8d;\
xor %r8d, d ## D
/*
 * One decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's key pair inside the k array
 *
 * %r11 must hold the context address. %r9d collects the table lookups
 * indexed by the bytes of a and %r8d those indexed by the bytes of b. The
 * sums are mixed (r9 += r8; r8 += r9), the keys at k+round and k+4+round are
 * added, and the results are XORed into c and d (d is then rotated left by
 * 15). %rdi, %r8 and %r9 are clobbered.
 * operations on a and b are interleaved to increase performance
 */
#define decrypt_round(a,b,c,d,round)\
movzx a ## B, %edi;\
mov (%r11,%rdi,4), %r9d;\
movzx b ## B, %edi;\
mov s3(%r11,%rdi,4),%r8d;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s1(%r11,%rdi,4),%r9d;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%r11,%rdi,4), %r8d;\
movzx a ## B, %edi;\
xor s2(%r11,%rdi,4),%r9d;\
movzx b ## B, %edi;\
xor s1(%r11,%rdi,4),%r8d;\
movzx a ## H, %edi;\
ror $15, a ## D;\
xor s3(%r11,%rdi,4),%r9d;\
movzx b ## H, %edi;\
xor s2(%r11,%rdi,4),%r8d;\
add %r8d, %r9d;\
add %r9d, %r8d;\
add k+round(%r11), %r9d;\
xor %r9d, c ## D;\
add k+4+round(%r11),%r8d;\
xor %r8d, d ## D;\
rol $15, d ## D;
/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's key pair inside the k array
 * operations on a and b are interleaved to increase performance
 * during the round a and b are prepared for the output whitening:
 * %r10 is assembled as (b << 32) ^ a, taking b after its ror $16 and a
 * before its ror $16. d is rotated right by 1 instead of left by 15.
 * %r11 must hold the context address; %rdi, %r8, %r9, %r10 are clobbered.
 */
#define decrypt_last_round(a,b,c,d,round)\
movzx a ## B, %edi;\
mov (%r11,%rdi,4), %r9d;\
movzx b ## B, %edi;\
mov s3(%r11,%rdi,4),%r8d;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%r11,%rdi,4), %r8d;\
movzx a ## H, %edi;\
mov b ## D, %r10d;\
shl $32, %r10;\
xor a, %r10;\
ror $16, a ## D;\
xor s1(%r11,%rdi,4),%r9d;\
movzx b ## B, %edi;\
xor s1(%r11,%rdi,4),%r8d;\
movzx a ## B, %edi;\
xor s2(%r11,%rdi,4),%r9d;\
movzx b ## H, %edi;\
xor s2(%r11,%rdi,4),%r8d;\
movzx a ## H, %edi;\
xor s3(%r11,%rdi,4),%r9d;\
add %r8d, %r9d;\
add %r9d, %r8d;\
add k+round(%r11), %r9d;\
xor %r9d, c ## D;\
add k+4+round(%r11),%r8d;\
xor %r8d, d ## D;\
ror $1, d ## D;
.align 8
.global twofish_enc_blk
.global twofish_dec_blk
/*
 * Encrypt one 16-byte block. %rbx (R1) is saved and restored; %rax is set to
 * 1 on return.
 */
twofish_enc_blk:
pushq R1
/* %rdi contains the ctx address */
/* %rsi contains the output address */
/* %rdx contains the input address */
/* ctx address is moved to free one non-rex register
as target for the 8bit high operations */
mov %rdi, %r11
/* load the block as two 64-bit halves and whiten them in one XOR each */
movq (R3), R1
movq 8(R3), R3
input_whitening(R1,%r11,a_offset)
input_whitening(R3,%r11,c_offset)
/* split into a, b, c, d and apply the rotations the round macros expect */
mov R1D, R0D
rol $16, R0D
shr $32, R1
mov R3D, R2D
shr $32, R3
rol $1, R3D
/* 16 rounds; the register pairs swap roles every round */
encrypt_round(R0,R1,R2,R3,0);
encrypt_round(R2,R3,R0,R1,8);
encrypt_round(R0,R1,R2,R3,2*8);
encrypt_round(R2,R3,R0,R1,3*8);
encrypt_round(R0,R1,R2,R3,4*8);
encrypt_round(R2,R3,R0,R1,5*8);
encrypt_round(R0,R1,R2,R3,6*8);
encrypt_round(R2,R3,R0,R1,7*8);
encrypt_round(R0,R1,R2,R3,8*8);
encrypt_round(R2,R3,R0,R1,9*8);
encrypt_round(R0,R1,R2,R3,10*8);
encrypt_round(R2,R3,R0,R1,11*8);
encrypt_round(R0,R1,R2,R3,12*8);
encrypt_round(R2,R3,R0,R1,13*8);
encrypt_round(R0,R1,R2,R3,14*8);
encrypt_last_round(R2,R3,R0,R1,15*8);
/* first output half was packed into %r10 by the last round */
output_whitening(%r10,%r11,a_offset)
movq %r10, (%rsi)
/* pack the second output half from R1 (high) and R0 (low) */
shl $32, R1
xor R0, R1
output_whitening(R1,%r11,c_offset)
movq R1, 8(%rsi)
popq R1
movq $1,%rax
ret
/*
 * Decrypt one 16-byte block. %rbx (R1) is saved and restored; %rax is set to
 * 1 on return.
 */
twofish_dec_blk:
pushq R1
/* %rdi contains the ctx address */
/* %rsi contains the output address */
/* %rdx contains the input address */
/* ctx address is moved to free one non-rex register
as target for the 8bit high operations */
mov %rdi, %r11
/* load the block as two 64-bit halves and undo the output whitening */
movq (R3), R1
movq 8(R3), R3
output_whitening(R1,%r11,a_offset)
output_whitening(R3,%r11,c_offset)
/* split into a, b, c, d and apply the rotations the round macros expect */
mov R1D, R0D
shr $32, R1
rol $16, R1D
mov R3D, R2D
shr $32, R3
rol $1, R2D
/* 16 rounds with the round keys in reverse order */
decrypt_round(R0,R1,R2,R3,15*8);
decrypt_round(R2,R3,R0,R1,14*8);
decrypt_round(R0,R1,R2,R3,13*8);
decrypt_round(R2,R3,R0,R1,12*8);
decrypt_round(R0,R1,R2,R3,11*8);
decrypt_round(R2,R3,R0,R1,10*8);
decrypt_round(R0,R1,R2,R3,9*8);
decrypt_round(R2,R3,R0,R1,8*8);
decrypt_round(R0,R1,R2,R3,7*8);
decrypt_round(R2,R3,R0,R1,6*8);
decrypt_round(R0,R1,R2,R3,5*8);
decrypt_round(R2,R3,R0,R1,4*8);
decrypt_round(R0,R1,R2,R3,3*8);
decrypt_round(R2,R3,R0,R1,2*8);
decrypt_round(R0,R1,R2,R3,1*8);
decrypt_last_round(R2,R3,R0,R1,0);
/* first output half was packed into %r10 by the last round */
input_whitening(%r10,%r11,a_offset)
movq %r10, (%rsi)
/* pack the second output half from R1 (high) and R0 (low) */
shl $32, R1
xor R0, R1
input_whitening(R1,%r11,c_offset)
movq R1, 8(%rsi)
popq R1
movq $1,%rax
ret
+101
View File
@@ -0,0 +1,101 @@
/*
* Glue Code for assembler optimized version of TWOFISH
*
* Originally Twofish for GPG
* By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
* 256-bit key length added March 20, 1999
* Some modifications to reduce the text size by Werner Koch, April, 1998
* Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
* Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
*
* The original author has disclaimed all copyright interest in this
* code and thus put it in the public domain. The subsequent authors
* have put this under the GNU General Public License.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
* This code is a "clean room" implementation, written from the paper
* _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
* Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
* through http://www.counterpane.com/twofish.html
*
* For background information on multiplication in finite fields, used for
* the matrix operations in the key schedule, see the book _Contemporary
* Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
* Third Edition.
*/
#include <crypto/twofish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
/*
 * Single-block Twofish encrypt/decrypt primitives. Only prototypes appear
 * here; the bodies are presumably provided by the accompanying assembler
 * file (this is the "glue" for it). Both symbols are exported GPL-only so
 * that other modules can call them.
 */
asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_enc_blk);
asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_dec_blk);
/*
 * cia_encrypt hook: encrypt one block from @src into @dst using the Twofish
 * context stored in the transform @tfm.
 */
static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct twofish_ctx *tf_ctx = crypto_tfm_ctx(tfm);

	twofish_enc_blk(tf_ctx, dst, src);
}
/*
 * cia_decrypt hook: decrypt one block from @src into @dst using the Twofish
 * context stored in the transform @tfm.
 */
static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct twofish_ctx *tf_ctx = crypto_tfm_ctx(tfm);

	twofish_dec_blk(tf_ctx, dst, src);
}
/*
 * Crypto API descriptor for the plain single-block cipher. It is registered
 * under the generic name "twofish" with driver name "twofish-asm" and wires
 * the encrypt/decrypt hooks to the wrappers in this file. twofish_setkey is
 * not defined in this file (presumably declared by <crypto/twofish.h>).
 */
static struct crypto_alg alg = {
.cra_name = "twofish",
.cra_driver_name = "twofish-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
/* Per-transform context is exactly one struct twofish_ctx. */
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(alg.cra_list),
.cra_u = {
.cipher = {
.cia_min_keysize = TF_MIN_KEY_SIZE,
.cia_max_keysize = TF_MAX_KEY_SIZE,
.cia_setkey = twofish_setkey,
.cia_encrypt = twofish_encrypt,
.cia_decrypt = twofish_decrypt
}
}
};
/* Module entry point: register the cipher; returns the registration result. */
static int __init init(void)
{
return crypto_register_alg(&alg);
}
/* Module exit point: unregister the cipher registered by init(). */
static void __exit fini(void)
{
crypto_unregister_alg(&alg);
}
/* Hook the entry/exit points into the module loader and describe the module. */
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized");
/* Aliases under which this module is advertised. */
MODULE_ALIAS("twofish");
MODULE_ALIAS("twofish-asm");
+695
View File
@@ -0,0 +1,695 @@
/*
* Glue Code for 3-way parallel assembler optimized version of Twofish
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <asm/processor.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <crypto/b128ops.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
/*
 * regular block cipher functions from twofish_x86_64 module
 * (one block per call)
 */
asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/*
 * 3-way parallel cipher functions (three consecutive blocks per call).
 * The encrypt entry point takes an extra @xor flag; see the two inline
 * wrappers that follow for how it is used.
 */
asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/* Encrypt three blocks from @src to @dst (3-way routine with xor == false). */
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, false);
}
/*
 * 3-way encrypt with xor == true. Judging from its use in the CTR path
 * (dst is pre-loaded with the plaintext, src holds the counter blocks), the
 * assembler routine presumably XORs the encrypted blocks into @dst rather
 * than overwriting it -- confirm against the assembler source.
 */
static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, true);
}
/*
 * Common ECB walker shared by encryption and decryption.
 *
 * @fn processes a single block, @fn_3way processes three consecutive blocks.
 * For every chunk handed out by the blkcipher walk, as many 3-block batches
 * as possible go through @fn_3way and the remaining whole blocks through
 * @fn. The unprocessed byte count is reported back to the walk.
 *
 * Returns the status of the last walk call.
 */
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
		     void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
{
	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int blk = TF_BLOCK_SIZE;
	const unsigned int batch = blk * 3;
	unsigned int left;
	int err;

	for (err = blkcipher_walk_virt(desc, walk);
	     (left = walk->nbytes) != 0;
	     err = blkcipher_walk_done(desc, walk, left)) {
		u8 *in = walk->src.virt.addr;
		u8 *out = walk->dst.virt.addr;

		/* Three blocks at a time while enough data remains. */
		if (left >= batch) {
			do {
				fn_3way(ctx, out, in);
				in += batch;
				out += batch;
				left -= batch;
			} while (left >= batch);

			/* Nothing but a partial block left: finish chunk. */
			if (left < blk)
				continue;
		}

		/* Remaining whole blocks, one at a time. */
		do {
			fn(ctx, out, in);
			in += blk;
			out += blk;
			left -= blk;
		} while (left >= blk);
	}

	return err;
}
/*
 * blkcipher .encrypt hook for ECB: set up a walk over @src/@dst for @nbytes
 * and run ecb_crypt() with the single-block and 3-way encrypt routines.
 */
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way);
}
/*
 * blkcipher .decrypt hook for ECB: set up a walk over @src/@dst for @nbytes
 * and run ecb_crypt() with the single-block and 3-way decrypt routines.
 */
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
}
/*
 * CBC-encrypt the whole blocks of the current walk chunk.
 *
 * Each plaintext block is XORed with the previous ciphertext block (the IV
 * from walk->iv for the first one) and then encrypted in place in @dst.
 * On return walk->iv holds the last ciphertext block produced, which is the
 * chaining value for the next chunk.
 *
 * Returns the number of trailing bytes (less than a block) not processed.
 */
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = TF_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 *iv = (u128 *)walk->iv;

	do {
		u128_xor(dst, src, iv);
		twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
		/* The block just produced chains into the next one. */
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	/*
	 * Store the last ciphertext block as the new IV. This must be a
	 * plain copy: XORing it into the old IV (as was done before) leaves
	 * "old IV ^ last ciphertext" in walk->iv and corrupts chaining
	 * whenever a request spans more than one walk chunk.
	 */
	*(u128 *)walk->iv = *iv;
	return nbytes;
}
/*
 * blkcipher .encrypt hook for CBC: walk the scatterlists and let
 * __cbc_encrypt() handle each mapped chunk, handing the unprocessed byte
 * count back to the walk. Returns the status of the last walk call.
 */
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);

	for (err = blkcipher_walk_virt(desc, &walk); walk.nbytes != 0; ) {
		unsigned int left = __cbc_encrypt(desc, &walk);

		err = blkcipher_walk_done(desc, &walk, left);
	}

	return err;
}
/*
 * CBC-decrypt the whole blocks of the current walk chunk.
 *
 * The chunk is processed from its last block towards its first. Each
 * decrypted block is XORed with the ciphertext block preceding it; the
 * first block of the chunk is XORed with walk->iv. On return walk->iv holds
 * the last ciphertext block of the chunk.
 *
 * Returns the number of trailing bytes (less than a block) not processed.
 */
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = TF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
/* Copies of the first two ciphertext blocks of a 3-block batch. */
u128 ivs[3 - 1];
u128 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
/* Remember the last ciphertext block; it becomes the new IV at the end. */
last_iv = *src;
/* Process three block batch */
if (nbytes >= bsize * 3) {
do {
/* Step back so src/dst point at the first block of the batch. */
nbytes -= bsize * (3 - 1);
src -= 3 - 1;
dst -= 3 - 1;
/*
 * Save the ciphertext needed for chaining before decrypting;
 * presumably required because dst may alias src (in-place
 * operation) -- confirm against callers.
 */
ivs[0] = src[0];
ivs[1] = src[1];
twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
/* Blocks 1 and 2 chain from ciphertext blocks 0 and 1. */
u128_xor(dst + 1, dst + 1, ivs + 0);
u128_xor(dst + 2, dst + 2, ivs + 1);
nbytes -= bsize;
/* Batch started at the first block of the chunk: chain from walk->iv. */
if (nbytes < bsize)
goto done;
/* Block 0 chains from the ciphertext block just before the batch. */
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 3);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
for (;;) {
twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
/* First block of the chunk reached: it chains from walk->iv below. */
if (nbytes < bsize)
break;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
}
done:
/* dst now points at the first block of the chunk. */
u128_xor(dst, dst, (u128 *)walk->iv);
*(u128 *)walk->iv = last_iv;
return nbytes;
}
/*
 * blkcipher .decrypt hook for CBC: walk the scatterlists and let
 * __cbc_decrypt() handle each mapped chunk, handing the unprocessed byte
 * count back to the walk. Returns the status of the last walk call.
 */
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);

	for (err = blkcipher_walk_virt(desc, &walk); walk.nbytes != 0; ) {
		unsigned int left = __cbc_decrypt(desc, &walk);

		err = blkcipher_walk_done(desc, &walk, left);
	}

	return err;
}
/* Convert a CPU-endian 128-bit value to big-endian, one 64-bit half at a time. */
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
dst->a = cpu_to_be64(src->a);
dst->b = cpu_to_be64(src->b);
}
/* Convert a big-endian 128-bit value to CPU-endian, one 64-bit half at a time. */
static inline void be128_to_u128(u128 *dst, const be128 *src)
{
dst->a = be64_to_cpu(src->a);
dst->b = be64_to_cpu(src->b);
}
/*
 * Add one to a 128-bit counter kept as two 64-bit halves: bump the 'b' half
 * and carry into the 'a' half when 'b' wraps around to zero.
 */
static inline void u128_inc(u128 *i)
{
	if (++i->b == 0)
		i->a++;
}
/*
 * Handle the final partial block of a CTR request: encrypt the counter
 * block into a keystream buffer, XOR walk->nbytes bytes of source into it,
 * copy the result to the destination and advance the counter.
 */
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int tail = walk->nbytes;
	u8 *counter = walk->iv;
	u8 *in = walk->src.virt.addr;
	u8 *out = walk->dst.virt.addr;
	u8 ks[TF_BLOCK_SIZE];

	twofish_enc_blk(ctx, ks, counter);
	crypto_xor(ks, in, tail);
	memcpy(out, ks, tail);

	crypto_inc(counter, TF_BLOCK_SIZE);
}
/*
 * CTR-process the whole blocks of the current walk chunk.
 *
 * The counter is read from walk->iv (big-endian), kept in CPU-endian form
 * while it is incremented, and written back to walk->iv at the end.
 *
 * Returns the number of trailing bytes (less than a block) not processed.
 */
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = TF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk;
be128 ctrblocks[3];
be128_to_u128(&ctrblk, (be128 *)walk->iv);
/* Process three block batch */
if (nbytes >= bsize * 3) {
do {
/* Pre-load dst with the input so the keystream can be XORed into it. */
if (dst != src) {
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
}
/* create ctrblks for parallel encrypt */
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
u128_to_be128(&ctrblocks[1], &ctrblk);
u128_inc(&ctrblk);
u128_to_be128(&ctrblocks[2], &ctrblk);
u128_inc(&ctrblk);
/* Encrypt the three counter blocks, combining the result with dst. */
twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += 3;
dst += 3;
nbytes -= bsize * 3;
} while (nbytes >= bsize * 3);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (dst != src)
*dst = *src;
u128_to_be128(&ctrblocks[0], &ctrblk);
u128_inc(&ctrblk);
/* Encrypt the counter block in place, then XOR the keystream into dst. */
twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
u128_xor(dst, dst, (u128 *)ctrblocks);
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
/* Publish the advanced counter for the next chunk. */
u128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
}
/*
 * blkcipher .encrypt/.decrypt hook for CTR. Chunks holding at least one
 * full block go through __ctr_crypt(); whatever is left afterwards (a final
 * partial block) is finished by ctr_crypt_final(). Returns the status of
 * the last walk call.
 */
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);

	while (walk.nbytes >= TF_BLOCK_SIZE) {
		unsigned int left = __ctr_crypt(desc, &walk);

		err = blkcipher_walk_done(desc, &walk, left);
	}

	if (walk.nbytes != 0) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}
/*
 * In-place encrypt callback used by the LRW/XTS requests. @priv is the
 * Twofish context. Exactly three blocks are handled by the 3-way routine;
 * any other length is processed one whole block at a time.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	struct twofish_ctx *ctx = priv;
	unsigned int blocks;

	if (nbytes == 3 * TF_BLOCK_SIZE) {
		twofish_enc_blk_3way(ctx, srcdst, srcdst);
		return;
	}

	for (blocks = nbytes / TF_BLOCK_SIZE; blocks > 0; blocks--) {
		twofish_enc_blk(ctx, srcdst, srcdst);
		srcdst += TF_BLOCK_SIZE;
	}
}
/*
 * In-place decrypt callback used by the LRW/XTS requests. @priv is the
 * Twofish context. Exactly three blocks are handled by the 3-way routine;
 * any other length is processed one whole block at a time.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	struct twofish_ctx *ctx = priv;
	unsigned int blocks;

	if (nbytes == 3 * TF_BLOCK_SIZE) {
		twofish_dec_blk_3way(ctx, srcdst, srcdst);
		return;
	}

	for (blocks = nbytes / TF_BLOCK_SIZE; blocks > 0; blocks--) {
		twofish_dec_blk(ctx, srcdst, srcdst);
		srcdst += TF_BLOCK_SIZE;
	}
}
/* Per-transform state for lrw(twofish): the LRW table plus the cipher key schedule. */
struct twofish_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct twofish_ctx twofish_ctx;
};
/*
 * Set the lrw(twofish) key. The trailing TF_BLOCK_SIZE bytes of @key seed
 * the LRW table; everything before them is the Twofish key proper.
 * Returns 0 on success or the error from whichever step failed.
 */
static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
	int err;

	err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
			       &tfm->crt_flags);
	if (!err)
		err = lrw_init_table(&ctx->lrw_table,
				     key + keylen - TF_BLOCK_SIZE);

	return err;
}
/*
 * blkcipher .encrypt hook for LRW: describe the request (a three-block
 * scratch buffer, the LRW table, the cipher context and the encrypt
 * callback) and hand it to lrw_crypt().
 */
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
/* Scratch space sized for three blocks, matching the 3-way routines. */
be128 buf[3];
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &ctx->twofish_ctx,
.crypt_fn = encrypt_callback,
};
return lrw_crypt(desc, dst, src, nbytes, &req);
}
/*
 * blkcipher .decrypt hook for LRW: same request layout as lrw_encrypt() but
 * with the decrypt callback.
 */
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
/* Scratch space sized for three blocks, matching the 3-way routines. */
be128 buf[3];
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &ctx->twofish_ctx,
.crypt_fn = decrypt_callback,
};
return lrw_crypt(desc, dst, src, nbytes, &req);
}
/* cra_exit hook for lrw(twofish): release the LRW table of this transform. */
static void lrw_exit_tfm(struct crypto_tfm *tfm)
{
struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
/* Per-transform state for xts(twofish): separate key schedules for tweak and data. */
struct twofish_xts_ctx {
struct twofish_ctx tweak_ctx;
struct twofish_ctx crypt_ctx;
};
/*
 * Set the xts(twofish) key. The key is two equally sized Twofish keys
 * concatenated: the first half keys the data cipher, the second half keys
 * the tweak cipher. An odd @keylen is rejected with -EINVAL and
 * CRYPTO_TFM_RES_BAD_KEY_LEN set in the transform flags.
 */
static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
	unsigned int half;
	int err;

	/* Two equal halves cannot add up to an odd length. */
	if (keylen & 1) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	half = keylen / 2;

	/* Data key first ... */
	err = __twofish_setkey(&ctx->crypt_ctx, key, half, &tfm->crt_flags);
	if (err)
		return err;

	/* ... then the tweak key. */
	return __twofish_setkey(&ctx->tweak_ctx, key + half, half,
				&tfm->crt_flags);
}
/*
 * blkcipher .encrypt hook for XTS: describe the request (a three-block
 * scratch buffer, tweak context/function, data context and encrypt
 * callback) and hand it to xts_crypt().
 */
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
/* Scratch space sized for three blocks, matching the 3-way routines. */
be128 buf[3];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
.crypt_ctx = &ctx->crypt_ctx,
.crypt_fn = encrypt_callback,
};
return xts_crypt(desc, dst, src, nbytes, &req);
}
/*
 * blkcipher .decrypt hook for XTS: same request layout as xts_encrypt() but
 * with the decrypt callback for the data.
 */
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
/* Scratch space sized for three blocks, matching the 3-way routines. */
be128 buf[3];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx,
/* The tweak uses the encrypt primitive here too, exactly as in xts_encrypt(). */
.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
.crypt_ctx = &ctx->crypt_ctx,
.crypt_fn = decrypt_callback,
};
return xts_crypt(desc, dst, src, nbytes, &req);
}
/*
 * Block-cipher mode descriptors registered by this module, in order:
 * ecb, cbc, ctr, lrw and xts over Twofish. All are blkcipher algorithms
 * with priority 300 and differ in context size, key size limits, IV size
 * and the setkey/encrypt/decrypt hooks.
 */
static struct crypto_alg tf_algs[5] = { {
/* [0] ECB: no IV. */
.cra_name = "ecb(twofish)",
.cra_driver_name = "ecb-twofish-3way",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = twofish_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
/* [1] CBC: one block of IV. */
.cra_name = "cbc(twofish)",
.cra_driver_name = "cbc-twofish-3way",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = twofish_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
/* [2] CTR: block size 1, and the same routine serves both directions. */
.cra_name = "ctr(twofish)",
.cra_driver_name = "ctr-twofish-3way",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = twofish_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
/* [3] LRW: key carries one extra block; needs an exit hook to free its table. */
.cra_name = "lrw(twofish)",
.cra_driver_name = "lrw-twofish-3way",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
.cra_exit = lrw_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
.max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = lrw_twofish_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
/* [4] XTS: key is two Twofish keys, hence doubled key size limits. */
.cra_name = "xts(twofish)",
.cra_driver_name = "xts-twofish-3way",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE * 2,
.max_keysize = TF_MAX_KEY_SIZE * 2,
.ivsize = TF_BLOCK_SIZE,
.setkey = xts_twofish_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
} };
/*
 * Report whether the boot CPU is one on which the 3-way implementation is
 * known to be slower than the original assembler version. Only Intel CPUs
 * are ever blacklisted.
 */
static bool is_blacklisted_cpu(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return false;

	switch (boot_cpu_data.x86) {
	case 0x06:
		/*
		 * Atom models. Twofish-3way gives up some performance by
		 * keeping blocks in 64-bit registers so that three blocks
		 * can be processed in parallel. Out-of-order CPUs win that
		 * back (and more) through the parallelism; Atom does not
		 * benefit from it, so it is blacklisted.
		 */
		switch (boot_cpu_data.x86_model) {
		case 0x1c:
		case 0x26:
		case 0x36:
			return true;
		default:
			return false;
		}
	case 0x0f:
		/*
		 * Pentium 4. Handling a 128-bit block in two 64-bit
		 * registers needs many 64-bit rotates and left shifts,
		 * which are really slow on P4, making twofish-3way slower
		 * than the original assembler implementation.
		 */
		return true;
	default:
		return false;
	}
}
/* Load-time switch: a non-zero value makes init() skip the CPU blacklist check. */
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
/*
 * Module entry point. Registers all mode descriptors unless the boot CPU is
 * blacklisted and the 'force' parameter is not set, in which case a notice
 * is logged and -ENODEV is returned.
 */
static int __init init(void)
{
	if (force || !is_blacklisted_cpu())
		return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));

	printk(KERN_INFO
	       "twofish-x86_64-3way: performance on this CPU "
	       "would be suboptimal: disabling "
	       "twofish-x86_64-3way.\n");
	return -ENODEV;
}
/* Module exit point: unregister every descriptor in tf_algs. */
static void __exit fini(void)
{
crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
/* Hook the entry/exit points into the module loader and describe the module. */
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
/* Aliases under which this module is advertised. */
MODULE_ALIAS("twofish");
MODULE_ALIAS("twofish-asm");
+14
View File
@@ -0,0 +1,14 @@
#
# Makefile for the ia32 kernel emulation subsystem.
#
# Objects built whenever IA32 emulation is enabled.
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
# ipc32.o is pulled in only when CONFIG_SYSVIPC=y (sysv-y is empty otherwise).
sysv-$(CONFIG_SYSVIPC) := ipc32.o
obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
# a.out loader has its own config switch.
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
# audit.o is pulled in only when CONFIG_AUDIT=y (audit-class-y is empty otherwise).
audit-class-$(CONFIG_AUDIT) := audit.o
obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
+42
View File
@@ -0,0 +1,42 @@
#include <asm/unistd_32.h>
/*
 * Audit class tables for the ia32 syscall numbering (<asm/unistd_32.h>).
 * The entries of each table come from the matching asm-generic header, and
 * every table ends with a ~0U entry.
 */
unsigned ia32_dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
};
unsigned ia32_chattr_class[] = {
#include <asm-generic/audit_change_attr.h>
~0U
};
unsigned ia32_write_class[] = {
#include <asm-generic/audit_write.h>
~0U
};
unsigned ia32_read_class[] = {
#include <asm-generic/audit_read.h>
~0U
};
unsigned ia32_signal_class[] = {
#include <asm-generic/audit_signal.h>
~0U
};
/*
 * Map an ia32 syscall number to a small class code:
 *   open -> 2, openat -> 3, socketcall -> 4, execve -> 5, anything else -> 1.
 * The meaning of the codes is defined by the caller (presumably the audit
 * core); this function only performs the mapping.
 */
int ia32_classify_syscall(unsigned syscall)
{
	if (syscall == __NR_open)
		return 2;
	if (syscall == __NR_openat)
		return 3;
	if (syscall == __NR_socketcall)
		return 4;
	if (syscall == __NR_execve)
		return 5;

	return 1;
}
+514
View File
@@ -0,0 +1,514 @@
/*
* a.out loader for x86-64
*
* Copyright (C) 1991, 1992, 1996 Linus Torvalds
* Hacked together by Andi Kleen
*/
#include <linux/module.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/binfmts.h>
#include <linux/personality.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/user32.h>
#include <asm/ia32.h>
/* Compile-time switches for optional code in this file; both are disabled. */
#undef WARN_OLD
#undef CORE_DUMP /* definitely broken */
/* Forward declarations for the handlers referenced by aout_format. */
static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
static int load_aout_library(struct file *);
/* Everything up to the matching #endif is compiled only with CORE_DUMP defined. */
#ifdef CORE_DUMP
static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
unsigned long limit);
/*
 * fill in the user structure for a core dump..
 *
 * Populates @dump from @regs and from the current task's mm and thread
 * state: segment sizes in pages, debug registers, general registers and
 * segment selectors. FPU state is not saved (u_fpvalid is forced to 0).
 */
static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
{
u32 fs, gs;
/* changed the size calculations - should hopefully work better. lbt */
dump->magic = CMAGIC;
dump->start_code = 0;
/* Stack start rounded down to a page boundary. */
dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
/* Text size in pages; data size is pages up to brk minus the text pages. */
dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
dump->u_dsize = ((unsigned long)
(current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
dump->u_debugreg[0] = current->thread.debugreg0;
dump->u_debugreg[1] = current->thread.debugreg1;
dump->u_debugreg[2] = current->thread.debugreg2;
dump->u_debugreg[3] = current->thread.debugreg3;
/* Slots 4 and 5 are always written as zero. */
dump->u_debugreg[4] = 0;
dump->u_debugreg[5] = 0;
dump->u_debugreg[6] = current->thread.debugreg6;
dump->u_debugreg[7] = current->thread.debugreg7;
/* Stack size in pages, measured from start_stack up to 0xc0000000. */
if (dump->start_stack < 0xc0000000) {
unsigned long tmp;
tmp = (unsigned long) (0xc0000000 - dump->start_stack);
dump->u_ssize = tmp >> PAGE_SHIFT;
}
dump->regs.bx = regs->bx;
dump->regs.cx = regs->cx;
dump->regs.dx = regs->dx;
dump->regs.si = regs->si;
dump->regs.di = regs->di;
dump->regs.bp = regs->bp;
dump->regs.ax = regs->ax;
/* ds/es come from the saved thread state; fs/gs are read via savesegment(). */
dump->regs.ds = current->thread.ds;
dump->regs.es = current->thread.es;
savesegment(fs, fs);
dump->regs.fs = fs;
savesegment(gs, gs);
dump->regs.gs = gs;
dump->regs.orig_ax = regs->orig_ax;
dump->regs.ip = regs->ip;
dump->regs.cs = regs->cs;
dump->regs.flags = regs->flags;
dump->regs.sp = regs->sp;
dump->regs.ss = regs->ss;
#if 1 /* FIXME */
dump->u_fpvalid = 0;
#else
dump->u_fpvalid = dump_fpu(regs, &dump->i387);
#endif
}
#endif
/*
 * Binary format descriptor for a.out: wires up the executable and shared
 * library loaders, and the core dump handler only when CORE_DUMP is defined.
 */
static struct linux_binfmt aout_format = {
.module = THIS_MODULE,
.load_binary = load_aout_binary,
.load_shlib = load_aout_library,
#ifdef CORE_DUMP
.core_dump = aout_core_dump,
#endif
.min_coredump = PAGE_SIZE
};
/*
 * Map the range between @start and @end, both rounded up to a page
 * boundary, with vm_brk(). Does nothing when the rounded range is empty.
 * The result of vm_brk() is not checked.
 */
static void set_brk(unsigned long start, unsigned long end)
{
	unsigned long lo = PAGE_ALIGN(start);
	unsigned long hi = PAGE_ALIGN(end);

	if (hi > lo)
		vm_brk(lo, hi - lo);
}
#ifdef CORE_DUMP
/*
* These are the only things you should do on a core-file: use only these
* macros to write out all the necessary info.
*/
#include <linux/coredump.h>
/*
 * Core-dump helper macros. DUMP_WRITE and DUMP_SEEK expect a
 * `struct file *file` variable and an `end_coredump` label in the enclosing
 * function and jump to that label on failure. They are wrapped in
 * do { } while (0) so that each use behaves as a single statement.
 */
#define DUMP_WRITE(addr, nr)					\
	do {							\
		if (!dump_write(file, (void *)(addr), (nr)))	\
			goto end_coredump;			\
	} while (0)

#define DUMP_SEEK(offset)					\
	do {							\
		if (!dump_seek(file, offset))			\
			goto end_coredump;			\
	} while (0)

/*
 * Both take the dump record as argument. START_DATA used to be declared
 * with an empty parameter list while every user passes the record, which
 * does not compile once CORE_DUMP is enabled.
 */
#define START_DATA(u)	((u).u_tsize << PAGE_SHIFT)
#define START_STACK(u)	((u).start_stack)
/*
 * Routine writes a core dump image in the current directory.
 * Currently only a stub-function.
 *
 * Note that setuid/setgid files won't make a core-dump if the uid/gid
 * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
 * field, which also makes sure the core-dumps won't be recursive if the
 * dumping of the process results in another error..
 *
 * Layout written to @file: the struct user32 record, a seek to PAGE_SIZE,
 * then the data area and the stack area (each only if its size is non-zero
 * after the @limit and access checks). Always returns 1; the address limit
 * active on entry is restored before returning.
 */
static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
unsigned long limit)
{
mm_segment_t fs;
int has_dumped = 0;
unsigned long dump_start, dump_size;
struct user32 dump;
/* Switch to KERNEL_DS so the kernel-resident dump record can be written. */
fs = get_fs();
set_fs(KERNEL_DS);
has_dumped = 1;
current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(current->comm));
dump.u_ar0 = offsetof(struct user32, regs);
dump.signal = signr;
dump_thread32(regs, &dump);
/*
 * If the size of the dump file exceeds the rlimit, then see
 * what would happen if we wrote the stack, but not the data
 * area.
 */
if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit)
dump.u_dsize = 0;
/* Make sure we have enough room to write the stack and data areas. */
if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
dump.u_ssize = 0;
/* make sure we actually have a data and stack area to dump */
set_fs(USER_DS);
if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump),
dump.u_dsize << PAGE_SHIFT))
dump.u_dsize = 0;
if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump),
dump.u_ssize << PAGE_SHIFT))
dump.u_ssize = 0;
set_fs(KERNEL_DS);
/* struct user */
DUMP_WRITE(&dump, sizeof(dump));
/* Now dump all of the user data. Include malloced stuff as well */
DUMP_SEEK(PAGE_SIZE);
/* now we start writing out the user space info */
set_fs(USER_DS);
/* Dump the data area */
if (dump.u_dsize != 0) {
dump_start = START_DATA(dump);
dump_size = dump.u_dsize << PAGE_SHIFT;
DUMP_WRITE(dump_start, dump_size);
}
/* Now prepare to dump the stack area */
if (dump.u_ssize != 0) {
dump_start = START_STACK(dump);
dump_size = dump.u_ssize << PAGE_SHIFT;
DUMP_WRITE(dump_start, dump_size);
}
/* Reached on completion and on any DUMP_WRITE/DUMP_SEEK failure. */
end_coredump:
set_fs(fs);
return has_dumped;
}
#endif
/*
 * create_aout_tables() parses the env- and arg-strings in new user
 * memory and creates the pointer tables from them, and puts their
 * addresses on the "stack", returning the new stack pointer value.
 *
 * @p points at the first argument string in user memory. The resulting
 * stack, from the returned pointer upwards, holds: argc, the address of the
 * argv table, the address of the envp table, the argv table (argc entries
 * plus a 0 terminator) and the envp table (envc entries plus a 0
 * terminator). All entries are 32-bit. The mm's arg/env start/end markers
 * are updated along the way. Results of put_user()/get_user() are not
 * checked.
 */
static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
{
u32 __user *argv, *envp, *sp;
int argc = bprm->argc, envc = bprm->envc;
/* Round p down to a multiple of sizeof(u32). */
sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
/* Reserve the envp and argv tables, one extra slot each for the terminator. */
sp -= envc+1;
envp = sp;
sp -= argc+1;
argv = sp;
put_user((unsigned long) envp, --sp);
put_user((unsigned long) argv, --sp);
put_user(argc, --sp);
current->mm->arg_start = (unsigned long) p;
/* Record each argument's address, then skip past its terminating NUL. */
while (argc-- > 0) {
char c;
put_user((u32)(unsigned long)p, argv++);
do {
get_user(c, p++);
} while (c);
}
put_user(0, argv);
current->mm->arg_end = current->mm->env_start = (unsigned long) p;
/* Same for the environment strings, which follow the arguments. */
while (envc-- > 0) {
char c;
put_user((u32)(unsigned long)p, envp++);
do {
get_user(c, p++);
} while (c);
}
put_user(0, envp);
current->mm->env_end = (unsigned long) p;
return sp;
}
/*
 * These are the functions used to load a.out style executables and shared
 * libraries. There is no binary dependent code anywhere else.
 *
 * load_aout_binary() validates the exec header found in bprm->buf, replaces
 * the current image, maps or reads text and data, sets up brk and the
 * argument tables, and initialises @regs for a 32-bit user-mode start at
 * the header's entry point.
 *
 * Returns 0 on success, -ENOEXEC for a header this loader rejects, -ENOMEM
 * if data+bss exceed RLIMIT_DATA, or the failing call's result otherwise.
 * Failures after flush_old_exec() also send SIGKILL to the current task.
 */
static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
unsigned long error, fd_offset, rlim;
struct exec ex;
int retval;
ex = *((struct exec *) bprm->buf); /* exec-header */
/*
 * Accept only the four known magics, no relocation sections, and a
 * file at least as large as the header says it must be.
 */
if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
N_TRSIZE(ex) || N_DRSIZE(ex) ||
i_size_read(bprm->file->f_path.dentry->d_inode) <
ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
return -ENOEXEC;
}
fd_offset = N_TXTOFF(ex);
/* Check initial limits. This avoids letting people circumvent
 * size limits imposed on them by creating programs with large
 * arrays in the data or bss.
 */
rlim = rlimit(RLIMIT_DATA);
if (rlim >= RLIM_INFINITY)
rlim = ~0;
if (ex.a_data + ex.a_bss > rlim)
return -ENOMEM;
/* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
if (retval)
return retval;
/* OK, This is the point of no return */
set_personality(PER_LINUX);
set_personality_ia32(false);
setup_new_exec(bprm);
regs->cs = __USER32_CS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
regs->r13 = regs->r14 = regs->r15 = 0;
/* Record the segment layout described by the header in the mm. */
current->mm->end_code = ex.a_text +
(current->mm->start_code = N_TXTADDR(ex));
current->mm->end_data = ex.a_data +
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
/* Someone check-me: is this error path enough? */
send_sig(SIGKILL, current, 0);
return retval;
}
install_exec_creds(bprm);
if (N_MAGIC(ex) == OMAGIC) {
/* OMAGIC: allocate with vm_brk() and read text+data from file offset 32. */
unsigned long text_addr, map_size;
loff_t pos;
text_addr = N_TXTADDR(ex);
pos = 32;
map_size = ex.a_text+ex.a_data;
error = vm_brk(text_addr & PAGE_MASK, map_size);
if (error != (text_addr & PAGE_MASK)) {
send_sig(SIGKILL, current, 0);
return error;
}
error = bprm->file->f_op->read(bprm->file,
(char __user *)text_addr,
ex.a_text+ex.a_data, &pos);
if ((signed long)error < 0) {
send_sig(SIGKILL, current, 0);
return error;
}
flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
} else {
#ifdef WARN_OLD
static unsigned long error_time, error_time2;
if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
(N_MAGIC(ex) != NMAGIC) &&
time_after(jiffies, error_time2 + 5*HZ)) {
printk(KERN_NOTICE "executable not page aligned\n");
error_time2 = jiffies;
}
if ((fd_offset & ~PAGE_MASK) != 0 &&
time_after(jiffies, error_time + 5*HZ)) {
printk(KERN_WARNING
"fd_offset is not page aligned. Please convert "
"program: %s\n",
bprm->file->f_path.dentry->d_name.name);
error_time = jiffies;
}
#endif
/*
 * No mmap support or unaligned file offset: fall back to
 * vm_brk() + read(). NOTE(review): neither result is checked on
 * this path.
 */
if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
loff_t pos = fd_offset;
vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
bprm->file->f_op->read(bprm->file,
(char __user *)N_TXTADDR(ex),
ex.a_text+ex.a_data, &pos);
flush_icache_range((unsigned long) N_TXTADDR(ex),
(unsigned long) N_TXTADDR(ex) +
ex.a_text+ex.a_data);
goto beyond_if;
}
/* Text: private read+exec mapping at its fixed address. */
error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
MAP_EXECUTABLE | MAP_32BIT,
fd_offset);
if (error != N_TXTADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
}
/* Data: private read+write+exec mapping, located right after the text in the file. */
error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
MAP_EXECUTABLE | MAP_32BIT,
fd_offset + ex.a_text);
if (error != N_DATADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
}
}
beyond_if:
set_binfmt(&aout_format);
set_brk(current->mm->start_brk, current->mm->brk);
current->mm->start_stack =
(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
/* start thread */
loadsegment(fs, 0);
loadsegment(ds, __USER32_DS);
loadsegment(es, __USER32_DS);
load_gs_index(0);
(regs)->ip = ex.a_entry;
(regs)->sp = current->mm->start_stack;
(regs)->flags = 0x200;
(regs)->cs = __USER32_CS;
(regs)->ss = __USER32_DS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 =
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
set_fs(USER_DS);
return 0;
}
/*
 * Load an a.out shared library from @file into the current address space.
 *
 * Only ZMAGIC and QMAGIC libraries without relocation sections and with
 * N_FLAGS(ex) == 0 are accepted. The library is placed at the page
 * containing the header's entry address, and any bss beyond the mapped
 * text+data is added with vm_brk().
 *
 * Returns 0 on success, -ENOEXEC if the header is rejected, or the value
 * returned by the failing vm_mmap()/vm_brk() call.
 */
static int load_aout_library(struct file *file)
{
struct inode *inode;
unsigned long bss, start_addr, len, error;
int retval;
struct exec ex;
inode = file->f_path.dentry->d_inode;
/* Default result for every rejection below. */
retval = -ENOEXEC;
error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
if (error != sizeof(ex))
goto out;
/* We come in here for the regular a.out style of shared libraries */
if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
i_size_read(inode) <
ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
goto out;
}
if (N_FLAGS(ex))
goto out;
/* For QMAGIC, the starting address is 0x20 into the page. We mask
this off to get the starting address for the page */
start_addr = ex.a_entry & 0xfffff000;
/*
 * Unaligned text offset: cannot mmap, so allocate with vm_brk() and
 * read the image in. NOTE(review): neither result is checked and
 * this path always reports success.
 */
if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
loff_t pos = N_TXTOFF(ex);
#ifdef WARN_OLD
static unsigned long error_time;
if (time_after(jiffies, error_time + 5*HZ)) {
printk(KERN_WARNING
"N_TXTOFF is not page aligned. Please convert "
"library: %s\n",
file->f_path.dentry->d_name.name);
error_time = jiffies;
}
#endif
vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
file->f_op->read(file, (char __user *)start_addr,
ex.a_text + ex.a_data, &pos);
flush_icache_range((unsigned long) start_addr,
(unsigned long) start_addr + ex.a_text +
ex.a_data);
retval = 0;
goto out;
}
/* Now use mmap to map the library into memory. */
error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
N_TXTOFF(ex));
retval = error;
if (error != start_addr)
goto out;
/* Extend with vm_brk() for any bss that reaches past the mapped pages. */
len = PAGE_ALIGN(ex.a_text + ex.a_data);
bss = ex.a_text + ex.a_data + ex.a_bss;
if (bss > len) {
error = vm_brk(start_addr + len, bss - len);
retval = error;
if (error != start_addr + len)
goto out;
}
retval = 0;
out:
return retval;
}
/*
 * Module entry point: register the a.out binary format. Always returns 0;
 * any result of register_binfmt() is not propagated.
 */
static int __init init_aout_binfmt(void)
{
register_binfmt(&aout_format);
return 0;
}
/* Module exit point: unregister the a.out binary format. */
static void __exit exit_aout_binfmt(void)
{
unregister_binfmt(&aout_format);
}
/* Hook the entry/exit points into the module loader. */
module_init(init_aout_binfmt);
module_exit(exit_aout_binfmt);
MODULE_LICENSE("GPL");
+562
View File
@@ -0,0 +1,562 @@
/*
* linux/arch/x86_64/ia32/ia32_signal.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/compat.h>
#include <linux/binfmts.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/ptrace.h>
#include <asm/ia32_unistd.h>
#include <asm/user32.h>
#include <asm/sigcontext32.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/sigframe.h>
#include <asm/sighandling.h>
#include <asm/sys_ia32.h>
#define FIX_EFLAGS __FIX_EFLAGS
/*
 * Convert a kernel siginfo_t into a user-space compat_siginfo_t.
 *
 * @to:   user-space destination, validated with access_ok() for writing.
 * @from: kernel siginfo to convert.
 *
 * Only the three generic ints plus the union member selected by si_code
 * are written.
 *
 * Returns -EFAULT if access_ok() rejects @to; otherwise returns err, which
 * starts at 0 and is set by put_user_catch().
 */
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
int err = 0;
/* Chooses which destination fields receive utime/stime for __SI_CHLD. */
bool ia32 = is_ia32_task();
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
put_user_try {
/* If you change siginfo_t structure, please make sure that
this code is fixed accordingly.
It should never copy any pad contained in the structure
to avoid security leaks, but must copy the generic
3 ints plus the relevant union member. */
put_user_ex(from->si_signo, &to->si_signo);
put_user_ex(from->si_errno, &to->si_errno);
put_user_ex((short)from->si_code, &to->si_code);
/* Negative si_code: copy pid, uid and the pointer-sized payload. */
if (from->si_code < 0) {
put_user_ex(from->si_pid, &to->si_pid);
put_user_ex(from->si_uid, &to->si_uid);
put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
} else {
/*
* First 32bits of unions are always present:
* si_pid === si_band === si_tid === si_addr(LS half)
*/
put_user_ex(from->_sifields._pad[0],
&to->_sifields._pad[0]);
/* The upper 16 bits of si_code select the union member. */
switch (from->si_code >> 16) {
case __SI_FAULT >> 16:
/* Nothing beyond _pad[0], already written above. */
break;
case __SI_CHLD >> 16:
if (ia32) {
put_user_ex(from->si_utime, &to->si_utime);
put_user_ex(from->si_stime, &to->si_stime);
} else {
put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
}
put_user_ex(from->si_status, &to->si_status);
/* FALL THROUGH */
/* Unknown classes are handled like __SI_KILL: copy si_uid only. */
default:
case __SI_KILL >> 16:
put_user_ex(from->si_uid, &to->si_uid);
break;
case __SI_POLL >> 16:
put_user_ex(from->si_fd, &to->si_fd);
break;
case __SI_TIMER >> 16:
put_user_ex(from->si_overrun, &to->si_overrun);
put_user_ex(ptr_to_compat(from->si_ptr),
&to->si_ptr);
break;
/* This is not generated by the kernel as of now. */
case __SI_RT >> 16:
case __SI_MESGQ >> 16:
put_user_ex(from->si_uid, &to->si_uid);
put_user_ex(from->si_int, &to->si_int);
break;
}
}
} put_user_catch(err);
return err;
}
/*
 * Read a user-space compat_siginfo_t into a kernel siginfo_t.
 *
 * @to:   kernel destination.
 * @from: user-space source, validated with access_ok() for reading.
 *
 * Only si_signo, si_errno, si_code, si_pid, si_uid and si_ptr are read;
 * no other part of *to is written by this function.
 *
 * Returns -EFAULT if access_ok() rejects @from; otherwise returns err,
 * which starts at 0 and is set by get_user_catch().
 */
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
int err = 0;
/* 32-bit user pointer value, widened through compat_ptr() below. */
u32 ptr32;
if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
return -EFAULT;
get_user_try {
get_user_ex(to->si_signo, &from->si_signo);
get_user_ex(to->si_errno, &from->si_errno);
get_user_ex(to->si_code, &from->si_code);
get_user_ex(to->si_pid, &from->si_pid);
get_user_ex(to->si_uid, &from->si_uid);
get_user_ex(ptr32, &from->si_ptr);
to->si_ptr = compat_ptr(ptr32);
} get_user_catch(err);
return err;
}
/*
 * 32-bit sigsuspend: temporarily replace the blocked-signal mask and sleep.
 *
 * @history0, @history1: not used by this function.
 * @mask: new blocked mask; bits outside _BLOCKABLE are dropped.
 *
 * The previous mask is stashed in current->saved_sigmask and
 * set_restore_sigmask() is called after the task wakes up.
 * Always returns -ERESTARTNOHAND.
 */
asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
{
sigset_t blocked;
current->saved_sigmask = current->blocked;
mask &= _BLOCKABLE;
siginitset(&blocked, mask);
set_current_blocked(&blocked);
/* Sleep interruptibly until something wakes this task. */
current->state = TASK_INTERRUPTIBLE;
schedule();
set_restore_sigmask();
return -ERESTARTNOHAND;
}
/*
 * 32-bit sigaltstack: translate stack_ia32_t to/from the native stack_t
 * around a call to do_sigaltstack().
 *
 * @uss_ptr:  optional user pointer to the new alternate stack description.
 * @uoss_ptr: optional user pointer that receives the previous description.
 * @regs:     register frame; regs->sp is passed to do_sigaltstack().
 *
 * Returns -EFAULT when a user access fails, otherwise the result of
 * do_sigaltstack().
 */
asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
stack_ia32_t __user *uoss_ptr,
struct pt_regs *regs)
{
stack_t uss, uoss;
int ret, err = 0;
mm_segment_t seg;
if (uss_ptr) {
u32 ptr;
/* Zero the native structure before filling it field by field. */
memset(&uss, 0, sizeof(stack_t));
if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)))
return -EFAULT;
get_user_try {
get_user_ex(ptr, &uss_ptr->ss_sp);
get_user_ex(uss.ss_flags, &uss_ptr->ss_flags);
get_user_ex(uss.ss_size, &uss_ptr->ss_size);
} get_user_catch(err);
if (err)
return -EFAULT;
uss.ss_sp = compat_ptr(ptr);
}
/*
 * do_sigaltstack() is handed the kernel-resident uss/uoss, so the
 * address limit is switched to KERNEL_DS for the call and restored
 * right after.
 */
seg = get_fs();
set_fs(KERNEL_DS);
ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp);
set_fs(seg);
/* Report the old stack only on success and only if requested. */
if (ret >= 0 && uoss_ptr) {
if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
return -EFAULT;
put_user_try {
put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp);
put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags);
put_user_ex(uoss.ss_size, &uoss_ptr->ss_size);
} put_user_catch(err);
if (err)
ret = -EFAULT;
}
return ret;
}
/*
* Do a signal return; undo the signal stack.
*/
/*
 * Per-segment load helpers, so that set_user_seg() can paste the segment
 * name.  gs goes through load_gs_index(); the others use loadsegment().
 */
#define loadsegment_gs(v) load_gs_index(v)
#define loadsegment_fs(v) loadsegment(fs, v)
#define loadsegment_ds(v) loadsegment(ds, v)
#define loadsegment_es(v) loadsegment(es, v)
/* Read the current value of segment register 'seg' via savesegment(). */
#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; })
/* Load 'v' into segment register 'seg' using the matching helper above. */
#define set_user_seg(seg, v) loadsegment_##seg(v)
/*
 * The macros below expect 'regs' (struct pt_regs *) and 'sc' (user
 * sigcontext pointer) to be in scope and use get_user_ex(), so they are
 * meant to be expanded inside a get_user_try block.
 */
/* Copy field x from the user sigcontext into regs. */
#define COPY(x) { \
get_user_ex(regs->x, &sc->x); \
}
/* Fetch a segment selector from the user sigcontext as a 16-bit value. */
#define GET_SEG(seg) ({ \
unsigned short tmp; \
get_user_ex(tmp, &sc->seg); \
tmp; \
})
/* Store the saved selector into regs with its low two bits forced to 3. */
#define COPY_SEG_CPL3(seg) do { \
regs->seg = GET_SEG(seg) | 3; \
} while (0)
/*
 * Reload segment register 'seg' from the sigcontext (low two bits forced
 * to 3), but only when it differs from the value currently loaded.
 */
#define RELOAD_SEG(seg) { \
unsigned int pre = GET_SEG(seg); \
unsigned int cur = get_user_seg(seg); \
pre |= 3; \
if (pre != cur) \
set_user_seg(seg, pre); \
}
/*
 * Restore the register state saved in a 32-bit user sigcontext.
 *
 * @regs: register frame to update.
 * @sc:   user-space sigcontext to read from.
 * @pax:  receives the saved ax value; the callers in this file return it
 *        as the syscall result.
 *
 * Returns 0 on success, non-zero if a user access faulted or
 * restore_i387_xstate_ia32() reported an error.
 */
static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_ia32 __user *sc,
unsigned int *pax)
{
unsigned int tmpflags, err = 0;
void __user *buf;
u32 tmp;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
get_user_try {
/*
* Reload fs and gs if they have changed in the signal
* handler. This does not handle long fs/gs base changes in
* the handler, but does not clobber them at least in the
* normal case.
*/
RELOAD_SEG(gs);
RELOAD_SEG(fs);
RELOAD_SEG(ds);
RELOAD_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
/* Don't touch extended registers */
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
/* Only the flag bits in FIX_EFLAGS are taken from user space. */
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
/* disable syscall checks */
regs->orig_ax = -1;
/* The sigcontext holds a 32-bit pointer to the saved FPU state. */
get_user_ex(tmp, &sc->fpstate);
buf = compat_ptr(tmp);
err |= restore_i387_xstate_ia32(buf);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
return err;
}
/*
 * 32-bit sigreturn: restore the blocked mask and registers from the
 * struct sigframe_ia32 located at regs->sp - 8.
 *
 * Returns the ax value restored from the frame.  On any failure
 * signal_fault() is called and 0 is returned.
 */
asmlinkage long sys32_sigreturn(struct pt_regs *regs)
{
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
unsigned int ax;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
/*
 * The first mask word comes from sc.oldmask; the remaining words, if
 * any, come from extramask and are copied in 4 bytes past the start
 * of set.sig.
 */
if (__get_user(set.sig[0], &frame->sc.oldmask)
|| (_COMPAT_NSIG_WORDS > 1
&& __copy_from_user((((char *) &set.sig) + 4),
&frame->extramask,
sizeof(frame->extramask))))
goto badframe;
/* Drop any bits outside _BLOCKABLE before installing the mask. */
sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
goto badframe;
return ax;
badframe:
signal_fault(regs, frame, "32bit sigreturn");
return 0;
}
/*
 * 32-bit rt_sigreturn: restore the blocked mask, registers and alternate
 * stack settings from the struct rt_sigframe_ia32 at regs->sp - 4.
 *
 * Returns the ax value restored from the frame.  On any failure
 * signal_fault() is called and 0 is returned.
 */
asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
unsigned int ax;
struct pt_regs tregs;
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
/* Drop any bits outside _BLOCKABLE before installing the mask. */
sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
/*
 * sys32_sigaltstack() is given a copy of the restored registers.
 * Only an -EFAULT result is treated as a bad frame; other return
 * values are ignored.
 */
tregs = *regs;
if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
goto badframe;
return ax;
badframe:
signal_fault(regs, frame, "32bit rt sigreturn");
return 0;
}
/*
* Set up a signal frame.
*/
/*
 * Fill a user-space 32-bit sigcontext from the current register state.
 *
 * @sc:      user-space sigcontext to write.
 * @fpstate: user pointer stored (as a compat pointer) in sc->fpstate.
 * @regs:    register frame of the interrupted context.
 * @mask:    value stored in sc->oldmask.
 *
 * Performs no access_ok() check of its own.  Returns err, which starts at
 * 0 and is set by put_user_catch().
 */
static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
void __user *fpstate,
struct pt_regs *regs, unsigned int mask)
{
int err = 0;
put_user_try {
/*
 * gs/fs/ds/es are read from the live segment registers rather than
 * from regs; each is stored through an unsigned int pointer.
 */
put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs);
put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs);
put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds);
put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es);
put_user_ex(regs->di, &sc->di);
put_user_ex(regs->si, &sc->si);
put_user_ex(regs->bp, &sc->bp);
put_user_ex(regs->sp, &sc->sp);
put_user_ex(regs->bx, &sc->bx);
put_user_ex(regs->dx, &sc->dx);
put_user_ex(regs->cx, &sc->cx);
put_user_ex(regs->ax, &sc->ax);
put_user_ex(current->thread.trap_nr, &sc->trapno);
put_user_ex(current->thread.error_code, &sc->err);
put_user_ex(regs->ip, &sc->ip);
put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
put_user_ex(regs->flags, &sc->flags);
/* sp is recorded twice: in sc->sp above and in sp_at_signal here. */
put_user_ex(regs->sp, &sc->sp_at_signal);
put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
put_user_ex(ptr_to_compat(fpstate), &sc->fpstate);
/* non-iBCS2 extensions.. */
put_user_ex(mask, &sc->oldmask);
put_user_ex(current->thread.cr2, &sc->cr2);
} put_user_catch(err);
return err;
}
/*
* Determine which stack to use..
*/
/*
 * Pick the user stack address at which a signal frame of frame_size bytes
 * will be built.
 *
 * @ka:         action of the signal being delivered.
 * @regs:       register frame of the interrupted context.
 * @frame_size: size of the frame the caller is going to write.
 * @fpstate:    set to the FPU save area when used_math() is true; left
 *              untouched otherwise.
 *
 * Returns the frame address, or (void __user *)-1L when saving the FPU
 * state fails.
 */
static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
				 size_t frame_size,
				 void **fpstate)
{
	/* Start from the stack the task was already running on. */
	unsigned long sp = regs->sp;

	if (ka->sa.sa_flags & SA_ONSTACK) {
		/* X/Open sanctioned switch to the alternate signal stack. */
		if (!sas_ss_flags(sp))
			sp = current->sas_ss_sp + current->sas_ss_size;
	} else if (ka->sa.sa_restorer &&
		   !(ka->sa.sa_flags & SA_RESTORER) &&
		   (regs->ss & 0xffff) != __USER32_DS) {
		/* Legacy switch: sa_restorer doubles as the stack pointer. */
		sp = (unsigned long) ka->sa.sa_restorer;
	}

	if (used_math()) {
		/* Reserve and fill the FPU save area above the frame. */
		sp -= sig_xstate_ia32_size;
		*fpstate = (struct _fpstate_ia32 *) sp;
		if (save_i387_xstate_ia32(*fpstate) < 0)
			return (void __user *) -1L;
	}

	/*
	 * Make room for the frame, then align per the i386 ABI so that
	 * on function entry ((sp + 4) & 15) == 0.
	 */
	sp = ((sp - frame_size + 4) & -16ul) - 4;

	return (void __user *) sp;
}
/*
 * Build a non-RT 32-bit signal frame on the user stack and point the
 * registers at the handler.
 *
 * @sig:  signal number, stored in the frame and passed in ax.
 * @ka:   action for the signal (handler, flags, restorer).
 * @set:  blocked mask to save in the frame.
 * @regs: register frame; updated to enter the handler on success.
 *
 * Returns 0 on success, -EFAULT if the frame cannot be written.
 */
int ia32_setup_frame(int sig, struct k_sigaction *ka,
compat_sigset_t *set, struct pt_regs *regs)
{
struct sigframe_ia32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
/* copy_to_user optimizes that into a single 8 byte store */
static const struct {
u16 poplmovl;
u32 val;
u16 int80;
} __attribute__((packed)) code = {
0xb858, /* popl %eax ; movl $...,%eax */
__NR_ia32_sigreturn,
0x80cd, /* int $0x80 */
};
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
/* A -1 result from get_sigframe() is rejected by this check as well. */
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
if (__put_user(sig, &frame->sig))
return -EFAULT;
if (ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
return -EFAULT;
/* Mask words beyond the first go into extramask. */
if (_COMPAT_NSIG_WORDS > 1) {
if (__copy_to_user(frame->extramask, &set->sig[1],
sizeof(frame->extramask)))
return -EFAULT;
}
/*
 * Return address for the handler: the user-supplied restorer, else
 * the vdso sigreturn stub, else the code copied into the frame.
 */
if (ka->sa.sa_flags & SA_RESTORER) {
restorer = ka->sa.sa_restorer;
} else {
/* Return stub is in 32bit vsyscall page */
if (current->mm->context.vdso)
restorer = VDSO32_SYMBOL(current->mm->context.vdso,
sigreturn);
else
restorer = &frame->retcode;
}
put_user_try {
put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
/*
* These are actually not used anymore, but left because some
* gdb versions depend on them as a marker.
*/
put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
} put_user_catch(err);
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;
/* Make -mregparm=3 work */
regs->ax = sig;
regs->dx = 0;
regs->cx = 0;
loadsegment(ds, __USER32_DS);
loadsegment(es, __USER32_DS);
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
return 0;
}
/*
 * Build an RT 32-bit signal frame (siginfo + ucontext) on the user stack
 * and point the registers at the handler.
 *
 * @sig:  signal number, stored in the frame and passed in ax.
 * @ka:   action for the signal (handler, flags, restorer).
 * @info: siginfo converted into the frame via copy_siginfo_to_user32().
 * @set:  blocked mask saved in uc_sigmask.
 * @regs: register frame; updated to enter the handler on success.
 *
 * Returns 0 on success, -EFAULT if the frame cannot be written.
 */
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
compat_sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe_ia32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
/* __copy_to_user optimizes that into a single 8 byte store */
static const struct {
u8 movl;
u32 val;
u16 int80;
u8 pad;
} __attribute__((packed)) code = {
0xb8,
__NR_ia32_rt_sigreturn,
0x80cd,
0,
};
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
/* A -1 result from get_sigframe() is rejected by this check as well. */
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
put_user_try {
put_user_ex(sig, &frame->sig);
put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo);
put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
err |= copy_siginfo_to_user32(&frame->info, info);
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/*
 * Return address for the handler: the user-supplied restorer or the
 * vdso rt_sigreturn stub.  Unlike ia32_setup_frame(), context.vdso
 * is not checked here and there is no fallback to frame->retcode.
 */
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
else
restorer = VDSO32_SYMBOL(current->mm->context.vdso,
rt_sigreturn);
put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
/*
* Not actually used anymore, but left because some gdb
* versions need it.
*/
put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
} put_user_catch(err);
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;
/* Make -mregparm=3 work */
regs->ax = sig;
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
loadsegment(ds, __USER32_DS);
loadsegment(es, __USER32_DS);
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
return 0;
}
+493
View File
@@ -0,0 +1,493 @@
/*
* Compatibility mode system call entry point for x86-64.
*
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
*/
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
#include <asm/ia32_unistd.h>
#include <asm/thread_info.h>
#include <asm/segment.h>
#include <asm/irqflags.h>
#include <linux/linkage.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE 0x40000000
#ifndef CONFIG_AUDITSYSCALL
#define sysexit_audit ia32_ret_from_sys_call
#define sysretl_audit ia32_ret_from_sys_call
#endif
.section .entry.text, "ax"
/*
 * Shuffle the 32-bit syscall argument registers (ebx, ecx, edx, esi,
 * edi, ebp = args 1-6, as listed in the entry point comments below)
 * into edi, esi, edx, ecx, r8d, r9d for the call through
 * ia32_sys_call_table.  With noebp != 0 the %ebp -> %r9d copy is
 * skipped; the caller must then have loaded %r9d itself.
 */
.macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d /* arg5 */
.if \noebp
.else
movl %ebp,%r9d /* arg6 */
.endif
xchg %ecx,%esi /* arg2 -> %esi, arg4 -> %ecx */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* zero extension */
.endm
/* clobbers %eax */
/*
 * Write zero to the saved R11, R10 and R8 slots at \offset from %rsp,
 * and register \_r9 to the R9 slot.  \_r9 defaults to rax, which has
 * just been zeroed, so by default all four slots are cleared.
 */
.macro CLEAR_RREGS offset=0, _r9=rax
xorl %eax,%eax
movq %rax,\offset+R11(%rsp)
movq %rax,\offset+R10(%rsp)
movq %\_r9,\offset+R9(%rsp)
movq %rax,\offset+R8(%rsp)
.endm
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
* the %rax value we should see. Instead, we just truncate that
* value to 32 bits again as we did on entry from user mode.
* If it's a new value set by user_regset during entry tracing,
* this matches the normal truncation of the user-mode value.
* If it's -1 to make us punt the syscall, then (u32)-1 is still
* an appropriately invalid value.
*/
/*
 * \offset: displacement of the saved argument area from %rsp.
 * \_r9:    when non-zero, %r9d is reloaded as well.
 * NOTE(review): the literal displacements 16/40/48/56/64 presumably
 * match the R9/RCX/RDX/RSI/RDI frame slots -- confirm against the
 * frame layout definitions.
 */
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
movl \offset+16(%rsp),%r9d
.endif
movl \offset+40(%rsp),%ecx
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
movl %eax,%eax /* zero extension */
.endm
/*
 * CFI_STARTPROC wrapper for the 32-bit entry points: opens the CFI
 * region and marks r8-r15 as undefined for the unwinder.
 */
.macro CFI_STARTPROC32 simple
CFI_STARTPROC \simple
CFI_UNDEFINED r8
CFI_UNDEFINED r9
CFI_UNDEFINED r10
CFI_UNDEFINED r11
CFI_UNDEFINED r12
CFI_UNDEFINED r13
CFI_UNDEFINED r14
CFI_UNDEFINED r15
.endm
/*
 * Out-of-line native return sequences, only built with CONFIG_PARAVIRT.
 */
#ifdef CONFIG_PARAVIRT
/* swapgs followed by sysretl. */
ENTRY(native_usergs_sysret32)
swapgs
sysretl
ENDPROC(native_usergs_sysret32)
/* swapgs, re-enable interrupts, then sysexit. */
ENTRY(native_irq_enable_sysexit)
swapgs
sti
sysexit
ENDPROC(native_irq_enable_sysexit)
#endif
/*
* 32bit SYSENTER instruction entry.
*
* Arguments:
* %eax System call number.
* %ebx Arg1
* %ecx Arg2
* %edx Arg3
* %esi Arg4
* %edi Arg5
* %ebp user stack
* 0(%ebp) Arg6
*
* Interrupts off.
*
* This is purely a fast path. For anything complicated we use the int 0x80
* path below. Set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
ENTRY(ia32_sysenter_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp
SWAPGS_UNSAFE_STACK
/* Switch to the per-cpu kernel stack. */
movq PER_CPU_VAR(kernel_stack), %rsp
addq $(KERNEL_STACK_OFFSET),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs, here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %ebp,%ebp /* zero extension */
/* Push a frame by hand: ss, user sp, flags, cs, return ip, orig ax. */
pushq_cfi $__USER32_DS
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rbp
CFI_REL_OFFSET rsp,0
pushfq_cfi
/*CFI_REL_OFFSET rflags,0*/
/* The user return address comes from thread_info's sysenter_return. */
movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
CFI_REGISTER rip,r10
pushq_cfi $__USER32_CS
/*CFI_REL_OFFSET cs,0*/
movl %eax, %eax /* zero extension of the syscall number */
pushq_cfi %r10
CFI_REL_OFFSET rip,0
pushq_cfi %rax
cld
SAVE_ARGS 0,1,0
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
/* Fetch arg6 from 0(%ebp); a fault here is redirected to ia32_badarg. */
1: movl (%rbp),%ebp
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* Any syscall-entry work pending?  Then take the slow path. */
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
/* Reject syscall numbers beyond the table. */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
IA32_ARG_FIXUP
sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
/* Any exit work pending?  Then leave the fast path. */
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz sysexit_audit
sysexit_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* clear IF so that popfq doesn't enable interrupts early */
andl $~0x200,EFLAGS-R11(%rsp)
movl RIP-R11(%rsp),%edx /* User %eip */
CFI_REGISTER rip,rdx
RESTORE_ARGS 0,24,0,0,0,0
/* Zero r8-r11 before returning to user mode. */
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
popfq_cfi
/*CFI_RESTORE rflags*/
popq_cfi %rcx /* User %esp */
CFI_REGISTER rsp,rcx
TRACE_IRQS_ON
ENABLE_INTERRUPTS_SYSEXIT32
#ifdef CONFIG_AUDITSYSCALL
/*
 * Call __audit_syscall_entry() with the audit arch, the syscall number
 * and the first four syscall arguments, then reload the syscall number
 * and arguments 1-5 into the registers the dispatch code expects.
 * Jumps to ia32_badsys if the reloaded number is out of range.
 * The 6th argument (%r9d) is reloaded by the code using this macro.
 */
.macro auditsys_entry_common
movl %esi,%r9d /* 6th arg: 4th syscall arg */
movl %edx,%r8d /* 5th arg: 3rd syscall arg */
/* (already in %ecx) 4th arg: 2nd syscall arg */
movl %ebx,%edx /* 3rd arg: 1st syscall arg */
movl %eax,%esi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
call __audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
.endm
/*
 * Exit-side audit hook.  If work other than auditing is pending, go to
 * ia32_ret_from_sys_call.  Otherwise call __audit_syscall_exit() with
 * interrupts enabled, then re-check the work flags with interrupts off:
 * jump to \exit when nothing is pending, else to int_with_check.
 */
.macro auditsys_exit exit
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
sti
movl %eax,%esi /* second arg, syscall return value */
cmpl $-MAX_ERRNO,%eax /* is it an error ? */
jbe 1f
movslq %eax, %rsi /* if error sign extend to 64 bits */
1: setbe %al /* 1 if NOT an error (flags from the cmpl above), 0 if error */
movzbl %al,%edi /* zero-extend that into %edi (first arg) */
call __audit_syscall_exit
movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
cli
TRACE_IRQS_OFF
testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz \exit
CLEAR_RREGS -ARGOFFSET
jmp int_with_check
.endm
/* SYSENTER entry-side audit path, reached from sysenter_tracesys. */
sysenter_auditsys:
CFI_RESTORE_STATE
auditsys_entry_common
movl %ebp,%r9d /* reload 6th syscall arg */
jmp sysenter_dispatch
/* SYSENTER exit-side audit path. */
sysexit_audit:
auditsys_exit sysexit_from_sys_call
#endif
/*
 * SYSENTER slow path, taken when _TIF_WORK_SYSCALL_ENTRY work is pending.
 * If auditing is the only pending work, divert to sysenter_auditsys;
 * otherwise save the full register set and call syscall_trace_enter().
 */
sysenter_tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz sysenter_auditsys
#endif
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
/* Re-validate the (possibly changed) syscall number. */
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
ENDPROC(ia32_sysenter_target)
/*
* 32bit SYSCALL instruction entry.
*
* Arguments:
* %eax System call number.
* %ebx Arg1
* %ecx return EIP
* %edx Arg3
* %esi Arg4
* %edi Arg5
* %ebp Arg2 [note: not saved in the stack frame, should not be touched]
* %esp user stack
* 0(%esp) Arg6
*
* Interrupts off.
*
* This is purely a fast path. For anything complicated we use the int 0x80
* path below. Set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
/* Keep the (zero-extended) user stack pointer in %r8. */
movl %esp,%r8d
CFI_REGISTER rsp,r8
movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,0,0
movl %eax,%eax /* zero extension */
/* Fill in the rest of the frame by hand. */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp) /* %rcx holds the return EIP */
CFI_REL_OFFSET rip,RIP-ARGOFFSET
movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
movl %ebp,%ecx /* arg2 arrives in %ebp */
movq $__USER32_CS,CS-ARGOFFSET(%rsp)
movq $__USER32_DS,SS-ARGOFFSET(%rsp)
movq %r11,EFLAGS-ARGOFFSET(%rsp)
/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
movq %r8,RSP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* no need to do an access_ok check here because r8 has been
32bit zero extended */
/* hardware stack frame is complete now */
/* Fetch arg6 from 0(%esp); a fault here is redirected to ia32_badarg. */
1: movl (%r8),%r9d
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* Any syscall-entry work pending?  Then take the slow path. */
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
CFI_REMEMBER_STATE
jnz cstar_tracesys
/* Reject syscall numbers beyond the table. */
cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
/* noebp=1: arg6 is already in %r9d, so %ebp must not overwrite it. */
IA32_ARG_FIXUP 1
cstar_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
/* Any exit work pending?  Then leave the fast path. */
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz sysretl_audit
sysretl_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
RESTORE_ARGS 0,-ARG_SKIP,0,0,0
/* Return ip goes back into %ecx and the saved flags into %r11d. */
movl RIP-ARGOFFSET(%rsp),%ecx
CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
/*CFI_REGISTER rflags,r11*/
/* Zero r8-r10 before returning to user mode. */
xorq %r10,%r10
xorq %r9,%r9
xorq %r8,%r8
TRACE_IRQS_ON
movl RSP-ARGOFFSET(%rsp),%esp
CFI_RESTORE rsp
USERGS_SYSRET32
#ifdef CONFIG_AUDITSYSCALL
/*
 * SYSCALL entry-side audit path, reached from cstar_tracesys.  Arg6 lives
 * in %r9d here, so it is parked in the R9 frame slot around the macro.
 */
cstar_auditsys:
CFI_RESTORE_STATE
movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
auditsys_entry_common
movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
jmp cstar_dispatch
/* SYSCALL exit-side audit path. */
sysretl_audit:
auditsys_exit sysretl_from_sys_call
#endif
/*
 * SYSCALL slow path, taken when _TIF_WORK_SYSCALL_ENTRY work is pending.
 * If auditing is the only pending work, divert to cstar_auditsys;
 * otherwise save the full register set and call syscall_trace_enter().
 */
cstar_tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz cstar_auditsys
#endif
/* Swap arg6 (%r9d) with %ebp for the duration of the trace call. */
xchgl %r9d,%ebp
SAVE_REST
/* The R9 slot receives %r9 (holding the former %ebp) instead of zero. */
CLEAR_RREGS 0, r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
/* Undo the swap done on entry to this path. */
xchgl %ebp,%r9d
/* Re-validate the (possibly changed) syscall number. */
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
/*
 * Exception-table fixup target for the arg6 loads in the SYSENTER and
 * SYSCALL paths: the syscall returns -EFAULT.
 */
ia32_badarg:
movq $-EFAULT,%rax
jmp ia32_sysret
CFI_ENDPROC
/*
* Emulated IA32 system calls via int 0x80.
*
* Arguments:
* %eax System call number.
* %ebx Arg1
* %ecx Arg2
* %edx Arg3
* %esi Arg4
* %edi Arg5
* %ebp Arg6 [note: not saved in the stack frame, should not be touched]
*
* Notes:
* Uses the same stack frame as the x86-64 version.
* All registers except %eax must be saved (but ptrace may violate that)
* Arguments are zero extended. For system calls that want sign extension and
* take long arguments a wrapper is needed. Most calls can just be called
* directly.
* Assumes it is only called from user space and entered with interrupts off.
*/
ENTRY(ia32_syscall)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-RIP
/*CFI_REL_OFFSET ss,SS-RIP*/
CFI_REL_OFFSET rsp,RSP-RIP
/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
/*CFI_REL_OFFSET cs,CS-RIP*/
CFI_REL_OFFSET rip,RIP-RIP
PARAVIRT_ADJUST_EXCEPTION_FRAME
SWAPGS
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%eax /* zero extension of the syscall number */
pushq_cfi %rax /* saved as orig_ax */
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
SAVE_ARGS 0,1,0
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* Any syscall-entry work pending?  Then take the slow path. */
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz ia32_tracesys
/* Reject syscall numbers beyond the table. */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
/* Common tail: store the result, clear the r8-r11 slots, generic exit. */
ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
ia32_ret_from_sys_call:
CLEAR_RREGS -ARGOFFSET
jmp int_ret_from_sys_call
/* Slow path: save everything and let syscall_trace_enter() run. */
ia32_tracesys:
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
/* Re-validate the (possibly changed) syscall number. */
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)
/*
 * Out-of-range syscall number: zero the saved orig_ax slot and return
 * -ENOSYS through the common ia32_sysret tail.
 */
ia32_badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
movq $-ENOSYS,%rax
jmp ia32_sysret
CFI_ENDPROC
/*
 * Emit a global stub \label that calls \func with a pointer to the saved
 * register frame in register \arg.  The address of \func is passed to
 * ia32_ptregs_common in %rax.
 */
.macro PTREGSCALL label, func, arg
ALIGN
GLOBAL(\label)
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ia32_ptregs_common
.endm
/*
 * Stubs for syscalls that take a register-frame pointer.  The last macro
 * argument is the register that receives that pointer; for the handlers
 * visible in ia32_signal.c it matches the position of their
 * struct pt_regs * parameter (1st -> %rdi, 3rd -> %rdx).
 */
CFI_STARTPROC32
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
PTREGSCALL stub32_execve, sys32_execve, %rcx
PTREGSCALL stub32_fork, sys_fork, %rdi
PTREGSCALL stub32_clone, sys32_clone, %rdx
PTREGSCALL stub32_vfork, sys_vfork, %rdi
PTREGSCALL stub32_iopl, sys_iopl, %rsi
ALIGN
/*
 * Shared tail of the PTREGSCALL stubs.  On entry %rax holds the handler
 * address.  The stub's return address is popped into %r11 and never
 * returned to; the result goes out through ia32_sysret instead.
 */
ia32_ptregs_common:
popq %r11
CFI_ENDPROC
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
CFI_REL_OFFSET rax,RAX-ARGOFFSET
CFI_REL_OFFSET rcx,RCX-ARGOFFSET
CFI_REL_OFFSET rdx,RDX-ARGOFFSET
CFI_REL_OFFSET rsi,RSI-ARGOFFSET
CFI_REL_OFFSET rdi,RDI-ARGOFFSET
CFI_REL_OFFSET rip,RIP-ARGOFFSET
/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/
/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
/* Save the remaining registers around the handler call. */
SAVE_REST
call *%rax
RESTORE_REST
jmp ia32_sysret /* misbalances the return cache */
CFI_ENDPROC
END(ia32_ptregs_common)
+54
View File
@@ -0,0 +1,54 @@
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/sem.h>
#include <linux/msg.h>
#include <linux/shm.h>
#include <linux/ipc.h>
#include <linux/compat.h>
#include <asm/sys_ia32.h>
/*
 * 32-bit ipc() multiplexer.
 *
 * @call:  low 16 bits select the operation, high 16 bits carry the
 *         "version" passed on to the msgrcv and shmat handlers.
 * @first, @second, @third, @ptr, @fifth: operation-specific arguments,
 *         forwarded to the native or compat handler for that operation.
 *
 * Returns the handler's result, or -ENOSYS for an unknown operation.
 */
asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
			  compat_uptr_t ptr, u32 fifth)
{
	/* hack for backward compatibility */
	const int version = call >> 16;
	void __user *uptr = compat_ptr(ptr);

	switch (call & 0xffff) {
	/* semaphores */
	case SEMOP:
		/* struct sembuf is the same on 32 and 64bit :)) */
		return sys_semtimedop(first, uptr, second, NULL);
	case SEMTIMEDOP:
		return compat_sys_semtimedop(first, uptr, second,
					     compat_ptr(fifth));
	case SEMGET:
		return sys_semget(first, second, third);
	case SEMCTL:
		return compat_sys_semctl(first, second, third, uptr);

	/* message queues */
	case MSGSND:
		return compat_sys_msgsnd(first, second, third, uptr);
	case MSGRCV:
		return compat_sys_msgrcv(first, second, fifth, third,
					 version, uptr);
	case MSGGET:
		return sys_msgget((key_t) first, second);
	case MSGCTL:
		return compat_sys_msgctl(first, second, uptr);

	/* shared memory */
	case SHMAT:
		return compat_sys_shmat(first, second, third, version, uptr);
	case SHMDT:
		return sys_shmdt(uptr);
	case SHMGET:
		return sys_shmget(first, (unsigned)second, third);
	case SHMCTL:
		return compat_sys_shmctl(first, second, uptr);

	default:
		break;
	}

	return -ENOSYS;
}
+7
View File
@@ -0,0 +1,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
/*
 * Placeholder for compat system calls that are not implemented:
 * takes no arguments and always fails with -ENOSYS.
 */
long compat_ni_syscall(void)
{
	const long not_implemented = -ENOSYS;

	return not_implemented;
}
+516
View File
@@ -0,0 +1,516 @@
/*
* sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
* sys_sparc32
*
* Copyright (C) 2000 VA Linux Co
* Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
* Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 2000 Hewlett-Packard Co.
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port)
*
* These routines maintain argument size conversion between 32bit and 64bit
* environment. In 2.5 most of this should be moved to a generic directory.
*
* This file assumes that there is a hole at the end of user address space.
*
* Some of the functions are LE specific currently. These are
* hopefully all marked. This should be fixed.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/signal.h>
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/utsname.h>
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
#include <linux/rwsem.h>
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
#include <linux/highuid.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <asm/mman.h>
#include <asm/types.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <asm/vgtod.h>
#include <asm/sys_ia32.h>
#define AA(__x) ((unsigned long)(__x))
/*
 * 32-bit truncate64: the 64-bit length arrives split into two halves.
 *
 * @filename:    user pointer to the path.
 * @offset_low:  low 32 bits of the new length.
 * @offset_high: high 32 bits of the new length.
 *
 * Returns whatever sys_truncate() returns for the recombined length.
 */
asmlinkage long sys32_truncate64(const char __user *filename,
				 unsigned long offset_low,
				 unsigned long offset_high)
{
	loff_t length = (loff_t) offset_high << 32;

	length |= offset_low;
	return sys_truncate(filename, length);
}
asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
unsigned long offset_high)
{
return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
/*
* Another set for IA32/LFS -- x86_64 struct stat is different due to
* support for 64bit inode numbers.
*/
/*
 * Copy a kernel struct kstat into a user-space struct stat64.
 *
 * @ubuf: user-space destination, validated with access_ok() for writing.
 * @stat: kernel stat data to copy.
 *
 * Returns 0 on success, -EFAULT if the access check or any individual
 * store fails.  The stores are chained with ||, so copying stops at the
 * first failure.
 */
static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
{
/* Locals take the exact types of the user-visible fields. */
typeof(ubuf->st_uid) uid = 0;
typeof(ubuf->st_gid) gid = 0;
SET_UID(uid, stat->uid);
SET_GID(gid, stat->gid);
/* The inode number is written to both __st_ino and st_ino. */
if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
__put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
__put_user(stat->ino, &ubuf->__st_ino) ||
__put_user(stat->ino, &ubuf->st_ino) ||
__put_user(stat->mode, &ubuf->st_mode) ||
__put_user(stat->nlink, &ubuf->st_nlink) ||
__put_user(uid, &ubuf->st_uid) ||
__put_user(gid, &ubuf->st_gid) ||
__put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
__put_user(stat->size, &ubuf->st_size) ||
__put_user(stat->atime.tv_sec, &ubuf->st_atime) ||
__put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
__put_user(stat->mtime.tv_sec, &ubuf->st_mtime) ||
__put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
__put_user(stat->ctime.tv_sec, &ubuf->st_ctime) ||
__put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
__put_user(stat->blksize, &ubuf->st_blksize) ||
__put_user(stat->blocks, &ubuf->st_blocks))
return -EFAULT;
return 0;
}
/*
 * 32-bit stat64: look up @filename with vfs_stat() and copy the result
 * to @statbuf in struct stat64 layout.
 *
 * Returns the vfs_stat() error if the lookup fails, otherwise the result
 * of cp_stat64().
 */
asmlinkage long sys32_stat64(const char __user *filename,
			     struct stat64 __user *statbuf)
{
	struct kstat kst;
	int err;

	err = vfs_stat(filename, &kst);
	if (err)
		return err;

	return cp_stat64(statbuf, &kst);
}
/*
 * 32-bit lstat64: look up @filename with vfs_lstat() and copy the result
 * to @statbuf in struct stat64 layout.
 *
 * Returns the vfs_lstat() error if the lookup fails, otherwise the result
 * of cp_stat64().
 */
asmlinkage long sys32_lstat64(const char __user *filename,
			      struct stat64 __user *statbuf)
{
	struct kstat kst;
	int err;

	err = vfs_lstat(filename, &kst);
	if (err)
		return err;

	return cp_stat64(statbuf, &kst);
}
/*
 * 32-bit fstat64: query @fd with vfs_fstat() and copy the result to
 * @statbuf in struct stat64 layout.
 *
 * Returns the vfs_fstat() error if the query fails, otherwise the result
 * of cp_stat64().
 */
asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
{
	struct kstat kst;
	int err;

	err = vfs_fstat(fd, &kst);
	if (err)
		return err;

	return cp_stat64(statbuf, &kst);
}
/*
 * 32-bit fstatat64: query @filename relative to @dfd with vfs_fstatat()
 * (honouring @flag) and copy the result to @statbuf in struct stat64
 * layout.
 *
 * Returns the vfs_fstatat() error if the query fails, otherwise the
 * result of cp_stat64().
 */
asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
			      struct stat64 __user *statbuf, int flag)
{
	struct kstat kst;
	int ret = vfs_fstatat(dfd, filename, &kst, flag);

	if (!ret)
		ret = cp_stat64(statbuf, &kst);

	return ret;
}
/*
* Linux/i386 didn't use to be able to handle more than
* 4 system call parameters, so these system calls used a memory
* block for parameter passing..
*/
/*
 * User-space argument block read by sys32_mmap().  The fields are handed
 * to sys_mmap_pgoff() in declaration order.
 */
struct mmap_arg_struct32 {
unsigned int addr;
unsigned int len;
unsigned int prot;
unsigned int flags;
unsigned int fd;
/* Byte offset; sys32_mmap() rejects values that are not page aligned. */
unsigned int offset;
};
/*
 * Old-style mmap() for 32-bit callers: the six arguments arrive packed in
 * a user-space struct mmap_arg_struct32.
 *
 * Returns -EFAULT if the block cannot be copied in, -EINVAL if the byte
 * offset is not page aligned, otherwise the result of sys_mmap_pgoff().
 */
asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
{
	struct mmap_arg_struct32 args;
	unsigned long pgoff;

	if (copy_from_user(&args, arg, sizeof(args)) != 0)
		return -EFAULT;

	/* Any bit set below the page boundary means a misaligned offset. */
	if ((args.offset & ~PAGE_MASK) != 0)
		return -EINVAL;

	/* sys_mmap_pgoff() wants the offset in pages, not bytes. */
	pgoff = args.offset >> PAGE_SHIFT;

	return sys_mmap_pgoff(args.addr, args.len, args.prot, args.flags,
			      args.fd, pgoff);
}
/*
 * mprotect() entry point for 32-bit callers; the arguments are handed to
 * sys_mprotect() unchanged.
 */
asmlinkage long sys32_mprotect(unsigned long start, size_t len,
			       unsigned long prot)
{
	long ret;

	ret = sys_mprotect(start, len, prot);

	return ret;
}
/*
 * rt_sigaction() for 32-bit callers.
 *
 * Reads the new action (if @act is non-NULL) from the 32-bit layout,
 * widens the 32-bit signal-mask words into the kernel's sigset words,
 * calls do_sigaction(), and (if @oact is non-NULL and the call succeeded)
 * narrows the old action back into the 32-bit layout.
 *
 * Returns -EINVAL if @sigsetsize is not sizeof(compat_sigset_t), -EFAULT
 * if either user structure cannot be accessed, otherwise the result of
 * do_sigaction().
 */
asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
struct sigaction32 __user *oact,
unsigned int sigsetsize)
{
struct k_sigaction new_ka, old_ka;
int ret;
compat_sigset_t set32;
/* XXX: Don't preclude handling different sized sigset_t's. */
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
if (act) {
compat_uptr_t handler, restorer;
/* One range check up front, then unchecked per-field reads. */
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
__get_user(restorer, &act->sa_restorer) ||
__copy_from_user(&set32, &act->sa_mask,
sizeof(compat_sigset_t)))
return -EFAULT;
new_ka.sa.sa_handler = compat_ptr(handler);
new_ka.sa.sa_restorer = compat_ptr(restorer);
/*
* FIXME: here we rely on _COMPAT_NSIG_WORDS to be >=
* _NSIG_WORDS << 1
*/
/*
* Each kernel mask word is built from two consecutive 32-bit words
* (low word first).  The cases fall through on purpose so that every
* word from _NSIG_WORDS - 1 down to 0 is filled in.
*/
switch (_NSIG_WORDS) {
case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
| (((long)set32.sig[7]) << 32);
case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
| (((long)set32.sig[5]) << 32);
case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
| (((long)set32.sig[3]) << 32);
case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
| (((long)set32.sig[1]) << 32);
}
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
/*
* FIXME: here we rely on _COMPAT_NSIG_WORDS to be >=
* _NSIG_WORDS << 1
*/
/*
* Reverse of the conversion above: split each kernel mask word into
* its high and low 32-bit halves.  Fall-through is intentional.
*/
switch (_NSIG_WORDS) {
case 4:
set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
set32.sig[6] = old_ka.sa.sa_mask.sig[3];
case 3:
set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
set32.sig[4] = old_ka.sa.sa_mask.sig[2];
case 2:
set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
set32.sig[2] = old_ka.sa.sa_mask.sig[1];
case 1:
set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
set32.sig[0] = old_ka.sa.sa_mask.sig[0];
}
/* Handler and restorer are narrowed to 32-bit user pointers. */
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(ptr_to_compat(old_ka.sa.sa_handler),
&oact->sa_handler) ||
__put_user(ptr_to_compat(old_ka.sa.sa_restorer),
&oact->sa_restorer) ||
__put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
__copy_to_user(&oact->sa_mask, &set32,
sizeof(compat_sigset_t)))
return -EFAULT;
}
return ret;
}
/*
 * Old (non-rt) sigaction() for 32-bit callers.
 *
 * @act, when non-NULL, supplies the new action in the old 32-bit layout;
 * its single-word mask is expanded with siginitset().  @oact, when
 * non-NULL, receives the previous action, of which only mask word 0 is
 * reported.
 *
 * Returns -EFAULT if a user structure cannot be accessed, otherwise the
 * result of do_sigaction().
 */
asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
				struct old_sigaction32 __user *oact)
{
	struct k_sigaction new_ka, old_ka;
	struct k_sigaction *newp = NULL;
	struct k_sigaction *oldp = NULL;
	int ret;

	if (act) {
		compat_old_sigset_t mask;
		compat_uptr_t handler, restorer;

		/* Range-check once, then read the fields unchecked. */
		if (!access_ok(VERIFY_READ, act, sizeof(*act)))
			return -EFAULT;
		if (__get_user(handler, &act->sa_handler) ||
		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
		    __get_user(restorer, &act->sa_restorer) ||
		    __get_user(mask, &act->sa_mask))
			return -EFAULT;

		new_ka.sa.sa_handler = compat_ptr(handler);
		new_ka.sa.sa_restorer = compat_ptr(restorer);
		siginitset(&new_ka.sa.sa_mask, mask);

		newp = &new_ka;
	}

	if (oact)
		oldp = &old_ka;

	ret = do_sigaction(sig, newp, oldp);
	if (ret || !oact)
		return ret;

	/* Success and the caller asked for the previous action. */
	if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
		return -EFAULT;
	if (__put_user(ptr_to_compat(old_ka.sa.sa_handler),
		       &oact->sa_handler) ||
	    __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
		       &oact->sa_restorer) ||
	    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
	    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
		return -EFAULT;

	return ret;
}
/* alarm() entry point for 32-bit callers; defers to alarm_setitimer(). */
asmlinkage long sys32_alarm(unsigned int seconds)
{
	long ret;

	ret = alarm_setitimer(seconds);

	return ret;
}
/*
 * waitpid() entry point for 32-bit callers, implemented as
 * compat_sys_wait4() with a NULL final argument.
 */
asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
			      int options)
{
	long ret;

	ret = compat_sys_wait4(pid, stat_addr, options, NULL);

	return ret;
}
/* 32-bit timeval and related flotsam. */
/*
 * sysfs() entry point for 32-bit callers; the two u32 arguments are
 * passed straight through to sys_sysfs().
 */
asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2)
{
	long ret;

	ret = sys_sysfs(option, arg1, arg2);

	return ret;
}
/*
 * sched_rr_get_interval() for 32-bit callers.
 *
 * The native syscall is run against a kernel-stack timespec (under
 * KERNEL_DS so the kernel pointer passes the user-access checks), and the
 * result is then converted to the 32-bit layout at @interval.
 *
 * Returns the native syscall's error unchanged if it fails, -EFAULT if
 * the converted value cannot be written to @interval, 0 on success.
 */
asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
					struct compat_timespec __user *interval)
{
	struct timespec t;
	int ret;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
	set_fs(old_fs);

	/*
	 * Only copy out on success.  On failure 't' may never have been
	 * written, so copying it would hand uninitialised kernel stack to
	 * user space and could replace the real error with -EFAULT.
	 */
	if (ret)
		return ret;

	if (put_compat_timespec(&t, interval))
		return -EFAULT;

	return 0;
}
/*
 * rt_sigpending() for 32-bit callers.
 *
 * The native syscall fills a kernel-stack sigset_t (under KERNEL_DS),
 * which is then split into 32-bit words and copied to @set.
 *
 * Returns the native syscall's error if it fails, -EFAULT if @set cannot
 * be written, 0 on success.
 */
asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
				    compat_size_t sigsetsize)
{
	sigset_t s;
	compat_sigset_t s32;
	int ret;
	mm_segment_t old_fs = get_fs();

	/*
	 * The native call is given the caller's sigsetsize, so it may fill
	 * only part of 's'; the conversion below may likewise fill only part
	 * of 's32'.  Zero both so no uninitialised kernel stack is ever
	 * copied to user space.
	 */
	memset(&s, 0, sizeof(s));
	memset(&s32, 0, sizeof(s32));

	set_fs(KERNEL_DS);
	ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
	set_fs(old_fs);
	if (ret)
		return ret;

	/* Split each kernel word into high/low halves; fall-through intended. */
	switch (_NSIG_WORDS) {
	case 4:
		s32.sig[7] = (s.sig[3] >> 32);
		s32.sig[6] = s.sig[3];
		/* fall through */
	case 3:
		s32.sig[5] = (s.sig[2] >> 32);
		s32.sig[4] = s.sig[2];
		/* fall through */
	case 2:
		s32.sig[3] = (s.sig[1] >> 32);
		s32.sig[2] = s.sig[1];
		/* fall through */
	case 1:
		s32.sig[1] = (s.sig[0] >> 32);
		s32.sig[0] = s.sig[0];
	}

	/*
	 * NOTE(review): the full compat_sigset_t is written regardless of
	 * sigsetsize (unchanged from before) -- confirm whether the copy
	 * should be limited to sigsetsize bytes.
	 */
	if (copy_to_user(set, &s32, sizeof(compat_sigset_t)))
		return -EFAULT;

	return 0;
}
/*
 * rt_sigqueueinfo() for 32-bit callers.
 *
 * The 32-bit siginfo at @uinfo is converted into a kernel-stack siginfo_t,
 * which is then passed to the native syscall under KERNEL_DS.
 *
 * Returns -EFAULT if the conversion fails, otherwise the native result.
 */
asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
				      compat_siginfo_t __user *uinfo)
{
	siginfo_t kinfo;
	mm_segment_t saved_fs;
	int ret;

	if (copy_siginfo_from_user32(&kinfo, uinfo))
		return -EFAULT;

	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&kinfo);
	set_fs(saved_fs);

	return ret;
}
/* warning: next two assume little endian */
/*
 * pread64() for 32-bit callers: the 64-bit file position arrives as two
 * 32-bit halves (@poslo, @poshi) and is reassembled before the call to
 * sys_pread64().
 */
asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
			    u32 poslo, u32 poshi)
{
	loff_t pos = ((loff_t)AA(poshi) << 32) | AA(poslo);

	return sys_pread64(fd, ubuf, count, pos);
}
/*
 * pwrite64() for 32-bit callers: the 64-bit file position arrives as two
 * 32-bit halves (@poslo, @poshi) and is reassembled before the call to
 * sys_pwrite64().
 */
asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
			     u32 count, u32 poslo, u32 poshi)
{
	loff_t pos = ((loff_t)AA(poshi) << 32) | AA(poslo);

	return sys_pwrite64(fd, ubuf, count, pos);
}
/*
 * personality() for 32-bit callers.
 *
 * A task that is already PER_LINUX32 and asks for PER_LINUX is kept at
 * PER_LINUX32, and a PER_LINUX32 result from sys_personality() is reported
 * back to the caller as PER_LINUX.
 */
asmlinkage long sys32_personality(unsigned long personality)
{
	int ret;

	if (personality == PER_LINUX &&
	    personality(current->personality) == PER_LINUX32)
		personality = PER_LINUX32;

	ret = sys_personality(personality);

	return (ret == PER_LINUX32) ? PER_LINUX : ret;
}
/*
 * sendfile() for 32-bit callers.
 *
 * When @offset is non-NULL the 32-bit offset is read into a kernel off_t,
 * the native syscall runs against that copy under KERNEL_DS, and the
 * (possibly updated) value is written back to @offset afterwards.
 *
 * Returns -EFAULT if @offset cannot be read or written, otherwise the
 * native result.
 */
asmlinkage long sys32_sendfile(int out_fd, int in_fd,
			       compat_off_t __user *offset, s32 count)
{
	mm_segment_t saved_fs;
	off_t koff;
	int ret;

	if (offset && get_user(koff, offset))
		return -EFAULT;

	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	ret = sys_sendfile(out_fd, in_fd,
			   offset ? (off_t __user *)&koff : NULL, count);
	set_fs(saved_fs);

	if (offset && put_user(koff, offset))
		return -EFAULT;

	return ret;
}
/*
 * execve() for 32-bit callers.
 *
 * Copies the path in with getname(), runs compat_do_execve() on it, and
 * releases the name with putname() afterwards.  Returns the getname()
 * error if the copy fails, otherwise the compat_do_execve() result.
 */
asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
			     compat_uptr_t __user *envp, struct pt_regs *regs)
{
	char *path = getname(name);
	long ret;

	if (IS_ERR(path))
		return PTR_ERR(path);

	ret = compat_do_execve(path, argv, envp, regs);
	putname(path);

	return ret;
}
asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
struct pt_regs *regs)
{
void __user *parent_tid = (void __user *)regs->dx;
void __user *child_tid = (void __user *)regs->di;
if (!newsp)
newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
/*
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
*/
/*
 * lseek() for 32-bit callers.  @offset is declared as a signed int so that
 * it is sign extended when handed to sys_lseek().
 */
long sys32_lseek(unsigned int fd, int offset, unsigned int whence)
{
	long ret;

	ret = sys_lseek(fd, offset, whence);

	return ret;
}
/*
 * kill() for 32-bit callers.  @pid and @sig are declared as signed ints so
 * that they are sign extended when handed to sys_kill().
 */
long sys32_kill(int pid, int sig)
{
	long ret;

	ret = sys_kill(pid, sig);

	return ret;
}
/*
 * fadvise64_64() for 32-bit callers: offset and length each arrive as a
 * low/high pair of 32-bit words and are reassembled into 64-bit values.
 */
long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
			__u32 len_low, __u32 len_high, int advice)
{
	u64 offset = (((u64)offset_high) << 32) | offset_low;
	u64 len = (((u64)len_high) << 32) | len_low;

	return sys_fadvise64_64(fd, offset, len, advice);
}
/*
 * Stub for the vm86 system calls: always fails with -ENOSYS.
 *
 * A message is logged via compat_printk() unless the calling task's comm
 * matches the one remembered from the previous message, so back-to-back
 * calls from the same program name do not repeat the warning.
 */
long sys32_vm86_warning(void)
{
	struct task_struct *tsk = current;
	static char lastcomm[sizeof(tsk->comm)];

	/* Same name as last time: stay quiet. */
	if (strncmp(lastcomm, tsk->comm, sizeof(lastcomm)) == 0)
		return -ENOSYS;

	compat_printk(KERN_INFO
		      "%s: vm86 mode not supported on 64 bit kernel\n",
		      tsk->comm);
	strncpy(lastcomm, tsk->comm, sizeof(lastcomm));

	return -ENOSYS;
}
/*
 * lookup_dcookie() for 32-bit callers: the 64-bit cookie arrives as a
 * low/high pair of 32-bit words.
 */
long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
			  char __user *buf, size_t len)
{
	u64 cookie = ((u64)addr_high << 32) | addr_low;

	return sys_lookup_dcookie(cookie, buf, len);
}
/*
 * readahead() for 32-bit callers: the 64-bit file offset arrives as a
 * low/high pair of 32-bit words.
 */
asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
				   size_t count)
{
	u64 offset = ((u64)off_hi << 32) | off_lo;

	return sys_readahead(fd, offset, count);
}
asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
unsigned n_low, unsigned n_hi, int flags)
{
return sys_sync_file_range(fd,
((u64)off_hi << 32) | off_low,
((u64)n_hi << 32) | n_low, flags);
}
/*
 * fadvise64() for 32-bit callers: the 64-bit offset arrives as a low/high
 * pair of 32-bit words, while @len is passed through as given.
 */
asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
				size_t len, int advice)
{
	u64 offset = ((u64)offset_hi << 32) | offset_lo;

	return sys_fadvise64_64(fd, offset, len, advice);
}
asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
unsigned offset_hi, unsigned len_lo,
unsigned len_hi)
{
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
/*
 * fanotify_mark() for 32-bit callers: the 64-bit event mask arrives as a
 * low/high pair of 32-bit words.
 */
asmlinkage long sys32_fanotify_mark(int fanotify_fd, unsigned int flags,
				    u32 mask_lo, u32 mask_hi,
				    int fd, const char __user *pathname)
{
	u64 mask = ((u64)mask_hi << 32) | mask_lo;

	return sys_fanotify_mark(fanotify_fd, flags, mask, fd, pathname);
}
+25
View File
@@ -0,0 +1,25 @@
/* System call table for ia32 emulation. */
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <asm/asm-offsets.h>
/*
 * <asm/syscalls_32.h> is included twice with two different definitions of
 * __SYSCALL_I386().  First pass: emit an extern declaration for every
 * compat entry point.
 */
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
/* Second pass: emit a designated initializer "[nr] = compat," per entry. */
#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
typedef void (*sys_call_ptr_t)(void);
extern void compat_ni_syscall(void);
/*
 * The table itself.  Every slot is first set to compat_ni_syscall by the
 * range initializer; the entries generated from the second include then
 * override the slots they name.
 */
const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
#include <asm/syscalls_32.h>
};
+28
View File
@@ -0,0 +1,28 @@
include include/asm-generic/Kbuild.asm
header-y += boot.h
header-y += bootparam.h
header-y += debugreg.h
header-y += e820.h
header-y += hw_breakpoint.h
header-y += hyperv.h
header-y += ist.h
header-y += ldt.h
header-y += mce.h
header-y += msr-index.h
header-y += msr.h
header-y += mtrr.h
header-y += posix_types_32.h
header-y += posix_types_64.h
header-y += posix_types_x32.h
header-y += prctl.h
header-y += processor-flags.h
header-y += ptrace-abi.h
header-y += sigcontext32.h
header-y += ucontext.h
header-y += vm86.h
header-y += vsyscall.h
genhdr-y += unistd_32.h
genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
+65
View File
@@ -0,0 +1,65 @@
/* a.out coredump register dumper
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#ifndef _ASM_X86_A_OUT_CORE_H
#define _ASM_X86_A_OUT_CORE_H
#ifdef __KERNEL__
#ifdef CONFIG_X86_32
#include <linux/user.h>
#include <linux/elfcore.h>
#include <asm/debugreg.h>
/*
* fill in the user structure for an a.out core dump
*/
/*
 * @regs: register state to record in the dump
 * @dump: a.out core-dump header to fill in
 *
 * Sizes (u_tsize, u_dsize, u_ssize) are expressed in pages and are derived
 * from the current task's mm and from the stack pointer in @regs.
 */
static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
{
/* changed the size calculations - should hopefully work better. lbt */
dump->magic = CMAGIC;
dump->start_code = 0;
/* Stack start is the stack pointer rounded down to a page boundary. */
dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
/* Text size: end_code converted to pages. */
dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT;
/* Data size: brk rounded up to whole pages, minus the text pages. */
dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1)))
>> PAGE_SHIFT;
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
aout_dump_debugregs(dump);
/* Stack size stays 0 unless start_stack lies below TASK_SIZE. */
if (dump->start_stack < TASK_SIZE)
dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
>> PAGE_SHIFT;
/* General registers are copied as-is; segment registers are cut to 16 bits. */
dump->regs.bx = regs->bx;
dump->regs.cx = regs->cx;
dump->regs.dx = regs->dx;
dump->regs.si = regs->si;
dump->regs.di = regs->di;
dump->regs.bp = regs->bp;
dump->regs.ax = regs->ax;
dump->regs.ds = (u16)regs->ds;
dump->regs.es = (u16)regs->es;
dump->regs.fs = (u16)regs->fs;
/* gs is not read from @regs directly but through get_user_gs(). */
dump->regs.gs = get_user_gs(regs);
dump->regs.orig_ax = regs->orig_ax;
dump->regs.ip = regs->ip;
dump->regs.cs = (u16)regs->cs;
dump->regs.flags = regs->flags;
dump->regs.sp = regs->sp;
dump->regs.ss = (u16)regs->ss;
/* dump_fpu() fills in i387; its return value is recorded as u_fpvalid. */
dump->u_fpvalid = dump_fpu(regs, &dump->i387);
}
#endif /* CONFIG_X86_32 */
#endif /* __KERNEL__ */
#endif /* _ASM_X86_A_OUT_CORE_H */
+20
View File
@@ -0,0 +1,20 @@
#ifndef _ASM_X86_A_OUT_H
#define _ASM_X86_A_OUT_H
/*
 * a.out executable header.  Every member is one unsigned int; all lengths
 * are byte counts.
 */
struct exec {
	unsigned int a_info;	/* access through N_MAGIC and friends */
	unsigned int a_text;	/* text length, bytes */
	unsigned int a_data;	/* data length, bytes */
	unsigned int a_bss;	/* uninitialized data area length for file, bytes */
	unsigned int a_syms;	/* symbol table data length in file, bytes */
	unsigned int a_entry;	/* start address */
	unsigned int a_trsize;	/* text relocation info length, bytes */
	unsigned int a_drsize;	/* data relocation info length, bytes */
};

/* Accessors for the relocation and symbol-table sizes of a struct exec. */
#define N_TRSIZE(a)	((a).a_trsize)
#define N_DRSIZE(a)	((a).a_drsize)
#define N_SYMSIZE(a)	((a).a_syms)
#endif /* _ASM_X86_A_OUT_H */
+193
View File
@@ -0,0 +1,193 @@
#ifndef _ASM_X86_ACPI_H
#define _ASM_X86_ACPI_H
/*
* Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <acpi/pdc_intel.h>
#include <asm/numa.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
#include <asm/trampoline.h>
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
/*
* Calling conventions:
*
* ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads)
* ACPI_EXTERNAL_XFACE - External ACPI interfaces
* ACPI_INTERNAL_XFACE - Internal ACPI interfaces
* ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces
*/
#define ACPI_SYSTEM_XFACE
#define ACPI_EXTERNAL_XFACE
#define ACPI_INTERNAL_XFACE
#define ACPI_INTERNAL_VAR_XFACE
/* Asm macros */
#define ACPI_ASM_MACROS
#define BREAKPOINT3
#define ACPI_DISABLE_IRQS() local_irq_disable()
#define ACPI_ENABLE_IRQS() local_irq_enable()
#define ACPI_FLUSH_CPU_CACHE() wbinvd()
/*
 * Global lock helpers.  The macros pass the address of the global_lock
 * member of @facs to the corresponding helper and store the helper's
 * return value in @Acq.
 *
 * @facs is parenthesised so that any pointer-valued expression (not just a
 * plain identifier) can be used as the argument.
 */
int __acpi_acquire_global_lock(unsigned int *lock);
int __acpi_release_global_lock(unsigned int *lock);

#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
	((Acq) = __acpi_acquire_global_lock(&(facs)->global_lock))

#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
	((Acq) = __acpi_release_global_lock(&(facs)->global_lock))
/*
* Math helper asm macros
*/
/*
 * ACPI_DIV_64_BY_32: divide the 64-bit value n_hi:n_lo by d32 with a single
 * divl.  n_lo is tied to the "a" register and n_hi to the "d" register on
 * input; on output the "a" register is stored to q32 and the "d" register
 * to r32.
 */
#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
asm("divl %2;" \
: "=a"(q32), "=d"(r32) \
: "r"(d32), \
"0"(n_lo), "1"(n_hi))
/*
 * ACPI_SHIFT_RIGHT_64: shift the pair n_hi:n_lo right by one bit in place.
 * shrl shifts n_hi, and the following rcrl rotates n_lo right through the
 * carry flag left by that shift.
 */
#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
asm("shrl $1,%2 ;" \
"rcrl $1,%3;" \
: "=r"(n_hi), "=r"(n_lo) \
: "0"(n_hi), "1"(n_lo))
#ifdef CONFIG_ACPI
extern int acpi_lapic;
extern int acpi_ioapic;
extern int acpi_noirq;
extern int acpi_strict;
extern int acpi_disabled;
extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
extern int acpi_fix_pin2_polarity;
extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
void acpi_pic_sci_set_trigger(unsigned int, u16);
extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity);
/*
 * Turn ACPI off altogether: sets the acpi_noirq, acpi_pci_disabled and
 * acpi_disabled flags.
 */
static inline void disable_acpi(void)
{
	acpi_noirq = 1;
	acpi_pci_disabled = 1;
	acpi_disabled = 1;
}
extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
/* Set the acpi_noirq flag. */
static inline void acpi_noirq_set(void)
{
	acpi_noirq = 1;
}
/*
 * Set the acpi_pci_disabled flag and, via acpi_noirq_set(), the acpi_noirq
 * flag as well.
 */
static inline void acpi_disable_pci(void)
{
	acpi_noirq_set();
	acpi_pci_disabled = 1;
}
/* Low-level suspend routine. */
extern int acpi_suspend_lowlevel(void);
extern const unsigned char acpi_wakeup_code[];
#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
/* early initialization routine */
extern void acpi_reserve_wakeup_memory(void);
/*
* Check if the CPU can handle C2 and deeper
*/
static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
{
/*
* Early models (<=5) of AMD Opterons are not supposed to go into
* C2 state.
*
* Steppings 0x0A and later are good
*/
if (boot_cpu_data.x86 == 0x0F &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86_model <= 0x05 &&
boot_cpu_data.x86_mask < 0x0A)
return 1;
else if (amd_e400_c1e_detected)
return 1;
else
return max_cstate;
}
static inline bool arch_has_acpi_pdc(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
return (c->x86_vendor == X86_VENDOR_INTEL ||
c->x86_vendor == X86_VENDOR_CENTAUR);
}
/*
 * Adjust the capability word buf[2] according to the features of CPU 0:
 * the SMP C-state capability is always set, the EST and ACPI features add
 * their bits, and a CPU without MWAIT has the C2/C3 FFH bits cleared.
 */
static inline void arch_acpi_set_pdc_bits(u32 *buf)
{
	struct cpuinfo_x86 *cpu = &cpu_data(0);
	u32 *caps = &buf[2];

	*caps |= ACPI_PDC_C_CAPABILITY_SMP;

	if (cpu_has(cpu, X86_FEATURE_EST))
		*caps |= ACPI_PDC_EST_CAPABILITY_SWSMP;

	if (cpu_has(cpu, X86_FEATURE_ACPI))
		*caps |= ACPI_PDC_T_FFH;

	/*
	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
	 */
	if (!cpu_has(cpu, X86_FEATURE_MWAIT))
		*caps &= ~(ACPI_PDC_C_C2C3_FFH);
}
#else /* !CONFIG_ACPI */
/*
 * Without CONFIG_ACPI the two flags below are compile-time zeros and the
 * helper functions are empty stubs, so callers need no #ifdefs.
 */
#define acpi_lapic 0
#define acpi_ioapic 0
static inline void acpi_noirq_set(void) { }
static inline void acpi_disable_pci(void) { }
static inline void disable_acpi(void) { }
#endif /* !CONFIG_ACPI */
#define ARCH_HAS_POWER_INIT 1
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
#define acpi_unlazy_tlb(x) leave_mm(x)
#endif /* _ASM_X86_ACPI_H */

Some files were not shown because too many files have changed in this diff Show More