;; blob: 3658f09749da010f4691f62db7f8b5bc74aa87ef
;; -----------------------------------------------------------------------
;;
;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;; Boston MA 02111-1307, USA; either version 2 of the License, or
;; (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------
;;
;; bcopy32xx.inc
;;
;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;
; The code below is assembled as 32-bit code and collected in its own
; section, .bcopyxx.text.
bits 32
section .bcopyxx.text
; Start the section contents on a 16-byte boundary.
align 16
;
; pm_bcopy:
;
; This is the protected-mode core of the "bcopy" routine.
; Try to do aligned transfers; if the src and dst are relatively
; misaligned, align the dst.
;
; Inputs:
;   ESI -> source, or -1 to zero-fill the destination instead
;   EDI -> destination
;   ECX -> byte count; guaranteed to not be zero on entry
;
; Overlap is handled: if the source starts below the destination and
; the destination starts inside the source range, the copy is done
; backwards (DF is set for that path and cleared again before return).
; The forward and zero-fill paths do not touch DF.
; NOTE(review): they therefore rely on DF == 0 on entry - confirm
; against the callers.
;
; Every alignment decision is made on the *current* EDI, so the
; word step remains correct after a byte has been peeled off and the
; bulk dword transfer always runs on a dword-aligned destination.
;
; Clobbers ESI, EDI, ECX and the flags.  EAX and EBX are saved and
; restored; EDX is never written.
;
pm_bcopy:
		push ebx
		push eax

		cmp esi,-1
		je near .bzero		; Too far away for a short branch

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment: peel one byte if the dst is odd...
		test edi,1
		jz .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we take .f_tiny
		cmp ecx,2
		jb .f_tiny

		; ...then one word if the (updated) dst is 2 mod 4
		test edi,2
		jz .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2		; 2 or 3 bytes left over?
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1		; Odd byte left over?
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]	; EDI -> final destination byte
		mov esi,eax		; ESI -> final source byte

		; Initial alignment.  If the final byte sits at an even
		; address it is the low half of a word; copy it on its
		; own so that what remains ends on an odd address.
		test edi,1
		jnz .raa1
		movsb
		dec ecx
.raa1:

		; Step back so ESI/EDI address the last remaining *word*
		dec esi
		dec edi
		mov al,cl		; Low bits, in case we take .r_tiny
		cmp ecx,2
		jb .r_tiny

		; EDI is even here.  The dword loop starts at EDI-2, so
		; it is aligned only when EDI is 2 mod 4; if EDI is
		; 0 mod 4, peel one word first.
		test edi,2
		jnz .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Address the last remaining dword
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Back to word addressing
		add edi,2
		test al,2
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Back to byte addressing
		inc edi
		test al,1
		jz .rab1
		movsb
.rab1:
		cld			; Leave DF clear for the caller
		jmp short .done

.bzero:
		xor eax,eax		; Fill pattern; low bits go in BL

		; Initial alignment: one byte if the dst is odd...
		test edi,1
		jz .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Low bits, in case we take .z_tiny
		cmp ecx,2
		jb .z_tiny

		; ...then one word if the (updated) dst is 2 mod 4
		test edi,2
		jz .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1
		jz .zab1
		stosb
.zab1:
		jmp .done		; Not "short": .done is out of rel8 range
;
; shuffle_and_boot:
;
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory. This routine
; can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
; ESI -> Pointer to list of (dst, src, len) triples(*)
; EDI -> Pointer to safe area for list + shuffler
; (must not overlap this code nor the RM stack)
; ECX -> Byte count of list area (for initial copy)
;
; If src == -1: then the memory pointed to by (dst, len) is bzeroed;
; this is handled inside the bcopy routine.
;
; If len == 0: this marks the end of the list; dst indicates
; the entry point and src the mode (0 = 16-bit/rm, nonzero = pm)
;
; (*) dst, src, and len are four bytes each
;
; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
;
global do_raw_shuffle_and_boot
; C-callable entry.  The arguments (safearea, descriptors, bytecount)
; arrive in EAX, EDX and ECX; the first two are moved into the
; registers pm_shuffle expects and ECX is already in place.  Execution
; then falls through into pm_shuffle.
do_raw_shuffle_and_boot:
mov edi,eax
mov esi,edx
; Register entry: ESI -> descriptor list, EDI -> safe area,
; ECX = size of the list in bytes.  This routine never returns.
pm_shuffle:
cli ; End interrupt service (for good)
mov ebx,edi ; EBX <- descriptor list
lea edx,[edi+ecx+15] ; EDX <- where to relocate our code to
and edx,~15 ; Align 16 to benefit the GDT
; Copy the descriptor list (ECX bytes, ESI -> EDI) into the safe area.
; EBX and EDX still hold their values after the call.
call pm_bcopy
mov esi,__bcopyxx_start ; Absolute source address
mov edi,edx ; Absolute target address
sub edx,esi ; EDX <- address delta
mov ecx,__bcopyxx_dwords
; The resume address must be computed before the copy, but is only
; jumped to once the copy is complete.
lea eax,[edx+.safe] ; Resume point
; Relocate this code
rep movsd
jmp eax ; Jump to safe location
; From here on we execute out of the relocated copy.  EDX holds the
; relocation delta, so [edx+symbol] addresses the relocated symbol.
.safe:
; Give ourselves a safe stack
; (bcopyxx_stack bytes above the relocated __bcopyxx_end)
lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
add edx,bcopy_gdt ; EDX <- new GDT
; The first GDT slot holds the limit/base operand for LGDT; patch the
; base field so it points at the relocated table.
mov [edx+2],edx ; GDT self-pointer
lgdt [edx] ; Switch to local GDT
; Now for the actual shuffling...
; Each list entry is three dwords: dst, src, len.  An entry with
; len == 0 terminates the list.
.loop:
mov edi,[ebx]
mov esi,[ebx+4]
mov ecx,[ebx+8]
add ebx,12
jecxz .done
call pm_bcopy
jmp .loop
; Terminator reached: EDI = entry point, ESI = mode, ECX = 0.
.done:
lidt [edx+RM_IDT_ptr-bcopy_gdt] ; RM-like IDT
push ecx ; == 0, for cleaning the flags register
; Mode == 0 selects the 16-bit hand-off below; any other value jumps
; straight to the entry point in the current 32-bit mode.
and esi,esi
jz pm_shuffle_16
popfd ; Clean the flags
jmp edi ; Protected mode entry
; We have a 16-bit entry point, so we need to return
; to 16-bit mode. Note: EDX already points to the GDT.
pm_shuffle_16:
; Patch the base address of the CS16 and DS16 descriptors to the
; entry point: base bits 15:0 at offset 2, bits 23:16 at offset 4,
; bits 31:24 at offset 7.
mov eax,edi
mov [edx+PM_CS16+2],ax
mov [edx+PM_DS16+2],ax
shr eax,16
mov [edx+PM_CS16+4],al
mov [edx+PM_CS16+7],ah
mov [edx+PM_DS16+4],al
mov [edx+PM_DS16+7],ah
; EAX <- CR0 with bit 0 (PE) cleared.  The value is only prepared
; here, not written back.
; NOTE(review): presumably the 16-bit entry stub performs the actual
; "mov cr0,eax" - confirm against the stub generator.
mov eax,cr0
and al,~1
popfd ; Clean the flags
; No flag-changing instructions below...
; Only the low 16 bits of EDX matter for the segment loads, so
; overwriting DX is enough to form the selector.
mov dx,PM_DS16
mov ds,edx
mov es,edx
mov fs,edx
mov gs,edx
mov ss,edx
; Offset 0 of the freshly patched CS16 segment, i.e. the entry point.
jmp PM_CS16:0
section .bcopyxx.data
alignz 16
; GDT descriptor entry
; "desc NAME" defines the label bcopy_gdt.NAME at the current position
; and the constant PM_NAME as its byte offset from bcopy_gdt, which is
; the value used as the segment selector.
%macro desc 1
bcopy_gdt.%1:
PM_%1 equ bcopy_gdt.%1-bcopy_gdt
%endmacro
; The first 8-byte slot doubles as the operand for LGDT: a 16-bit
; limit followed by a 32-bit base.  The base assembled here is
; overwritten by pm_shuffle with the address of the relocated table
; before LGDT is executed.
bcopy_gdt:
dw bcopy_gdt_size-1 ; Null descriptor - contains GDT
dd bcopy_gdt ; pointer for LGDT instruction
dw 0
; TSS segment to keep Intel VT happy. Intel VT is
; unhappy about anything that doesn't smell like a
; full-blown 32-bit OS.
desc TSS
dw 104-1, DummyTSS ; 08h 32-bit task state segment
dd 00008900h ; present, dpl 0, 104 bytes @DummyTSS
; The base fields of CS16 and DS16 are assembled as zero; pm_shuffle_16
; fills them in with the entry point before loading the selectors.
desc CS16
dd 0000ffffh ; 10h Code segment, use16, readable,
dd 00009b00h ; present, dpl 0, cover 64K
desc DS16
dd 0000ffffh ; 18h Data segment, use16, read/write,
dd 00009300h ; present, dpl 0, cover 64K
desc CS32
dd 0000ffffh ; 20h Code segment, use32, readable,
dd 00cf9b00h ; present, dpl 0, cover all 4G
desc DS32
dd 0000ffffh ; 28h Data segment, use32, read/write,
dd 00cf9300h ; present, dpl 0, cover all 4G
bcopy_gdt_size: equ $-bcopy_gdt
;
; Space for a dummy task state segment. It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
;
DummyTSS equ 0x580
align 4
; Operand for the LIDT in pm_shuffle: 16-bit limit, then 32-bit base.
RM_IDT_ptr: dw 0FFFFh ; Length (nonsense, but matches CPU)
dd 0 ; Offset
; Not storage: a constant added to __bcopyxx_end by pm_shuffle to
; place the stack pointer above the relocated image.
bcopyxx_stack equ 128 ; We want this much stack
section .rodata
; Exported dword holding the value of the external symbol
; __bcopyxx_len.
; NOTE(review): __bcopyxx_len is presumably the linker-provided size
; of the relocatable shuffler image - confirm against the linker
; script.
global __syslinux_shuffler_size
extern __bcopyxx_len
align 4
__syslinux_shuffler_size:
dd __bcopyxx_len
; Switch back to 16-bit code generation and the .text16 section for
; whatever follows.
bits 16
section .text16