commodorejohn
Veteran Member
Being as I'm interested in dabbling with game development for MS-DOS, I knew I'd need a random-number generator eventually. While researching the subject, I found this algorithm, which combines a number of attractive features: no multiply/divide (shift/XOR-based instead,) very little storage required, and a long period. (I'm no statistics expert, so I can't speak for the quality of the random numbers, but they certainly look random to me...) So I took a stab at implementing it in assembler, and here it is:
http://www.commodorejohn.com/shx-rand.zip (assembler source is NASM format)
It's really quite a nice and simple algorithm to implement in assembler; I did both an 8086 version and a 386 version (uses 32-bit operations and long immediate shifts.) Most of the work went into the 8086 version, which I tried to optimize to minimize memory access; I'm not going to claim the result is optimal (I'd be happy to hear suggestions for improvement,) but I think it's pretty good. A reference implementation in C is also included, along with test programs for all three which generate the first 65536 random numbers from a specified seed, and have been verified to give the same results.
So yeah, this was an interesting little project. I intend to add a 68k implementation to the mix, shouldn't be too hard. Questions? Comments? Smart remarks?
http://www.commodorejohn.com/shx-rand.zip (assembler source is NASM format)
It's really quite a nice and simple algorithm to implement in assembler; I did both an 8086 version and a 386 version (uses 32-bit operations and long immediate shifts.) Most of the work went into the 8086 version, which I tried to optimize to minimize memory access; I'm not going to claim the result is optimal (I'd be happy to hear suggestions for improvement,) but I think it's pretty good. A reference implementation in C is also included, along with test programs for all three which generate the first 65536 random numbers from a specified seed, and have been verified to give the same results.
So yeah, this was an interesting little project. I intend to add a 68k implementation to the mix, shouldn't be too hard. Questions? Comments? Smart remarks?
Code:
getrand: ; Destroys all main registers, returns AXBX = 32-bit random number
; Also returns EBX = 32-bit random number on 80386
%ifdef CPU_80386
; 80386 version (32-bit, has barrel-shifter, fast!)
mov eax, [randx] ; Ahh, 32-bit registers...
mov ebx, eax ; Does this save us a memory access on the XOR?
shl eax, 11 ; Ahh, barrel-shifter with large immediate shifts...
xor eax, ebx
mov ebx, [randw] ; I'm not going to try to make the list-shuffle look
mov ecx, [randy] ; like it corresponds, important thing is that the
mov [randx], ecx ; values wind up in the right place.
mov ecx, [randz]
mov [randy], ecx
mov [randz], ebx
mov ecx, ebx
shr ecx, 19
xor ebx, ecx
mov ecx, eax
shr ecx, 8
xor eax, ecx
xor ebx, eax
mov [randw], ebx
; Now load the high word of EBX into AX, for compatibility
mov eax, ebx ; I need to get a better 386 assembler reference,
shr eax, 16 ; but I don't believe there's a way to access the
; high word of a 32-bit register directly...
%else
; 8086 version (much slower, but more explanatorily commented!)
; Many steps are done out of order here in order to minimize re-loading
; of values from memory. I *think* I've tracked everything correctly so
; that the results are correct.
mov cx, [randx+2] ; CXDX = [randx]
mov dx, [randx]
mov ah, cl ; AXBX = [randx] << 8
mov al, dh
mov bh, dl
xor bl, bl
shl bx, 1 ; AXBX = AXBX << 3
rcl ax, 1
shl bx, 1
rcl ax, 1
shl bx, 1
rcl ax, 1
xor ax, cx ; First part of the algorithm:
xor bx, dx ; t = (x XOR (x << 11))
mov dl, bh ; CXDX = (t >> 8)
mov dh, al
mov cl, ah
xor ch, ch
xor ax, cx ; Piece for the second part of the algorithm:
xor bx, dx ; (t XOR (t >> 8))
mov cx, [randy+2] ; Move the stored values one space along in the list
mov dx, [randy] ; X = Y, Y = Z, Z = W
mov [randx+2], cx
mov [randx], dx ; Is this the fastest way to do it? Seems like it
mov cx, [randz+2] ; would be, I can't think of another method that
mov dx, [randz] ; requires fewer memory accesses and no overhead.
mov [randy+2], cx
mov [randy], dx
mov dx, [randw+2] ; DX = [randw] >> 16 (for later)
mov cx, [randw]
mov [randz+2], dx
mov [randz], cx
xor ax, dx ; Cuts the final XOR in half.
shr dx, 1 ; DX = DX >> 3
shr dx, 1
shr dx, 1
xor dx, cx ; Piece for the second part of the algorithm:
; (w XOR (w >> 19))
xor bx, dx ; Second part:
; w = (w XOR (w >> 19)) XOR (t XOR (t >> 8))
; return w
mov [randw], bx ; Final history-list operation, store new value in W
mov [randw+2], ax
%endif
ret
; Put these anywhere, but have 'em aligned!
alignb 4
randx: resd 1
randy: resd 1
randz: resd 1
randw: resd 1