What I'm working on 2, Sound

deathshadow · Nov 6, 2014

Just thought I'd share the next part of my rewrite of my DOS game engine, the sound engine.

As I've been switching to putting everything in the data segment instead of using the heap and heading towards a monolithic executable, the music playback system needs an overhaul since I was dynamically allocating space for not just the music, but things like MIDI SYSEX, Patch Data, and even the music itself.

I also needed a rewrite (as my other thread exists to try to get help testing) of the C/MS code as it wasn't working on all C/MS setups. Was good on my 1320B, was good on my game blaster, was good in DOSBox, but didn't work on a real "C/MS" card or any other models of Blasters with the SA1099 chips in them. I THINK I got it right with this version of the code:

Code:

; Creative Music Source dual SA1099 Sound support
; Jason M. Knight, October 2014
; Intended for compilation with NASM for use with TP7

; labels starting with "i_" are for local/internal use only

BITS 16
CPU 8086

%include "TURBOPAS.MAC"

segment CONST

; CMS registers are read only, so we have to buffer these ourselves!
cmsOctaveStore dw 0x0000, 0x0000, 0x0000
cmsFreqBits    dw 0x0000
cmsNoiseBits   dw 0x0000

cmsFreqMap: ; convert 0..243 to 0..255 with SAA1099 freq curve
	db    0,   2,   3,   5,   7,   9,  11,  13
	db   15,  17,  19,  21,  24,  26,  28,  30
	db   32,  33,  35,  37,  39,  40,  42,  45
	db   47,  48,  50,  51,  53,  55,  57,  58
	
	db   60,  61,  63,  65,  66,  67,  68,  70
	db   71,  73,  74,  76,  77,  79,  80,  82
	db   83,  85,  86,  87,  88,  90,  91,  93
	db   95,  96,  97,  98, 100, 101, 103, 104
	
	db  105, 106, 108, 109, 111, 112, 113, 114
	db  115, 116, 118, 119, 120, 121, 123, 124
	db  125, 126, 127, 129, 130, 131, 132, 133
	db  134, 135, 136, 137, 138, 140, 141, 142
	
	db  143, 144, 145, 146, 148, 149, 150, 151
	db  152, 153, 154, 155, 156, 157, 158, 159
	db  160, 161, 162, 162, 163, 164, 165, 166
	db  167, 168, 170, 171, 172, 173, 174, 174
	
	db  175, 176, 177, 178, 179, 180, 181, 182
	db  182, 183, 184, 185, 186, 187, 188, 189
	db  190, 191, 192, 193, 193, 194, 195, 196
	db  196, 197, 198, 199, 200, 200, 201, 202
	
	db  203, 203, 204, 205, 206, 206, 208, 208
	db  209, 209, 210, 211, 212, 212, 213, 213
	db  214, 215, 216, 217, 217, 218, 218, 219
	db  219, 220, 221, 222, 222, 223, 224, 225
	
	db  225, 226, 226, 227, 227, 228, 228, 229
	db  230, 231, 232, 232, 233, 233, 234, 234
	db  235, 235, 236, 236, 237, 238, 239, 239
	db  240, 240, 241, 241, 242, 243, 243, 244
	
	db  244, 245, 245, 246, 246, 247, 247, 248
	db  249, 250, 250, 251, 251, 252, 252, 253
	db  253, 254, 254, 255
	
segment CODE

; let's do the math here instead of during execution
%define cmsFreqMin 32
%define cmsFreqMax 7823
%define cmsFreqLo  245
%define cmsFreqHi  489
%define cmsFreqMapOffsetMinusLo cmsFreqMap - cmsFreqLo

extern cmsSoundPort

i_WriteByteBoth:
; INPUT
;   dx = soundport
;   al = register
;   ah = data
; PRESERVES
;   all
	add   dx, 3
	out   dx, al
	dec   dx
	xchg  ah, al
	out   dx, al
	dec   dx
	xchg  ah, al
	out   dx, al
	dec   dx
	xchg  ah, al
	out   dx, al
	xchg  ah, al
	ret
	
i_WriteWordBoth:
; INPUT
;   dx = soundport
;   al = register
;   bx = data
; PRESERVES
;   all
	mov   ah, al ; save for second output
	add   dx, 3
	out   dx, al ; oAL
	dec   dx
	xchg  bl, al ; BL = oAL, AL = oBL, BH = oBH
	out   dx, al ; oBL
	xchg  bh, al ; BL = oAL, AL = oBH, BH = oBL
	out   dx, al ; oBH
	dec   dx
	xchg  bl, al ; BL = oBH, AL = oAL, BH = oBL
	out   dx, al ; oAL
	dec   dx
	xchg  bh, al ; BL = oBH, AL = oBL, BH = oAL
	out   dx, al ; oBL
	xchg  bl, al ; BL = oBL, AL = oBH, BH = oAL
	out   dx, al ; oBH
	xchg  bh, al ; original order.
	ret
	
i_CmsSelectPort:
; INPUT
;   [bp + 6] == voice 0..11
; OUTPUT
;   bx = cms register (0..5)
;   dx = port (0x02?1 or 0x02?3)
	mov  bx, [bp + 6]
	mov  dx, [cmsSoundPort]
	inc  dx
	cmp  bx, 6
	js   .done
	sub  bx, 6
	inc  dx
	inc  dx
.done:
	ret
	
i_CmsFindBits:
; INPUT
;   al = register to set
;   bx = voice register 0..5
;   dx = chip register port (typically 0x02?1 or 0x02?3)
; OUTPUT
;   al == bitmask
;   bx == address of appropriate cmsFreqBits byte
;   dx == chip data port (typically 0x02?0 or 0x2?2)
; CORRUPTS
;   cl
	out  dx, al
	dec  dx
	mov  cl, bl
	mov  al, 1
	shl  al, cl
	mov  bx, cmsFreqBits
	test dx, 0x0002
	jz   .done
	inc  bx
.done:
	ret
	
i_CmsSetAL:
; INPUT
;   al = bit mask
;   bx = address of cmsBits byte
;   dx = chip data port
	or   [bx], al
	mov  al, [bx]
	out  dx, al
	ret
	
i_CmsMaskAL:
; INPUT
;   al = bit mask
;   bx = address of cmsBits byte
;   dx = chip data port
; CORRUPTS
;   al (masks off top two bits)
	not  al
	and  al, 0x3F
	and  [bx], al
	mov  al, [bx]
	out  dx, al
	ret
	
i_SetFreq:
; INPUT
;   ah = freq
;   bx = voice 0..5
;   dx = port 0x02?1 or 0x02?3
; OUTPUT
;   none
; CORRUPTS
;   al
	mov   al, bl
	or    al, 0x08
	out   dx, al
	dec   dx
	mov   al, ah
	out   dx, al
	inc   dx
	ret
	
i_SetOctave:
; INPUT
;   ch = octave
;   bx = voice 0..5
;   dx = port 0x02?1 or 0x2?3
; OUTPUT
;   none
; CORRUPTS
;   al, bx
	mov   al, bl
	shr   al, 1
	or    al, 0x10
	out   dx, al
	dec   dx
	shr   bx, 1
	mov   al, [bx + cmsOctaveStore]
	jc    .setHigh
; setLow
	and   al, 0xF0
	jmp  .done
.setHigh:
	and   al, 0x0F
	mov   cl, 4
	shl   ch, cl
.done:
	or    al, ch
	out   dx, al
	inc   dx
	mov   [bx + cmsOctaveStore], al
	ret
	
; procedure cmsReset
pProcNoArgs cmsReset
	mov   ax, ds
	mov   es, ax
	xor   ax, ax
	mov   di, cmsOctaveStore
	stosw
	stosw
	stosw
	stosw ; cmsFreqBits
	stosw ; cmsNoiseBits
	mov   dx, [cmsSoundPort]
	mov   cx, 0x20
.loopZero:
	call  i_WriteByteBoth
	inc   al
	loop  .loopZero
	mov   al, 0x1C
	mov   bx, 0x0102
	call  i_WriteWordBoth
	mov   ax, 0x0015
	call  i_WriteByteBoth
	mov   ax, 0x0014
	call  i_WriteByteBoth
	mov   ax, 0x0016
	call  i_WriteByteBoth
	mov   cx, 6
	mov   ax, 0x8800
.loopVolume:
	call  i_WriteByteBoth
	inc   al
	loop  .loopVolume
	retf
	
; proceudre cmsSetAmplitude(left, right, voice:word);
pProcArgs cmsSetAmplitude
	call i_CmsSelectPort
	mov  al, bl
	out  dx, al
	mov  al, [bp + 8]
	mov  cl, 4
	shr  al, cl
	add  al, [bp + 10]
	dec  dx
	out  dx, al
	pRet 6
	
; procedure cmsEnableFreq(voice:word);
pProcArgs cmsEnableFreq
	call i_CmsSelectPort
	mov  al, 0x14
	call i_CmsFindBits
	call i_CmsSetAL
	pRet 2

; procedure cmsDisableFreq(voice:word);
pProcArgs cmsDisableFreq
	call i_CmsSelectPort
	mov  al, 0x14
	call i_CmsFindBits
	call i_CmsMaskAL
	pRet 2

; procedure cmsEnableNoise(voice:word);
pProcArgs cmsEnableNoise
	call i_CmsSelectPort
	mov  al, 0x15
	call i_CmsFindBits
	call i_CmsSetAL
	pRet 2
	
; procedure cmsDisableNoise(voice:word);
pProcArgs cmsDisableNoise
	call i_CmsSelectPort
	mov  al, 0x15
	call i_CmsFindBits
	call i_CmsMaskAL
	pRet 2
	
; procedure cmsSetFreq(freq, voice:word);
pProcArgs cmsSetFreq
	call  i_CmsSelectPort
	mov   ah, [bp + 8]
	call  i_SetFreq
	pRet  4
	
; procedure cmsSetOctave(octave, voice:word);
pProcArgs cmsSetOctave
	call i_CmsSelectPort
	mov   ch, [bp + 8]
	call  i_SetOctave
	pRet  4

; function cmsSetHz(hz, voice:word);
pProcArgs cmsSetHz
	; first let's turn off this voice
	call i_CmsSelectPort ; bx == register 0..5, dx == 0x2?1 or 0x2?3
	mov   di, bx
	mov   si, dx
	mov   al, 0x14
	call  i_CmsFindBits
	call  i_CmsMaskAL
	; determine freq and octave from hz
	mov   bx, [bp + 8]
	cmp   bx, cmsFreqMin
	jl    .exit
	cmp   bx, cmsFreqMax
	jg    .exit
	mov   ch, 3
	; I HATE "WHILE" LOGIC!!!
.loopDownOctave:
	cmp   bx, cmsFreqLo
	jg    .loopUpOctave
	shl   bx, 1
	dec   ch
	jmp   .loopDownOctave
.loopUpOctave:
	cmp   bx, cmsFreqHi
	jl    .calcFreq
	shr   bx, 1
	inc   ch
	jmp   .loopUpOctave
.calcFreq:
	mov   ah, [bx + cmsFreqMapOffsetMinusLo] 
	; AH = freq, CH = OCTAVE
	mov   bx, di
	mov   dx, si
	call  i_SetFreq
	call  i_SetOctave
	; turn this voice back on.
	mov   bx, di
	mov   al, 0x14
	call  i_CmsFindBits
	call  i_CmsSetAL
.exit:
	pRet  4

Though if anyone wants to give that a once over I'd love to hear any suggestions. (I'm always screwing up when to use ja/jg vs. jb/jl - cannot keep those straight!)

One of the big changes is that in the old game I was using an if statement EVERY time a sound was being set to point it at the code for the correct card -- while this is the method I see most people using, it's not exactly the best way of going about it. As such I've switched to using procedural types and procedural variables so that at the start I can do the IF to point at the correct procedure for the correct card, removing the IF inside the call.

Code:

	case soundCard of
		sound_none:begin
			setFreq := quiet2;
			setNote := quiet2;
			setVolume := quiet3;
		end;
		sound_pcSpeaker:begin
			speakerEnable;
			setFreq := speakerSetHz;
			setNote := speakerSetNote;
			setVolume := quiet3;
		end;
		sound_adlib:begin
			adlibReset;
			adlibDefaultAllVoices;
			setFreq := adlibSetHz;
			setNote := adlibSetNote;
			setVolume := adlibSetVolume;
		end;

So on and so forth.

I've also set up a two voice PC speaker arpeggio in the 120hz game timer -- if only one voice is playing that note is held, but if both voices are playing it arpeggio's. I only load the speaker timer version if the speaker is selected, otherwise I load a simpler version so as to not suck down extra time for nothing.

Code:

checkVoice:
; ACCEPTS
;   BX   offset into speaker voices
; CORRUPTS
;   AX, CX
; RETURNS
;   ZF set == no playback, unset == playback
	mov  cx, WORD [bx + voices]
	or   cx, cx
	jz   .done
	mov  al, 0xB6
	out  0x43, al
	mov  al, cl
	out  0x42, al
	mov  al, ch
	out  0x42, al
	in   al, 0x61
	or   al, 3
	out  0x61, al
.done:
	ret
	
speakerTimerISR:
	push  ax
	push  bx
	push  cx
	push  ds
	mov   ax, DATA
	mov   ds, ax
; uncomment next two lines to slow the arpeggio to 60hz
;	xor   [speakerFlags], BYTE 0x80
;	jns   .tick
	mov   bx, [currentVoice]
	xor   bx, 2
	mov   [currentVoice], bx
	call  checkVoice
	jnz   .tick
	xor   bx, 2
	call  checkVoice
	jnz   .tick
	in    al, 0x61
	and   al, 0xFC
	out   0x61, al
.tick:
	inc   WORD [tickCounter]
	sub   [countISR], countDec
	jns   .done
	add   [countISR], countInc
	pushf
	call  far [oldISR]
.done:
	pop   ds
	pop   cx
	pop   bx
	pop   ax
	iret

I keep feeling like that could be more efficiently written, but it works.

I found a MAJOR bug in my tandy/jr sound code that came from doing a copypasta of someone else's code... bits 4, 5 and 6 of the command byte is listed in the docs as channel select -- and whatever code I was working from took that literally... but that's NOT the whole picture. Only 5 and 6 select the channel -- bit 4 selects if you are setting frequency (0) or amplitude (1)

The code I had gotten/was using was doing:

mov al, 0x10
mov cl, voice
shl al, cl

Which is just WRONG. Like... crazy wrong. It meant that the lower frequency bit was setting amplitude, which is why the tone was off slightly and volume setting was jacked.

The PROPER code...

mov al, voice
mov cl, 3
and al, cl ; just to be on the safe side
ror al, cl

using cl as the mask and the rotate amount is a nice double-whammy, as is that the rotate by 3 is faster than doing a shift by five to the left.

*** side note *** I'm shocked how many audio codebases I look at do NOT do any input sanitization

I'm expanding the MIDI support a good deal as well in that I'm adding code for more than just the MPU-401 style interface. I've already got the Sound Blaster DSP MIDI up and working since I FINALLY found a document that had some REALLY good code examples:

http://www.phatcode.net/articles.php?id=243

I'm used to the code presented in such documents being garbage (like say, the stuff for the IMF) -- I've not seen that one before and it was a breath of fresh air. Being a Borland fan I'm a stickler for good documentation! Probably why I like PHP even though I hate PHP. The SB "Normal" MIDI code (I'm not using "UART" mode since that's DSP 2.0+ only) I'm pretty much using from that PDF with no real modifications... their reset function seems a bit wonky, but after playing with a few different variations I understand the choices they made.

IMF (IBM Music Feature) is still on my 'to be supported' list, though my FB-01 went up with a bang and cloud of smoke (cap blew with enough force to crack the board and lift traces, I'm not all that hopeful on ressurecting it but I'm gonna try) and I don't have a real IMF to test with... I'm also still playing with making a FB-01 softsynth emulation on the pi and/or cubieboard. I also have a ... crazy idea related to such project boards I'm going to be playing with; directly interfacing the GPIO

Are there any other semi-common MIDI interfaces? I was thinking on adding the non MPU-401 style MIDIMAN support (like the 2x2 and 2x4), but I'm not certain it's worth the effort.

One new feature I'm starting coding on today is for the Innovation SSI-2001, a card you rarely if ever hear about... for those of you unfamiliar with it the card is a Commodore/MOS SID for the PC, with an 8 bit DIGITAL joystick interface (that maps/pretends to be analog like many gamepads do). Honestly I never even heard of the card until... well...

http://www.vintage-computer.com/vcf...-quot-The-Entertainer-quot-sound-card-exposed

Even though I don't have one, DOSBox has a very good implementation since SID emulation as a whole is quite mature at this point (though the incorrect clock isn't 100% right compared to real hardware) -- and coding for it should go pretty quick since I already wrote SID code for the C64 version of Paku Paku... All I have to do is port my C / 6502 ASM to Pascal / 8086 ASM since apparently it just maps the 29 SID registers to a range of ports (typically 0x0280..0x29D) -- easy peasy.

I'm also looking for information on programming the Covox Sound Master -- I figure it can't be too different from programming the mockingboard in my IIe Platinum... but I can't find any actual programming examples for it on the PC much less information on where/how the card is even mapped... that I don't actually HAVE said card and there's no emulation (that I'm aware of) is also a bit limiting; but I figure AY-3-8910 is AY-3-8910.

One further thing I'm playing with is setting a much higher timer rate on faster (386+, MAYBE faster 286) systems to allow for 1.5 bit PC speaker support and Covox/Disney software synth. (I'm thinking 4.8khz should be sufficient for parallel, not sure about speaker) -- For my needs I only NEED two voices, though for the parallel port DAC version I'll probably aim for 4 voice with 6 bits per voice (so it's simple addition mixing with no shifting or divides)... I may also allow the soft-synth to output to the 'blaster's DSP just for laughs, possibly even defaulting to that instead of Adlib on the 'blaster since I really HATE how Yamaha FM sounds. (always have!). The speaker version is probably just going to be square wave based, I'm not sure if it's worth my time (or the RAM footprint) to have a SIN lookup or the logic for triangle / sawtooth for the 8 bit output, or if I should just "add al, 0x40" to go square wave there too. Sometimes simpler is better.

Basically I'm devoting the month of November to sound support. I'm planning on when complete uploading not just a test of it, but the full source of the test so if anyone else wants to run with it, they can.

Any advice or opinions welcome.

commodorejohn · Nov 7, 2014

Wow, you've been keeping busy. I'm curious about the IMF - are you rolling your own patches? (Should get yourself an MT-32 while you're at it...)

deathshadow · Nov 7, 2014

commodorejohn said:
Wow, you've been keeping busy. I'm curious about the IMF - are you rolling your own patches? (Should get yourself an MT-32 while you're at it...)

I have an MT-32... and a SC-7... and a DX-50.. It's why version 1.6 of paku paku has MT-32 and GM support. Looks like what I NEED is a new FB-01 (or a new board for one) as this thing is a brick.

As to the IMF, all I "need" is patch 48 (sin wave), at least for my current projects... well, I may actually use one of the bass guitars for the bass part of the pac man theme, but that's about it.

Though one thing that's a PITA is how some MIDI synths have pitch-bend locked to 200 cents range (+1 / -1 note) which is pretty damned useless. At least all the rolands let me set it to 2400 cents (+12/-12) which is what I actually want.

My DAC code came out nice, though I'm going to axe the idea of Disney support; I didn't realize the disney wasn't flat to the DAC and instead was buffered at 7khz; which is less than half what I consider the minimum rate for a softsynth. (15khz).

But for a flat ladder dac, this seems to work pretty good.

Code:

dacCheckVoice:
; INPUT
;   AL = voice total
;   BX = offset to check
; OUTPUT
;   AL = new voice total
;   BX = next voice
; CORRUPTS
;   DX
	mov   dx, [bx + voices]
	or    dx, dx
	jz    .done
	dec   WORD [bx + voiceCounters]
	jnz   .done
	mov   [bx + voiceCounters], dx
	not   BYTE [bx + currentVoice]
	jz    .done
	add   al, [bx + voiceLevels]
.done:
	add   bx, 2
	ret

dacTimerISR:
	push  ax
	push  bx
	push  cx
	push  dx
	push  ds
	mov   ax, DATA
	mov   ds, ax
	xor   bx, bx
	mov   ax, bx
	mov   cx, dacVoices
	rep   call dacCheckVoice
	mov   dx, [dacPort]
	out   dx, al
	dec   BYTE [dacCount]
	jnz   .done
	add   [dacCount], BYTE 44
	inc   WORD [tickCounter]
	sub   [countISR], countDec
	jns   .done
	add   [countISR], countInc
	pushf
	call  far [oldISR]
.done:
	pop   ds
	pop   dx
	pop   cx
	pop   bx
	pop   ax
	iret

Though it seems like a 386 SX-25 is minimum spec for that. AT at 20mhz is a no-go for 15khz.

Krille · Nov 8, 2014

deathshadow said:
Though if anyone wants to give that a once over I'd love to hear any suggestions. (I'm always screwing up when to use ja/jg vs. jb/jl - cannot keep those straight!)

You should use the unsigned jcc:s (ja/jb) since the frequencies can't be negative.

Other suggestions;

In i_CmsFindBits, use test dl, 2 instead of test dx, 2. Saves you a byte.

In i_CmsSetAL you have this;

Code:

	or   [bx], al
	mov  al, [bx]

Similarly, in i_CmsMaskAL you have this;

Code:

	and  [bx], al
	mov  al, [bx]

This is slow because what really happens is this;
LOAD
OR/AND
STORE
LOAD

The processor does 4 operations when it could do just 3 if you do it like this instead;
LOAD
OR/AND
STORE

which looks like this in code;

Code:

	or   al, [bx]
	mov  [bx], al

and;

Code:

	and  al, [bx]
	mov  [bx], al

In cmsReset you can save a byte by using mov cl, 6 instead of mov cx, 6 since CX is preserved through all the calls after the loop instruction.

In checkVoice, use jcxz instead of or cx, cx; jz...

In speakerTimerISR you can do this;

Code:

	mov   bx, [currentVoice]
	xor   bl, 2
	mov   [currentVoice], bl
	call  checkVoice
	jnz   .tick
	xor   bl, 2
	call  checkVoice
	jnz   .tick

Also, when chaining to the old ISR at the end of speakerTimerISR, something like this should be faster;

Code:

.tick:
	pop   cx
	pop   bx
	pop   ax

	inc   WORD [tickCounter]
	sub   [countISR], countDec
	jns   .done
	add   [countISR], countInc
	pop   ds    ; Note the changed push/pop order
	iret
.done:
	pop   ds
	db    0EAh    ; Far jump opcode
oldISR	dd    0    ; Pointer filled in during init

Instead of a 4 byte instruction that loads a far pointer (another 4 bytes) you get a 5 byte instruction that already contains the pointer and, as an additional bonus, doesn't mess with the stack.

deathshadow said:
But for a flat ladder dac, this seems to work pretty good.

Code:

rep call dacCheckVoice

I'm surprised it works at all.

BTW, use inc bx; inc bx; instead of add bx, 2.

Hope this helps!

deathshadow · Nov 9, 2014

Good catches all around... though the "rep call" -- NASM unfolds that into a loop; looked funny the first few times I saw it -- I'm still tempted to switch it back so that if I decide to switch assemblers again I won't have to go through for another rewrite hunting for things like that.

Though your suggestion with speakerTimerISR on doing the pop of the extra's first? Seems to send it off to never-never land. Not sure why, I'll have to look at it more.

Krille · Nov 9, 2014

deathshadow said:
Good catches all around... though the "rep call" -- NASM unfolds that into a loop; looked funny the first few times I saw it -- I'm still tempted to switch it back so that if I decide to switch assemblers again I won't have to go through for another rewrite hunting for things like that.

I have never seen this before. Are you sure it really expands into a loop and not just puts a rep prefix before the call instruction? Because that's what my version of NASM does (version 2.11.02). I can't find any mention of this in the NASM manual and when searching for it on Google this is the only example I could find. Not surprisingly, that poster is asking why it doesn't work.

Though your suggestion with speakerTimerISR on doing the pop of the extra's first? Seems to send it off to never-never land. Not sure why, I'll have to look at it more.

Did you remember to change the push order so that DS is pushed first? Also, you probably need to use a CS segment override when storing the pointer during init.

deathshadow · Nov 10, 2014

Krille said:
I have never seen this before.

This is what NASM is generating from that code:

Code:

   129 0000002E B90400                    mov   cx, dacVoices
   130 00000031 E8CBFFE2FB                rep   call dacCheckVoice
   131 00000036 8B16[0000]                mov   dx, [dacPort]

E2FB is loop -4, right?... though... hmm. what the hell version of NASM is this, it's responding to -v with "unknown command"? I thought I grabbed 2.11.05. Well, in any case, that's what it outputs here.

Did you remember to c

Krille said:
hange the push order so that DS is pushed first? Also, you probably need to use a CS segment override when storing the pointer during init.

Yeah... I forgot the CS override; I'm still not wild about self modifying code though; not sure why. I was always taught it's a bit of a no-no, and the past decade working with PHP has reinforced that notion. (since in scripttard interpreted languages like PHP ANY self modifying code opens up a security hole you could sail the USS IOWA through). It's not true at all in machine language though so I need to clamp down on that "instinct" of wanting to call that sloppy code.

Though now that I'm up to three different ISR's (dac, speaker, simple) and might add a fourth (speaker386) it means I have to write it to the code multiple times.

As it is I'm still adjusting to NASM, that it's [ds:si] instead of ds:[si] for example still throws me after decades of using assemblers using the latter syntax.

Krille · Nov 10, 2014

deathshadow said:
This is what NASM is generating from that code:

Code:

129 0000002E B90400 mov cx, dacVoices 130 00000031 E8CBFFE2FB rep call dacCheckVoice 131 00000036 8B16[0000] mov dx, [dacPort]

E2FB is loop -4, right?... though... hmm. what the hell version of NASM is this, it's responding to -v with "unknown command"? I thought I grabbed 2.11.05. Well, in any case, that's what it outputs here.

Yep, that's a loop alright. I can't help but wonder what the NASM authors have been smoking. A good assembler should not allow this kind of ambiguity (is it a prefixed instruction or a built in macro, who the **** knows?!?) and ambiguity is one of the reasons I hate MASM/TASM and prefer NASM. It's so stupid I feel like crying...

Needless to say, you really shouldn't use this "feature". If you ever use the "wrong" version of NASM to assemble your code with this crap in it, you'll have no way of knowing what the output is without checking the list file or the resulting binary.

On a side note, nasm -v should return the version (mine outputs "NASM version 2.11.02 compiled on Feb 19 2014") so that's definitely a bug. They probably botched that when improving compatibility with YASM.

Yeah... I forgot the CS override; I'm still not wild about self modifying code though; not sure why. I was always taught it's a bit of a no-no, and the past decade working with PHP has reinforced that notion. (since in scripttard interpreted languages like PHP ANY self modifying code opens up a security hole you could sail the USS IOWA through). It's not true at all in machine language though so I need to clamp down on that "instinct" of wanting to call that sloppy code.

Well, yes. In a sense it is self-modifying code but no worse than, for example, the segment relocation being done during loading of exe files.

Though now that I'm up to three different ISR's (dac, speaker, simple) and might add a fourth (speaker386) it means I have to write it to the code multiple times.

Yeah, I'm just giving you options. You'll have to decide through testing whether it is worth doing or not.

Trixter · Nov 10, 2014

deathshadow said:
As I've been switching to putting everything in the data segment instead of using the heap and heading towards a monolithic executable, the music playback system needs an overhaul since I was dynamically allocating space for not just the music, but things like MIDI SYSEX, Patch Data, and even the music itself.

You should probably dynamically allocate the player code too, if you're really trying to save space. That way only the code for the chip being used is actually loaded in memory. If you force FAR CALLs for everything you can use TP's Overlay system, although it might require a small amount of heap.

(I'm always screwing up when to use ja/jg vs. jb/jl - cannot keep those straight!)

Here's a handy reference: http://www.unixwiz.net/techtips/x86-jumps.html (see the "signed-ness" column)

IMF (IBM Music Feature) is still on my 'to be supported' list

I can't imagine why...

One further thing I'm playing with is setting a much higher timer rate on faster (386+, MAYBE faster 286) systems to allow for 1.5 bit PC speaker support and Covox/Disney software synth.

You can do this on 808x as long as you're okay with stopping the game to play music. If your intention is playing music in the background bit-banging a device, you can still do so but with some compromises (ie. foreground will be half-speed). Digger source code exists (www.digger.org) for exploration. IIRC you can do it with 1000Hz or less as long as you don't need to play a tone above 500Hz.

deathshadow said:
My DAC code came out nice, though I'm going to axe the idea of Disney support; I didn't realize the disney wasn't flat to the DAC and instead was buffered at 7khz; which is less than half what I consider the minimum rate for a softsynth. (15khz).

Disney is actually very 808x-friendly to support -- It is clocked at 7KHz and has a 16-byte FIFO, so you can keep it full requiring less than 450 interrupts per second. Normally you'd need 7KHz interrupts per second...

deathshadow said:
This is what NASM is generating from that code:

Code:

129 0000002E B90400 mov cx, dacVoices 130 00000031 E8CBFFE2FB rep call dacCheckVoice 131 00000036 8B16[0000] mov dx, [dacPort]

That's nonsense; the REP prefix only works on string instructions, so the CALL will only occur once. Must be a NASM bug.

Yeah... I forgot the CS override; I'm still not wild about self modifying code though; not sure why.

For any system with a cache or prefetch queue, you run the risk of changing code that has already been cached, which means your changes are ineffective. However, changing something well outside of that window (such as altering a branch in an inner loop, 100 instructions away, before you actually start the loop) is fine. And you can certainly self-generate code with a RET at the end of it and then CALL what you generated.

Krille said:
ambiguity is one of the reasons I hate MASM/TASM and prefer NASM.

Maybe you never used TASM in IDEAL mode? The whole point of IDEAL is to force a specific syntax that cannot lead to ambiguity. It's not perfect (TASM IDEAL sometimes forces me to clarify something with a WORD PTR when the destination operand is a 16-bit register) but it has, thus far, never led to bugs through ambiguity.

Krille · Nov 10, 2014

deathshadow said:
As it is I'm still adjusting to NASM, that it's [ds:si] instead of ds:[si] for example still throws me after decades of using assemblers using the latter syntax.

I know you were just pointing out the difference here but I just wanted to mention that if you actually write [ds:si] in your sources then NASM will do what it's been told and emit an unnecessary segment override (unnecessary since DS is the default segment when using SI for memory references). This is a good thing and part of the unambiguous behaviour I like about NASM. So only use segment overrides when they are actually needed.

Trixter said:
That's nonsense; the REP prefix only works on string instructions, so the CALL will only occur once. Must be a NASM bug.

Apparently it's a feature since it will emit a loop but it really should be treated as a bug (avoided and reported as such).

Maybe you never used TASM in IDEAL mode? The whole point of IDEAL is to force a specific syntax that cannot lead to ambiguity. It's not perfect (TASM IDEAL sometimes forces me to clarify something with a WORD PTR when the destination operand is a 16-bit register) but it has, thus far, never led to bugs through ambiguity.

I've used TASM relatively little and never in IDEAL mode.

Trixter · Nov 11, 2014

Krille said:
Apparently it's a feature since it will emit a loop but it really should be treated as a bug (avoided and reported as such).

A loop that doesn't actually loop, when the assembler thinks it will loop, is not a feature. It's a bug. If NASM output a ton of segment prefixes when the default segment was clearly indicated, or tons of LOCK prefixes all over the place, both of those wouldn't break anything either but I'd consider those bugs as well. Just because it doesn't break the code doesn't mean it's not a bug

I've used TASM relatively little and never in IDEAL mode.

Well, it's great for writing stable code directly on 808x hardware itself. IDEAL mode was created to address the quirks and ambiguity of MASM. Some IDEAL features:

Duplicate member names allowed in structs (as opposed to MASM which treats a name as global)
Predictable EQU processing
Correct handling of grouped data segments
Consistent bracketed expression syntax
Type checking at assemble time (if you want it)

It is possible NASM can do this and more, but later versions of NASM don't run on 808x hardware, so I've never used it when coding specifically for/on 808x hardware.

Krille · Nov 11, 2014

Trixter said:
A loop that doesn't actually loop, when the assembler thinks it will loop, is not a feature. It's a bug.

You're missing the point. The code that Jake posted;

Code:

   129 0000002E B90400                    mov   cx, dacVoices
   130 00000031 E8CBFFE2FB                rep   call dacCheckVoice
   131 00000036 8B16[0000]                mov   dx, [dacPort]

looks like this if you punch it into debug;

Code:

150A:0100 B90400        MOV     CX,0004
150A:0103 E8CBFF        CALL    00D1
150A:0106 E2FB          LOOP    0103
150A:0108 8B160000      MOV     DX,[0000]

As you can see it's a loop.

If NASM output a ton of segment prefixes when the default segment was clearly indicated, or tons of LOCK prefixes all over the place, both of those wouldn't break anything either but I'd consider those bugs as well. Just because it doesn't break the code doesn't mean it's not a bug

Again, you're missing the point (admittedly, I wasn't really clear about this). MASM and TASM in some cases requires that you write a segment override in the code, not because it is actually needed but for some obscure syntactical reasons. Whether an actual segment override will be emitted in the binary or not is anyone's guess (meaning, you can't tell just by looking at the source code). Some people have made a habit of always including the segment in memory references just because they have been conditioned by MASM/TASM into thinking that it's always required. If you don't break that habit when you start using NASM (like Jake) then you will have lots of unneeded segment overrides in your code. This is not NASM's fault. The blame should be directed at MASM's stupid syntax that basically shields you from the whole concept of default segments.

Just to give an example;

Code:

	push	ds
	mov	ax,40h
	mov	ds,ax
	mov	al,ds:[75h]		;get the number of hard disks.
	pop	ds

This is actual TASM code from tail.asm in the latest version of the crynwr packet driver for the 3Com 3C509 card available here.

Even though DS is the default segment, TASM will not allow removal of 'ds:'. Additionally, if you try to make it a bit more efficient, like this;

Code:

	push	ds
	xor	ax,ax
	mov	ds,ax
	mov	al,ds:[475h]		;get the number of hard disks.
	pop	ds

then TASM will give you another error message indicating that it's not supported by the processor (I don't recall the exact error message but that's the gist of it). Things like these tend to warp your sense of what can and cannot be done and this is another reason I don't like MASM and TASM.

Trixter · Nov 11, 2014

Krille said:
You're missing the point.

I was. I concur this is a NASM "feature".

Even though DS is the default segment, TASM will not allow removal of 'ds:'.

It works fine for me. There is a warning with TASM 3.2:

Code:

*Warning* tail.ASM(486) [Constant] assumed to mean immediate constant

...but it assembles and works, and is one byte shorter due to no ds: override. The warning is pretty clear; changing the source to this:

Code:

        mov     al,[075h]

...eliminates the warning completely and also assembles without issues.

So, again, I'd blame MASM quirks, not TASM (which is doing its best to deal with MASM quirks).

Code:
Additionally, if you try to make it a bit more efficient, like this;

Code:

push ds xor ax,ax mov ds,ax mov al,ds:[475h] ;get the number of hard disks. pop ds

then TASM will give you another error message indicating that it's not supported by the processor (I don't recall the exact error message but that's the gist of it).

That one is more interesting. Warnings are:

Code:

*Warning* tail.ASM(486) [Constant] assumed to mean immediate constant
**Error** tail.ASM(486) Constant too large

That appears to be some sort of bug, as it doesn't make any sense at all. Even with BYTE PTR added, I get the warning; I can only get it to work in IDEAL mode. Oh well, don't use TASM if you can use NASM I suppose

deathshadow · Nov 11, 2014

Well, just to be sure I updated to the latest NASM -- which looks like it came out two weeks ago -- version 2.11.06 -- and now "rep any instruction" does not work... and -v works in this build; I don't know what version that was I was using, but I don't think it was a "proper" NASM build.

Trixter said:
You can do this on 808x as long as you're okay with stopping the game to play music.

Which is not an option since this synth is also for all game sounds, not just music. Hell, even my MIDI implementations are for game sounds. (hence my bitching about how some synths won't go to more than 200 cents of pitch-bend range!)

Trixter said:
IIRC you can do it with 1000Hz or less as long as you don't need to play a tone above 500Hz.

Realistically it's more like 250hz as the upper limit, as anything over that is going to have "skew" errors that will sound really bad. Most real world music doesn't need more than 11khz, and human hearing tops out at 18khz or so, yet 44khz is the standard for digital recording for a reason. It's why Nyquist-shannon is flawed hence why a transition band has to be added; and thanks to the range of human hearing the lower the desired high end frequency the wider that skew has to be. For 20khz an ideal width increase is roughly 9.75% ( = 2.05... 20 + 2.05 * 2 == 44.1khz) -- you get down to 440hz (A above Middle C), and your ideal skew ends up nearly 75%... so you'd need a 1.54khz clock to service that and not have it sound bad and/or be unable to reproduce certain frequencies... like the middle-C below that A!

Otherwise your sampling rate ends up acting like a bandpass filter.

commodorejohn · Nov 12, 2014

So, I'm curious - what do you have for a pitch-bend algorithm for the Yamaha FM chips? What with that weird two-part pitch specification they use and all.

deathshadow · Nov 12, 2014

commodorejohn said:
So, I'm curious - what do you have for a pitch-bend algorithm for the Yamaha FM chips?

I don't have to, as you set the Adlib by frequency not by note -- and you can do so without a note-off. MIDI you can only set notes, but to do things like the game siren (weewooweewoo) or the bonus item eat (booweep) you need a resolution of about 20 cents -- basically a fifth of a step (12 steps per octave), and note-on / note-off can also have unintended side effects since most MIDI sounds have some degree of Attack/decay and release delays (even the sine/square wave voices on most synths)... For MIDI I need to send a note on and then bend it to get a consistent siren or sweep. On devices like the PC Speaker, Adlib, Tandy/Jr, etc, etc because I can set a specific frequency, I don't have those worries.

It's actually funny I have to use a pitch-bend plus a fixed note for sound effects on MIDI, but have to use a frequency to note lookup table for speaker/Adlib/Tandy/Jr/SID during the music playback... can't win for losing.

... and a lot of synths just won't obey the pitch bend range change NRPN; most notable the stupid Microsoft GS wavetable built into Windows.

I'm actually rewriting the Adlib code right this second -- this is untested (I might have the upper and lower bytes of the freq reversed) but:

Code:

%macro adlibDelay 1
	mov  cx, %1
%%loop:
	in   al, dx
	loop %%loop
%endMacro

adlibOut:
; INPUT
;   AL = register
;   AH = data
; OUTPUT
;   DX = 0x388
; Corrupts
;   AL, CX, DX
	mov   dx, 0x0388
	out   dx, al
	adlibDelay 6
	inc   dx
	mov   al, ah
	out   dx, al
	dec   dx
	adlibDelay 35
	ret

; procedure adlibSetHz(hz, voice:word);
pProcArgs adlibSetHz
	mov   ax, [bp + 8]  ; hz
	mov   bx, 43691     ; roughly 2/3rds 65536
	mul   bx            ; dx now holds roughly 2/3rds hz
	mov   bl, 5         ; octave counter        
	
	; freq >= 2048 { freq /= 2; octave++ }
.octaveAdjust:        
	cmp   dx, 2048
	jae   .done
	shr   dx, 1
	inc   bl
	jmp   .octaveAdjust
.done:

; So at this point
;   BL = octave
;   DX = freq
	mov   al, [bp + 6]  ; voice
	and   al, 0x0F
	or    al, 0xA0
	mov   bh, al        ; store for later
	mov   ah, dh
	mov   di, dx        ; store for pulling lower byte later
	call  adlibOut      ; corrupts AL, CX, DX
	mov   ax, di
	mov   ah, bl
	and   ax, 0x0703
	shl   ah, 1
	shl   ah, 1
	or    ah, al
	mov   al, bh
	or    al, 0x10
	call  adlibOut
	pRet  4

Is where I'm at with it... not even run it past the assembler yet, but the concept is sound. Adlib Freq adjust is simply two-thirds the real deal, which you can then skew down with octave. The lower counter numbers (less than 2048) tends to sound better and have less errors at high frequencies. Sometimes you are just better off switching to the faster timer (which is all "octave" actually does!)

Oh and if anyone is curious, my TP interface macros:

Code:

; handy macros for creating Turbo Pascal compatible ASM

%macro pProcArgs 1
global %1
%1:
	push bp
	mov  bp, sp
%endmacro

%macro pRet 1
	pop  bp
	retf %1
%endmacro

%macro pProcNoArgs 1
global %1
%1:
%endmacro

Just seem simpler and easier to deal with than NASM's "c16.mac" -- though once I get a better handle on writing NASM Macros I'll probably put together something a bit more robust... like declaring arguments as define auto-incrementing, and saving how big they are for the pRet macro, as well as adding/omitting the push/mov of BP automatically if you declare parms or not. Still working on that too.

Trixter · Nov 15, 2014

Trixter said:
That appears to be some sort of bug, as it doesn't make any sense at all. Even with BYTE PTR added, I get the warning; I can only get it to work in IDEAL mode.

More like a syntax difference. I posted details of this to c.l.a.x and got several helpful replies: https://groups.google.com/forum/#!original/comp.lang.asm.x86/dU4gh6f48zM/lXaSHmuAv0gJ

deathshadow · Jan 6, 2015

Just to revisit this now that I've got some more time to play with the timer code:

Krille said:
Did you remember to change the push order so that DS is pushed first? Also, you probably need to use a CS segment override when storing the pointer during init.

Code:

simpleTimerISR:
	push  ds
	push  ax
	mov   ax, DATA
	mov   ds, ax
	pop   ax
	inc   WORD [tickCounter]
	sub   [countISR], countDec
	js    .oldCall
	pop   ds
	iret
.oldCall:
	add   [countISR], countInc
	pop  ds
	db    0xEA
oldISR:
	dd    0

... and from the setup routine:

Code:

	mov   ax, 0x351C
	int   0x21
	mov   [cs : oldISR], bx
	mov   [cs : oldISR + 2], es

... goes off to never never land, while this:

Code:

.oldCall:
	add   [countISR], countInc
	pushf
	call  far [oldISR]
	pop   ds
	iret

Works just fine. (assuming oldISR is dd in DS)

I like the idea of doing a far jump so you don't have to push the flags for a local return and letting it's IRET handle things, but I can't seem to get it to fly.

NOT that it actually matters, if I can optimize the code BETWEEN slices each "slice" should have free time on it's overhead... after all having the CPU wait around with it's thumb up it's backside is part of the optimization goal. (and why the slices are profiling adjusted).

... and of course when the slices overrun, the shorter/empty timeslices can pick up the slack.

one thing I've done is make the timeslice calls be an array of procedural variables instead of a CASE statement.

Replacing (this is from my menu code)

Code:

	waitTimer;
	case menuCounter of
		0:menuBlink;
		18:menuPellet;
		4, 10, 16, 22, 28, 34:menuGhostBox;
		8: menuGhost0;
		26:menuGhost1;
	end;
	dec(menuCounter);
	if (menuCounter < 0) then menuCounter := 35;

With this:

Code:

type
	tProc = procedure; 
	{ 
		for some jacktard reason TP won't make array of procedure but will
		let you make an array of a variable of TYPE procedure?
	}

const
	menuSlices : array[0..35] of tProc = (
		menuBlink,  nullProc0, nullProc0,  nullProc0, menuGhostBox, nullProc0,
		nullProc0,  nullProc0, menuGhost0, nullProc0, menuGhostBox, nullProc0,
		nullProc0,  nullProc0, nullProc0,  nullProc0, menuGhostBox, nullProc0,		
		menuPellet, nullProc0, nullProc0,  nullProc0, menuGhostBox, nullProc0,	
		nullProc0,  nullProc0, menuGhost1, nullProc0, menuGhostBox, nullProc0,
		nullProc0,  nullProc0, nullProc0,  nullProc0, menuGhostBox, nullProc0
	);

and this in place of the switch:

Code:

	waitTimer;
	menuSlices[menuCounter];
	dec(menuCounter);
	if (menuCounter < 0) then menuCounter := 35;

Somehow is only 16 bytes more code (?!?!?!?) but runs significantly faster... so much faster I'm now able to actually reduce the snow in the menu once I got the timer synced to the vsync once every other slice; it's smoother and more effective even with blindly calling null procedures (just a pointer to a retf) just because it reduces the delay between checking the timer and calling the slice. The only real reason there's still snow on CGA in the menu now is that menuGhostBox takes longer than the refresh time, but if I break it into two slices away (the next retrace) it looks like ass as the sides aren't updated the same frame as the top/bottom. You can even see "tearing" just one slice over -- a laugh when

I may try moving the keyboard handler into those nullproc -- at least on the menu. The game logic:

Code:

	gameSlices : array[0..5] of tProc = (
		drawSprites, cleanupBackBuffer, userInput, gameLogic, nullProc0, nullProc0
	);

Only checks the keyboard/joystick once every game frame. (to make the game faster on higher levels I reduce when the slice counter is set back to zero skipping the null procs).

Timeslices -- SO superior to trying to run it flat once you profile the slices to each take roughly the same amount of time... at least in terms of smooth animation and sound without oddball tearing effects... Again the only other choice would be to put all the sound into the ISR which could interrupt blitting... NOT a desirable situation. Again, sound -- like the 'siren' in the background of pacman during gameplay or the "chomp" sound is updated every timeslice. Everything else only needs be updated every 4..6 ticks (depending on level speed).

Makes me glad I learned RTOS programming -- it's why games like PC-Man, moonbugs and Round42 are so ugly and choppy in both sound and video at PC/XT speeds.

Trixter · Jan 6, 2015

deathshadow said:
Timeslices -- SO superior to trying to run it flat once you profile the slices to each take roughly the same amount of time...

This is a matter of semantics and preference. Arranging your game to use fine-grained slices would make sense if you were running in a multi-tasking OS and you were trying to interrupt jitter from background processes, but you're writing a game that runs in a single-tasking OS, so all you're doing for yourself is creating more overhead and limiting your flexibility. It would be more difficult to update the screen at the full framerate (ie. 60Hz) using your methodology, than simply making sure everything that needs to run can do so in a single timeslice (that is equal to the refresh rate of the screen).

Krille · Jan 6, 2015

If that's the only change then I don't see why it shouldn't work.

Though I did notice something I didn't see before;

Code:

	add   [countISR], countInc

This is a bug waiting to happen and should be changed into this;

Code:

	add   BYTE [countISR], countInc

because that's what NASM will emit, but you shouldn't rely on that to happen automagically. The fact that NASM doesn't force you to specify the size is one of my biggest gripes with it.

BTW, what is countISR, countInc and countDec anyway? Can you describe the timer code in more detail?

VCF West	Aug 01 - 02 2025,	CHM, Mountain View, CA
VCF Midwest	Sep 13 - 14 2025,	Schaumburg, IL
VCF Montreal	Jan 24 - 25, 2026,	RMC Saint Jean, Montreal, Canada
VCF SoCal	Feb 14 - 15, 2026,	Hotel Fera, Orange CA
VCF Southwest	May 29 - 31, 2026,	Westin Dallas Fort Worth Airport
VCF Southeast	June, 2026	Atlanta, GA

What I'm working on 2, Sound

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member

Veteran Member