;'sinful' by Kuemmel for Function demo party 2019
BACKGROUND=0		;selects which 'if BACKGROUND=...' block in subroutine is assembled: try 0 or 1 for different backgrounds
org 100h
use16
;---constant table
;The table is the first thing in the program, so its bytes are also executed as
;instructions before the init code is reached (see the disassembly further down).
;Column "for code": offset as it is written in the operands of the code.
;Column "real"    : offset at which the dd/dw really starts.
;The two small float constants are read as overlapping dwords (dword[si+2], dword[si+4]):
;the dw supplies the high word (sign, exponent, top mantissa bits), the word stored
;in front of it supplies the low mantissa bits, which only change the value slightly.
data_start:									;for code	;real
dd 0.15915494309	;1/(2*PI)				;si			(si)	;0x3e22f983, accuracy needed...
dw 0x3b08			;approx. 1/480=0.00208	;si+2		(si+4)
dw 0x3b83			;approx. 0.004			;si+4		(si+6)
dw 1464 			;B approx. 5.7*256		;si+8	    (si+8)	;0x05b8 => also to be found within code (label c_1464); word[si+8] is rewritten every frame by main_loop
dw 359				;G approx. 1.4*256		;si+10	    (si+10)	;0x0167
dw 724				;R approx. 2.8*256		;si+12	    (si+12)	;0x02d4 (bytes d4 02 = the 'aam 0x2' listed below)
;dw dummy												(si+14) used as storage => will overwrite next instruction later
;...this assembles to:
;cmp cx,byte +0x22			(83 f9 22)
;or [ds:bp+di],bh			(3e 08 3b)
;cmp word [bp+di],byte -0x48	(83 3b b8)
;add ax,0x167				(05 67 01)
;aam 0x2					(d4 02)
;...which is useless but harmless and saves the mov si,data_start, but beware when you change anything !!!
;NOTE(review): this relies on si already holding the address of data_start (100h) at program start
;and on bh being 0 for the 'or' to leave memory unchanged - confirm for the targeted DOS versions.

;---init stuff
;Sets up the FPU stack used by the whole intro (st0 first): i=0, t=1, constant 1,
;then fills a table of 4096 floats sin(2*PI*i/4096)/(2*PI) at ds:[16384+4*i].
fninit										;FreeDOS needs that...
fld1										;constant 1, stays at the bottom of the FPU stack
fld1										;t (animation time), starts at 1
fldz										;i=0			t=1	1
push 0a000h									;value is popped into es after the loop; es:di is the stosd target of the pixel loop
;---sine table 16 KByte 
;needs: bx=0;sp=-4
;NOTE(review): bx=0 is assumed from program start; sp=-4 is assumed to result from the push above - confirm
mov cx,4096									;4096 table entries; the immediate word is also read as data via sin_table-2
sin_table:						
	fld st0 								;i				i	t	1
	fdiv dword[si]							;i*2*PI 		i	t	1   (division by 1/(2*PI))
	fidiv word[si-data_start+sin_table-2]	;i*2*PI/4096	i   t	1   divisor = the 4096 immediate of 'mov cx,4096' right before sin_table
	fsin									;sin()			i	t	1
	fmul dword[si]							;sin(a)/(2*PI)	i	t	1 factor fits here, too
	c_16384:								;c_16384+2 addresses the 16384 displacement of the next instruction, reused as integer constant in subroutine
	fstp dword[bx+16384]					;i				t   1 shorter than using another segment
	fadd st0,st2							;i+=1			t   1
	sub bx,sp								;+4 = sub -4, advance to the next float slot
loop sin_table
fstp st0									;t				1   drop the loop counter i
pop es										;place only here to keep sp=-4 before

;---set screen mode 640x480 TrueColour
mov bx,112h		;mode number passed to the BIOS call
mov ax,4f02h	;function 4f02h of int 10h (set mode)
int 10h 		;returns ah=0 if successful (else failed), al=4fh means function supported
				;the returned status is not checked, the intro continues in any case

;---main intro loop
;One pass = one frame. FPU stack on entry and after this setup part (st0 first): t, 1.
;Per frame this part advances t, stores the line thickness c as float in dword[si+14]
;and stores the blue factor as integer in word[si+8].
main_loop:
;---inc timer...defines speed...try si+2 or si+4, default is +2
fadd dword[si+2]				;t=t+0.002 (with si+2) or t+0.004 (with si+4)		1
;---change line thickness over time
fld st0 						;t								t		1
fsin							;sin(t) 						t		1 fsincos would save 2 bytes but looks worse
fld st0 						;sin(t) 					    sin(t)  t	    1
fmul dword[si+2]				;sin(t)*0.002					sin(t)	t	1
fadd dword[si+4]				;sin(t)*0.002+0.004				sin(t)	t	1
fstp dword[si+14]				;sin(t) 						t		1   thickness c, read by 'fdivr dword[si+14]' in subroutine
;---change colour over time
fabs							;abs(sin(t))					t	1
fimul word[si-data_start+c_1464];blue_fac=abs(sin(t))*1464		t   1   1464 = 0x05b8 is read from the first two bytes of the instruction at label c_1464
fistp word[si+8]				;t								1   word[si+8] is the factor used by rgb_loop when bx=2

;---yx_screen_loop
;Draws one frame: 480 rows (cx = 480..1) of 640 pixels (ax = 640..1), one stosd per pixel.
;dx = number of the next 64 KByte screen bank, di = write offset inside the current bank.
;FPU stack at the start and end of every pixel (st0 first): t, 1.
cwd								;clear dx for screen bank counter, ax is always positive here
xor di,di						;init screen window address	
mov cx,480						;row counter for 'loop y_loop'
y_loop: 					
	push cx						;keep the row counter, cx is reused as y coordinate
	sub cx,240					;cx = y relative to the screen centre (240..-239)
	mov ax,640					;column counter
	x_loop: 					;st0				   st1	    st2      st3   st4	   st5	   st6	   st7
		mov word[bp+si],cx		;word[bp+si] is the scratch cell for all integer<->FPU transfers; bp is never set in this file
		fild word[bp+si]
		fmul dword[si+2]		;y						t		1   (y scaled by approx. 1/480)
		push ax					;keep the column counter
		sub ax,127				;nice x position for effect
		mov word[bp+si],ax
		fild word[bp+si]		;x						y		t		1   (x is scaled later, inside subroutine)
		push cx 				;backup for x_loop
		;---bankswitch test
		;needs bx=0
		;bx is 0 here after every pixel because rgb_loop below counts it down to 0
		;NOTE(review): for the very first pixel after the mode set bx may still hold 112h - confirm that the call is harmless then
		test di,di				;test screenbank switch: di=0 at frame start and whenever stosd wrapped around 64 KByte
		jnz skip_bank_switch
		c_1464: 				;1464 = 0x05b8 = first two bytes (b8 05) of the mov below, read as a constant by main_loop
			mov ax,4F05h		;modifies ax
			int 10h 			;next 64 KByte bank
			inc dx	
		skip_bank_switch:
		call subroutine 		;to optimize far jumps and to allow LOOP for y_loop
								;returns cx = background value, FPU stack: i	f_new	x	y_new	t	1
		fstp st0				;f_new					x		y_new	t		1			
		mov bx,8				;reordered, bx needed due to subroutine bx modification
		;4 passes with bx=8,6,4,2 => factor words at si+14, si+12 (R), si+10 (G), si+8 (B)
		;the first pass (si+14) multiplies by the low word of the thickness float; its result
		;byte is shifted up into the top byte of eax (presumably the unused byte of the pixel)
		rgb_loop:
			shl eax,8			;eax = 32Bit RGBx 
			fld st0 			;f_new					f_new	x	    y_new   t	    1
			fimul word[si+bx+6] ;f_new*f_r/g/b			f_new	x	    y_new   t	    1
			dec bx				;reordered, shorter than sub bl,2	
			fistp word[bp+si]   ;f_new					x	    y_new   t	    1			
			add word[bp+si],cx	;add background
			cmp bh,byte[bp+si+1];check overflow, thanks TomCat for saving some Bytes here !
								;bh=0 here: ZF=1 if the high byte is 0 (value fits 0..255), otherwise CF=1
			mov al,byte[bp+si]	;low byte = channel value, mov keeps the flags of the cmp
			jz skip_clamp
			    salc			;al=0xff because CF=1 => saturate the channel
			skip_clamp:
			dec bx
		jnz rgb_loop
		fcompp					;y_new	     t			1   (pops f_new and x, the compare result itself is not used)
		stosd					;write saturated RGB pixel
		fstp st0				;t		   1
		pop cx					;restore y coordinate
		pop ax					;restore column counter
		dec ax
	jnz x_loop
	pop cx						;restore row counter
loop y_loop

;---end of frame: leave only when the byte read from port 0x60 is 1
;ax is 0 when x_loop ends, so after 'in' ax = value read and 'dec ax' gives 0 only for the value 1
;NOTE(review): value 1 is presumably the Esc key scancode - confirm
check_keyboard:
in al,0x60
dec ax
jnz main_loop
exito:
mov al,03h					;ah is 0 here (ax was 0 after dec ax) => int 10h function 0 with al=3
int 10h 				;back to textmode, skip those 4 Bytes if totally needed...
;no jump or exit call follows: execution runs on into subroutine and leaves through its ret
;NOTE(review): presumably that ret pops a 0 word placed on the stack at program start and so ends the program - confirm
;---per pixel work: background value and sum of 8 waves
;in : cx = y coordinate (row-240), ax = x coordinate (column-127), both as set up in x_loop
;     FPU stack (st0 first): x (unscaled), y (scaled), t, 1
;     dword[si+14] = line thickness c as float
;out: cx = value added to every colour channel (background)
;     ax = 0, bx = last sine table offset
;     FPU stack (st0 first): i, f_new, x (scaled), y_new, t, 1
subroutine:					
	;---background grid creation
	;BACKGROUND=0: cx=64 if (y and 31)=0 or ((x+16) and 31)=0, otherwise cx=0
	if BACKGROUND=0
		and cx,11111b	;or try e.g. 11101b
		mov cl,64	;grid colour add offset...try other values (mov keeps ZF of the 'and', ch is already 0)
		jz skip_clear
		add al,16	;center grid in x direction
		and al,11111b	;or try e.g. 11101b
		jz skip_clear
			mov cl,0
		skip_clear:
	end if
	;BACKGROUND=1: cx=16 if bit 4 of (al+23) or bit 4 of (cl+8) is set, otherwise cx=0
	if BACKGROUND=1
		add al,23		;centering box in x direction
		add cl,8		;centering box in y direction
		and ax,10000b
		and cx,10000b
		or cx,ax
	end if
	;---waves loop			;st0				   st1	    st2      st3   st4	   st5	   st6	   st7
	fmul dword[si+2]		;x						y		t		1   (x scaled by approx. 1/480)
	fldz					;f=0					x		y		t		1				
	fld st4 				;i=1					f		x		y		t		1	Agner: faster than fld1
	mov ax,8				;8 waves; al would be shorter, but due to background choice (BACKGROUND=0 leaves ah untouched)
	waves_loop:
		fld st2 			;x						i		f		x		y		t		1
		fmul st0,st1		;i*x					i		f		x		y		t		1
		fadd st0,st5		;a=i*x+t				i		f		x		y		t		1
		fmul dword[si]		;a=a*(1/(2*PI)) 	    i	    f	    x	    y	    t		1
		fld st0 			;a						a		i		f		x		y		t	    1
		fisttp dword[bp+si]	;a						i		f	    x	    y	    t	    1   truncating store (SSE3 instruction)
		fild dword[bp+si]   ;INT(a)				    a	    i		f		x		y	    t	    1
		fsubp st1,st0	    ;a-INT(a)			    i	    f	    x	    y	    t		1
		fimul word[si-data_start+c_16384+2] ;...(a-INT(a))*tablesize*4... seem to give a small speed penalty but saves 2 bytes
							;the factor 16384 is the displacement word of the instruction at label c_16384
		fistp word[bp+si]   ;i					    f		x		y		t		1
		mov bx,word[bp+si]
		and bx,11111111111100b	;don't know why I need that...may be some float to signed int accuracy problems...glitches otherwise
								;likely reason: the rounded value is not a multiple of 4 and is negative for negative a;
								;the mask gives a dword aligned offset 0..16380 inside the table
		dec ax				;reordered; the FPU instructions up to the jnz leave ZF of this dec untouched
		fld dword[bx+16384]	;=sin(a)*0.2			i		f		x		y		t		1	
		fadd st0,st4		;y_new = y + sin(...	i		f		x		y		t		1		
		fst st4 			;y_new				    i	    f		x		y_new	t		1
		fdivr dword[si+14]	;si+14 c/y_new			i		f		x		y_new	t		1
		fabs				;abs(c/y_new)			i		f		x		y_new	t		1	
		faddp st2,st0		;i						f_new	x		y_new	t		1
		fadd st0,st5		;i+=1					f_new	x		y_new	t		1
	jnz waves_loop
ret