; > Tokens
; Token table - ok
; Tokeniser   - ok
; Detokeniser - ok
; 08-Mar-2009: Tokeniser sped up using offsets for each initial letter, linenums tokenised.
; 09-Mar-2009: LineFind written.


; Acorn-style token table
; =======================
; string, token, flag
;
; Token flag:
; Bit 0 - Conditional tokenisation (don't tokenise if followed by an alphabetic character).
; Bit 1 - Not start of statement.
; Bit 2 - Now middle of statement (command with parameters)
; Bit 3 - Expect a line number (after a GOTO, etc...).
; Bit 4 - Pseudo variable - add &40 token if at start of statement (external: hex number).
; Bit 5 - FN/PROC keyword - don't tokenise name of the subroutine.
; Bit 6 - Don't tokenise rest of line (REM, DATA, etc...)
; Bit 7 - Unused (external: quote toggle)

.TokenTable
.tknA	EQUB "AND"     ,&80,&02 ; 00000010
	EQUB "ABS"     ,&94,&02 ; 00000010
	EQUB "ACS"     ,&95,&02 ; 00000010
	EQUB "ADVAL"   ,&96,&02 ; 00000010
	EQUB "ASC"     ,&97,&02 ; 00000010
	EQUB "ASN"     ,&98,&02 ; 00000010
	EQUB "ATN"     ,&99,&02 ; 00000010
	EQUB "AUTO"    ,&C6,&0A ; 00001010
.tknB	EQUB "BGET"    ,&9A,&03 ; 00000011
	EQUB "BPUT"    ,&D5,&07 ; 00000111
.tknC	EQUB "COLOUR"  ,&FB,&06 ; 00000110
	EQUB "CALL"    ,&D6,&06 ; 00000110
	EQUB "CHAIN"   ,&D7,&06 ; 00000110
	EQUB "CHR$"    ,&BD,&02 ; 00000010
	EQUB "CLEAR"   ,&D8,&03 ; 00000011
	EQUB "CLOSE"   ,&D9,&07 ; 00000111
	EQUB "CLG"     ,&DA,&03 ; 00000011
	EQUB "CLS"     ,&DB,&03 ; 00000011
	EQUB "COS"     ,&9B,&02 ; 00000010
	EQUB "COUNT"   ,&9C,&03 ; 00000011
	EQUB "COLOR"   ,&FB,&06 ; 00000110
.tknD	EQUB "DATA"    ,&DC,&42 ; 01000010
	EQUB "DEG"     ,&9D,&02 ; 00000010
	EQUB "DEF"     ,&DD,&02 ; 00000010
	EQUB "DELETE"  ,&C7,&0A ; 00001010
	EQUB "DIV"     ,&81,&02 ; 00000010
	EQUB "DIM"     ,&DE,&06 ; 00000110
	EQUB "DRAW"    ,&DF,&06 ; 00000110
.tknE	EQUB "ENDPROC" ,&E1,&03 ; 00000011
	EQUB "END"     ,&E0,&03 ; 00000011
	EQUB "ENVELOPE",&E2,&06 ; 00000110
	EQUB "ELSE"    ,&8B,&08 ; 00001000
	EQUB "EVAL"    ,&A0,&02 ; 00000010
	EQUB "ERL"     ,&9E,&03 ; 00000011
	EQUB "ERROR"   ,&85,&00 ; 00000000
	EQUB "EOF"     ,&C5,&03 ; 00000011
	EQUB "EOR"     ,&82,&02 ; 00000010
	EQUB "ERR"     ,&9F,&03 ; 00000011
	EQUB "EXP"     ,&A1,&02 ; 00000010
	EQUB "EXT"     ,&A2,&03 ; 00000011
	EQUB "EDIT"    ,&CE,&0A ; 00001010
.tknF	EQUB "FOR"     ,&E3,&06 ; 00000110
	EQUB "FALSE"   ,&A3,&03 ; 00000011
	EQUB "FN"      ,&A4,&22 ; 00100010
.tknG	EQUB "GOTO"    ,&E5,&0E ; 00001110
	EQUB "GET$"    ,&BE,&02 ; 00000010
	EQUB "GET"     ,&A5,&02 ; 00000010
	EQUB "GOSUB"   ,&E4,&0E ; 00001110
	EQUB "GCOL"    ,&E6,&06 ; 00000110
.tknH	EQUB "HIMEM"   ,&93,&17 ; 00010111
.tknI	EQUB "INPUT"   ,&E8,&06 ; 00000110
	EQUB "IF"      ,&E7,&06 ; 00000110
	EQUB "INKEY$"  ,&BF,&02 ; 00000010
	EQUB "INKEY"   ,&A6,&02 ; 00000010
	EQUB "INT"     ,&A8,&02 ; 00000010
	EQUB "INSTR("  ,&A7,&02 ; 00000010
.tknJ
.tknK
.tknL	EQUB "LIST"    ,&C9,&0A ; 00001010
	EQUB "LINE"    ,&86,&02 ; 00000010
	EQUB "LOAD"    ,&C8,&06 ; 00000110
	EQUB "LOMEM"   ,&92,&17 ; 00010111
	EQUB "LOCAL"   ,&EA,&06 ; 00000110
	EQUB "LEFT$("  ,&C0,&02 ; 00000010
	EQUB "LEN"     ,&A9,&02 ; 00000010
	EQUB "LET"     ,&E9,&00 ; 00000000
	EQUB "LOG"     ,&AB,&02 ; 00000010
	EQUB "LN"      ,&AA,&02 ; 00000010
.tknM	EQUB "MID$("   ,&C1,&02 ; 00000010
	EQUB "MODE"    ,&EB,&06 ; 00000110
	EQUB "MOD"     ,&83,&02 ; 00000010
	EQUB "MOVE"    ,&EC,&06 ; 00000110
.tknN	EQUB "NEXT"    ,&ED,&06 ; 00000110
	EQUB "NEW"     ,&CA,&03 ; 00000011
	EQUB "NOT"     ,&AC,&02 ; 00000010
.tknO	EQUB "OLD"     ,&CB,&03 ; 00000011
	EQUB "ON"      ,&EE,&06 ; 00000110
	EQUB "OFF"     ,&87,&02 ; 00000010
	EQUB "OR"      ,&84,&02 ; 00000010
	EQUB "OPENIN"  ,&8E,&02 ; 00000010
	EQUB "OPENOUT" ,&AE,&02 ; 00000010
	EQUB "OPENUP"  ,&AD,&02 ; 00000010
	EQUB "OSCLI"   ,&FF,&06 ; 00000110
.tknP	EQUB "PRINT"   ,&F1,&06 ; 00000110
	EQUB "PAGE"    ,&90,&17 ; 00010111
	EQUB "PTR"     ,&8F,&17 ; 00010111
	EQUB "PI"      ,&AF,&03 ; 00000011
	EQUB "PLOT"    ,&F0,&06 ; 00000110
	EQUB "POINT("  ,&B0,&02 ; 00000010
	EQUB "PROC"    ,&F2,&26 ; 00100110
	EQUB "POS"     ,&B1,&03 ; 00000011
.tknQ
.tknR	EQUB "RETURN"  ,&F8,&03 ; 00000011
	EQUB "REPEAT"  ,&F5,&02 ; 00000010
	EQUB "REPORT"  ,&F6,&03 ; 00000011
	EQUB "READ"    ,&F3,&06 ; 00000110
	EQUB "REM"     ,&F4,&42 ; 01000010
	EQUB "RUN"     ,&F9,&03 ; 00000011
	EQUB "RAD"     ,&B2,&02 ; 00000010
	EQUB "RESTORE" ,&F7,&0E ; 00001110
	EQUB "RIGHT$(" ,&C2,&02 ; 00000010
	EQUB "RND"     ,&B3,&03 ; 00000011
	EQUB "RENUMBER",&CC,&0A ; 00001010
.tknS	EQUB "STEP"    ,&88,&02 ; 00000010
	EQUB "SAVE"    ,&CD,&06 ; 00000110
	EQUB "SGN"     ,&B4,&02 ; 00000010
	EQUB "SIN"     ,&B5,&02 ; 00000010
	EQUB "SQR"     ,&B6,&02 ; 00000010
	EQUB "SPC"     ,&89,&02 ; 00000010
	EQUB "STR$"    ,&C3,&02 ; 00000010
	EQUB "STRING$(",&C4,&02 ; 00000010
	EQUB "SOUND"   ,&D4,&06 ; 00000110
	EQUB "STOP"    ,&FA,&03 ; 00000011
.tknT	EQUB "TAN"     ,&B7,&02 ; 00000010
	EQUB "THEN"    ,&8C,&08 ; 00001000
	EQUB "TO"      ,&B8,&02 ; 00000010
	EQUB "TAB("    ,&8A,&02 ; 00000010
	EQUB "TRACE"   ,&FC,&0E ; 00001110
	EQUB "TIME"    ,&91,&17 ; 00010111
	EQUB "TRUE"    ,&B9,&03 ; 00000011
.tknU	EQUB "UNTIL"   ,&FD,&06 ; 00000110
	EQUB "USR"     ,&BA,&02 ; 00000010
.tknV	EQUB "VDU"     ,&EF,&06 ; 00000110
	EQUB "VAL"     ,&BB,&02 ; 00000010
	EQUB "VPOS"    ,&BC,&03 ; 00000011
.tknW	EQUB "WIDTH"   ,&FE,&06 ; 00000110
	EQUB "PAGE"    ,&D0,&02 ; 00000010
	EQUB "PTR"     ,&CF,&02 ; 00000010
	EQUB "TIME"    ,&D1,&02 ; 00000010
	EQUB "LOMEM"   ,&D2,&02 ; 00000010
	EQUB "HIMEM"   ,&D3,&02 ; 00000010
	EQUB "Missing ",&8D,&00 ; 00000000
	EQUB &00
	ALIGN
.TokenOffsets
EQUW tknA-TokenTable
EQUW tknB-TokenTable
EQUW tknC-TokenTable
EQUW tknD-TokenTable
EQUW tknE-TokenTable
EQUW tknF-TokenTable
EQUW tknG-TokenTable
EQUW tknH-TokenTable
EQUW tknI-TokenTable
EQUW tknJ-TokenTable
EQUW tknK-TokenTable
EQUW tknL-TokenTable
EQUW tknM-TokenTable
EQUW tknN-TokenTable
EQUW tknO-TokenTable
EQUW tknP-TokenTable
EQUW tknQ-TokenTable
EQUW tknR-TokenTable
EQUW tknS-TokenTable
EQUW tknT-TokenTable
EQUW tknU-TokenTable
EQUW tknV-TokenTable
EQUW tknW-TokenTable


; Tokeniser
; =========
; On entry, R5=>untokenised text
;           R4=>destination buffer
; Uses      R3=>token table address
;           R2= current tokeniser flags
;           R1= new tokeniser flags
;           R0= character
; On exit,  R5=><cr> at end of input line
;           R4=><cr> at end of output line
;
.Tokenise
.TokenZero
clr  r2			; Clear tokeniser flags
.TokenLoop
movb (r5),r0		; Get current character
bit  #&F0,r2		; Any skip flags set?
bne  TokenByte		; Inside quote/REM/FN/hex
bit  #&08,r2		; Is a line number expected?
beq  TokenNotLine	; No, try to tokenise
jsr  pc,CheckDigit	; Is character a digit?
bcs  TokenNotLine	; Not a digit, try to tokenise
jsr  pc,TokeniseNumber	; Tokenise line number
br   TokenLoop
.TokenNotLine
cmp  r0,#ASC"A"		; Tokens start with a letter
bcs  TokenByte		; <'A', enter character
cmp  r0,#ASC"X"
bcc  TokenByte		; >'W', enter character
jsr  pc,TokenSearch	; Search token table
			; Returns r5=>before next character
			;         r4= unchanged, output pointer
			;         r2= unchanged, current tokeniser flags
			;         r1= new tokeniser flag
			;         r0= byte to enter, token or char
bit  #2,r2		; Are we at the start of statement?
bne  TokenWord		; No, enter token/char
bit  #16,r1		; Is this a pseudo-token?
beq  TokenWord		; No, enter unchanged
add  #&40,r0		; Convert token to command token
.TokenWord
mov  r1,r2		; Copy new flags to current flags
bic  #16,r2		; Clear pseudo-token/hex flag
.TokenByte
movb r0,(r4)		; Enter byte in output buffer
;cmp  r0,#13		; At end of line?
;bne  TokenNotCR	; No, jump to next check
cmp  r0,#32		; At end of line?
bcc  TokenNotCR		; No, jump to next check
movb #13,(r4)		; Ensure <cr> terminator
rts  pc			; Return at end of line
			; R5=><cr> at end of source
			; R4=><cr> at end of dest
.TokenNotCR
inc  r4			; Increment output pointer
inc  r5			; Increment input pointer
cmp  r0,#34		; Is char quote?
bne  TokenNotQuote	; No, jump to next check
mov  #128,r0
xor  r0,r2		; Toggle quote flag
br   TokenLoop		; Loop back to continue tokenising

.TokenNotQuote
bit  #&C0,r2		; Inside quotes or REM/DATA/*cmd?
bne  TokenLoop		; Loop back, ignoring character
cmp  r0,#&3A		; Is char colon?
beq  TokenZero		; Loop back to reset to start of statement

cmp  r0,#ASC"*"		; Is char star?
bne  TokenNotStar	; No, jump to next check
bit  #2,r2		; At start of statement?
bne  TokenLoop		; No, treat as normal character
mov  #64,r2		; Treat rest of line as comment
br   TokenLoop		; Jump back to continue scanning line

.TokenNotStar
cmp  r0,#ASC"&"		; Is char hex number?
bne  TokenNotHex	; No, jump to next check
mov  #16,r2		; Set 'scanning hex number'
br   TokenLoop		; Continue scanning line

.TokenNotHex
bit  #&30,r2		; Scanning PROC/FN or hex?
beq  TokenLoop		; No, jump back to continue
cmp  r0,#ASC"0"		; r0<'0' C=1, r0>='0' C=0
bcc  TokenHexDigit
.TokenHexEnd
bic  #&30,r2		; Clear PROC/FN/Hex flag
br   TokenLoop		; Loop back to continue scanning
.TokenHexDigit
cmp  r0,#ASC"9"+1	; r0<':' C=1, r0>=':' C=0
bcs  TokenLoop		; Still hex/name, loop back to continue
cmp  r0,#ASC"A"		; r0<'A' C=1, r0>='A' C=0
bcs  TokenHexEnd	; End of name, clear flag and loop back
bit  #32,r2		; Scanning PROC/FN?
bne  TokenPROCFN
cmp  r0,#ASC"F"+1	; r0<'G' C=1, r0>='G' C=0
bcc  TokenHexEnd	; End of hex number
br   TokenLoop		; Loop to continue scanning
.TokenPROCFN
cmp  r0,#ASC"Z"+1	; r0<'[' C=1, r0>=']' C=0
bcs  TokenLoop		; Still an identifier
cmp  r0,#ASC"_"		; r0<'_' C=1, r0>='_' C=0
bcs  TokenHexEnd	; End of identifier
cmp  r0,#ASC"z"+1	; r0<'{' C=1, r0>='}' C=0
bcc  TokenHexEnd	; End of identifier
br   TokenLoop		; Loop to continue scanning

.TokeniseNumber
mov  r4,-(sp)		; Save output pointer
jsr  pc,ReadLineNumber	; Read number to R3/R4
bcs  TokenNotNumber	; Not a valid line number
mov  (sp)+,r3		; Get output pointer back
movb #&8D,(r3)+		; Line number marker
mov  r4,r0
swab r0
ror  r0
ror  r0
bic  #&FFCF,r0
mov  r4,r1
bic  #&FF3F,r1
bis  r1,r0
ror  r0
ror  r0
mov  #&14,r1
xor  r1,r0
mov  #3,r1
swab r4
br   TokenNumLp2
.TokenNumLp1
mov  r4,r0
bic  #&FFC0,r0
.TokenNumLp2
bis  #&40,r0
movb r0,(r3)+
swab r4
dec  r1
bne  TokenNumLp1
mov  r3,r4		; Update output pointer
mov  #8,r2		; Still expecting numbers
rts  pc
.TokenNotNumber
mov  (sp)+,r4		; Get output pointer back
movb (r5)+,r0
movb r0,(r4)+
clr  r2			; Not expecting numbers
rts  pc

; Search token table
; ==================
; On entry, R5=>input string to match
;           R4=>dest
;           R2= current flags
;           R0= current char
; On exit,  R5=>last char
;           R4=unchanged
;           R3 corrupted
;           R2=old flags
;           R1=new flags
;           R0=token or character
;           NC=token matched, R0=token, R5=>end of matched string
;           C=not matched, R0=byte, R5=>current char
.TokenSearch
adr  TokenOffsets-2*ASC"A",r3
asl  r0
add  r0,r3
mov  (r3),r0
adr  TokenTable,r3
add  r0,r3
;
; R3=>token table
;
.SearchTable
mov  r5,r1		; Save source pointer
.SearchLoop
movb (r5),r0		; Get source character
cmpb r0,(r3)		; Compare with token character
beq  SearchMatch	; Match, check if full match
cmp  r0,#ASC"."		; Abbreviation?
beq  SearchDot		; Jump to match abbreviation
.SearchNext
inc  r3			; Step past this token
movb (r3),r0
bit  #128,r0
beq  SearchNext		; Loop until token byte
inc  r3			; Step past token byte
.SearchNextBack
mov  r1,r5		; Restore source pointer
inc  r3			; Step past flag to next token
movb (r3),r0		; Get next byte
cmp  r0,#0		; Is it table terminator?
bne  SearchLoop		; Loop back to check next token
movb (r5),r0		; Get first char back
mov r2,r1		; new flags=old flags
rts pc
; R0=char, R1=old flag, R2=old flag, R3=corrupted, R4=preserved, R5=>current char
; flags=CC

.SearchDot
inc  r3			; Step to end of token
movb (r3),r0
bit  #128,r0
beq  SearchDot		; Loop until token byte fetched
inc  r3			; Step to flag byte
movb (r3),r1		; Get new flags
sec
rts pc
; R0=token, R1=new flag, R2=old flag, R3=corrupted, R4=preserved, R5=>last char
; flags=CS

.SearchMatch
inc  r5			; Step to next source char
inc  r3			; Step to next token char
movb (r3),r0		; Get next byte
bit  #&80,r0		; Is this a token byte?
beq  SearchLoop		; No, loop to check next character
inc  r3			; Point to flag
;bit  #2,r2		; Start of statement?
;beq  SearchFound	; Start of statement, don't check alpha
bitb  #1,(r3)		; Needs nonalpha terminator?
beq  SearchFound	; No nonalpha needed, token matched
movb (r5),r0		; Get following source character
cmp  r0,#ASC"A"
bcs  SearchFound	; <'A', matched
cmp  r0,#ASC"Z"+1
bcc  SearchFound	; >'Z', matched
; copy source to dest

; r4=>dest
; r5=>last source char+1
; r1=>first source char
.SearchAlpha
;; movb (r1)+,r0
;; movb r0,(r4)+
movb (r1)+,(r4)+
cmp r1,r5
bne SearchAlpha
dec r4
movb (r4),r0		; is this needed?
dec r5
mov r2,r1
clc
rts pc

.SearchFound
dec  r5			; Point to last character of source
movb (r3),r1		; Get new token flag
dec  r3			; Point to token byte
movb (r3),r0		; Get token byte
sec
rts pc
; R0=token, R1=new flag, R5=last matched char
; flags=CS

; Print character or token
; ========================
; On entry, r0=character
; On exit,  r0,r2 corrupted
;
.PrintR0Token
cmpb r0,#128		; Is it a token?
bcs  PrintAscii		; No, print via OSASCI
mov  r1,-(sp)
adr  TokenTable,r2
.DetokeniseLp1
mov  r2,r1		; Save start of this token string
.DetokeniseLp2
tstb (r2)+		; Loop to find b7=1
bpl  DetokeniseLp2
inc  r2			; Step past tokeniser flags
cmpb r0,-2(r2)
bne DetokeniseLp1	; No match, loop back
.DetokeniseLp3
movb (r1)+,r0
bmi  DetokeniseDone	; Exit if b7 set
jsr  pc,PrintR0		; Print character
br   DetokeniseLp3
.DetokeniseDone
mov  (sp)+,r1
rts  pc

.ReadLineNumber
mov  r5,-(sp)		; Save line pointer
jsr  pc,EvalDecimal	; Read number to R3/R4
bcs  ReadLineNotNum
tst  r3
sec
bne  ReadLineNotNum	; Number>65535
cmp  #&FF00,r4		; SC if number>&FF00
bcs  ReadLineNotNum
clr  (sp)+		; Drop saved line pointer
rts  pc			; CC=valid line number
.ReadLineNotNum
mov  (sp)+,r5		; Restore line pointer
rts  pc			; CS=invalid line number

.LineFind
mov  SV_PAGE,r1
.LineFindLp
movb 1(r1),r0		; Get line number high
cmpb r0,#&FF
beq  LineFindEnd	; End of program
movb 2(r1),r2		; Get line number low
swab r0
bic  #&00FF,r0
bic  #&FF00,r2
bis  r2,r0		; r0=line number
cmp  r0,r4		; Got to matching or higher line number?
bcc  LineFindFound	; r1=><cr> before matching line
movb 3(r1),r0
bic  #&FF00,r0
add  r0,r1		; Step to next line
br   LineFindLp
.LineFindEnd
sec
tst  r0			; NE
.LineFindFound
; If line found,        CC+EQ, r1=><cr> before matching line
; If line not found,    CC+NE, r1=><cr> before next line
; If end of program, MI+CS+NE, r1=><cr> before &FF end marker
rts  pc



; On exit, r4=> <cr> before line start
mov r5,r4		; Dummy, return current pointer

.LineDelete
rts pc

