;
;  D E T A B   --  DOS Detab Utility -- Robert Dewar
;
;  Program to demonstrate the use of the DOS INT 21 facilities
;  for writing a simple utility program. This program reads a
;  standard format text file, and replaces tab characters (09H)
;  by corresponding sequences of blanks, assuming tabs in the
;  standard columns (1,9,17,25..). One use of this program
;  might be to preprocess an assembly listing file for output
;  on a printer which does not support horizontal tabbing.
;
;  The command to invoke the program is:
;
;    DETAB  infile  outfile/S
;
;  The first file name, infile, is the file to be detabbed.
;  The second file name, outfile, is created and written
;  with the corresponding detabbed text.
;
;  The switch /S is optional. If present, it causes a statistics
;  line giving the number of tabs removed to be displayed.
;
;  Define function codes for DOS calls
;
f$crf    equ    3ch           ;create file
f$cls    equ    3eh           ;close file
f$opn    equ    3dh           ;open file
f$rdf    equ    3fh           ;read file
f$wrf    equ    40h           ;write file
f$exi    equ    4c00h         ;return to DOS (full AX value: function
			      ;4ch in AH, return code 00h in AL)
i$dos    equ    21h           ;interrupt for DOS functions
;
;  Define ASCII control characters
;
cr       equ    0dh           ;carriage return
eof      equ    1ah           ;end of file
ht       equ    09h           ;horizontal tab
lf       equ    0ah           ;line feed

;(* NOTE: /S bug from Dewar's original program is reproduced in this version *)

	  .model small

;-------------------------------------------------------------------------------
	  .code
;
;  The main program is simply a series of calls to subroutines
;  which divide the processing into several major sections.
;
;  Entry point. DS is loaded with the data segment first so that
;  the PSP variable can be addressed; ES is only overwritten
;  after its entry value has been stored in PSP.
start:   mov    ax,@data
	 mov    ds,ax         ;set DS to data segment
	 mov    PSP,es        ;save PSP segment address
	 mov    es,ax         ;set ES to data segment also

main:    call   sparm         ;scan parameters
	 call   openf         ;open files
	 call   detab         ;perform detab operation
	 call   close         ;close files
	 mov    ax,f$exi      ;AH = terminate function, AL = return code 0
	 int    i$dos         ;all done, exit
	 page

;
;  SPARM - Scan Parameters
;
;  First step is to scan parameters and fill in the input and
;  output file names. This is done by the sparm procedure. The
;  parameters are stored in our program segment prefix starting
;  at offset 81H, and terminated by a carriage return (0DH)
;  character. The length of the parameters is in byte 80h, but
;  we ignore it, since we use the terminating 0DH instead.
;
sparm    proc
	 push   ds            ;DS is switched to the PSP below
	 mov    ds,PSP        ;set DS to PSP address
	 mov    si,81h        ;point SI to parameters
;
;  While DS addresses the PSP, the name areas finam and fonam
;  are reached through ES:DI (sfnam stores via ES:DI).
;
;  Scan input file name
;
	 call   scbls         ;scan past blanks and switches
	 cmp    al,cr         ;error if no parameters
	 je     spm1
	 lea    di,finam      ;point DI to file name
	 call   sfnam         ;scan out file name
;
;  Scan output file name
;
	 call   scbls         ;scan past blanks and switches
	 cmp    al,cr         ;error if only one parameter
	 je     spm1
	 lea    di,fonam      ;point DI to output file name
	 call   sfnam         ;scan out file name
	 call   scbls         ;scan past blanks and switches
	 cmp    al,cr         ;error if not at CR now
	 jne    spm1
	 pop    ds            ;restore DS saved on entry
	 ret                  ;else all OK, exit

;
;  Here if parameter format error. The saved DS is left on the
;  stack; fatal ends the program and does not return here.
;
spm1:    call   fatal         ;output fatal error message
	 db     'Invalid parameters',0
sparm    endp
	 page
;
;  SCBLS - Scan Blanks and Switches
;
;  Steps over any mixture of blanks and /S switches in the
;  parameter text and stops at the first character which is
;  neither. Each switch seen sets sswt (addressed through ES).
;  A switch letter other than S or s ends the program with
;  an error message.
;
;        (ds:si)              points to parameters
;        call   scbls         call to scan switches/blanks
;        (ds:si)              points to non-blank
;        (al)                 non-blank character
;
scbls    proc
	 cld                  ;string fetches move forwards
;
;  Fetch characters until one is found which is not a blank
;
scb1:    lodsb                ;fetch character and step past it
	 cmp    al,' '        ;keep fetching while blank
	 je     scb1
	 cmp    al,'/'        ;is it the switch character?
	 je     scb2          ;jump if so
	 dec    si            ;else step back onto the non-blank
	 ret                  ;and return it in AL
;
;  Here after a slash, with SI already past it. The only
;  switch accepted is /S, in either case.
;
scb2:    mov    es:sswt,1     ;set /S switch, ES points to data segment
	 lodsb                ;fetch the switch letter
	 or     al,20h        ;fold letter to lower case
	 cmp    al,'s'        ;resume the scan if /S or /s
	 je     scb1
	 call   fatal         ;anything else is a bad switch
	 db     'Invalid switch',0
scbls    endp
	 page
;
;  SFNAM - Scan File Name
;
;  Copies a file name from the parameter text to the name area,
;  stopping at a slash, a blank, or any character below 20H
;  (which includes CR), and appends an ASCIIZ 00H byte to the
;  copy. AX and DI are returned unchanged.
;
;        (ds:si)              points to parameters
;        (es:di)              points to area for name
;        call   sfnam         call to scan file name
;        (ds:si)              points to the terminating character
;
sfnam    proc
	 push   ax            ;AX and DI are preserved
	 push   di
	 cld                  ;string operations move forwards
;
;  Copy characters until a terminator is fetched
;
sfn1:    lodsb                ;fetch next character
	 cmp    al,'/'        ;slash ends the name
	 je     sfn2
	 cmp    al,' '        ;so does a blank
	 jbe    sfn2          ;(or any control character)
	 stosb                ;else add character to the name
	 jmp    sfn1          ;and continue
;
;  Terminator fetched. LODSB has stepped past it, so back up
;  to leave SI pointing at the terminator itself.
;
sfn2:    dec    si            ;back onto the terminator
	 xor    al,al         ;end the stored name with a zero
	 stosb
	 pop    di            ;restore registers
	 pop    ax
	 ret                  ;return to caller
sfnam    endp
	 page
;
;  OPENF - Open Files
;
;  Called after the parameter scan has stored the two file
;  names in finam and fonam. The input file is opened for
;  reading and the output file is created. The handles are
;  saved in fihnd and fohnd. Failure of either operation ends
;  the program with an error message.
;
openf    proc
;
;  Open the input file
;
	 lea    dx,finam      ;DS:DX points to input name
	 mov    ax,f$opn shl 8 ;AH = open function, AL = 0 (read only)
	 int    i$dos
	 jnc    opn1          ;carry clear means success
	 call   fatal         ;else give up
	 db     'Error opening input file',0
;
;  Input file is open, AX has its handle
;
opn1:    mov    fihnd,ax      ;remember input handle
;
;  Create the output file
;
	 lea    dx,fonam      ;DS:DX points to output name
	 sub    cx,cx         ;attribute zero (normal file)
	 mov    ah,f$crf      ;create function
	 int    i$dos
	 jnc    opn2          ;carry clear means success
	 call   fatal         ;else give up
	 db     'Error creating output file',0
;
;  Output file exists, AX has its handle
;
opn2:    mov    fohnd,ax      ;remember output handle
	 ret                  ;both files ready
openf    endp
	 page
;
;  CLOSE - Close Files
;
;  Displays the statistics line if /S was given (sswt non-zero)
;  and then closes the input file followed by the output file.
;  The result of the close calls is not examined.
;
close    proc
	 cmp    sswt,0        ;was /S given?
	 je     cls1          ;skip the message if not
	 lea    dx,smsg       ;else point to statistics line
	 call   dsmsg         ;and display it
;
;  Close the input file. This is not strictly necessary, but
;  it is good form to close all files, including files which
;  are only read as input files.
;
cls1:    mov    ah,f$cls      ;close function
	 mov    bx,fihnd      ;BX has input file handle
	 int    i$dos         ;(status not checked)
;
;  Close the output file, so that it will be properly recorded
;  on disk. If the program ended without closing the file, the
;  directory entry would not be updated to reflect the data
;  written.
;
	 mov    ah,f$cls      ;close function
	 mov    bx,fohnd      ;BX has output file handle
	 int    i$dos         ;(status not checked)
	 ret                  ;return to caller
close    endp
	 page
;
;  DETAB - Perform Detab Operation
;
;  Copies the input file to the output file a byte at a time,
;  replacing each horizontal tab by the blanks needed to reach
;  the next tab stop. The input is taken to be ASCII records,
;  each ended by CR-LF (0DH,0AH), with an end of file character
;  (1AH) after the last record. The copy ends once the end of
;  file character has been written.
;
;  If /S was given (sswt non-zero), the decimal count held as
;  text ending at smsgc is incremented for each tab replaced.
;
detab    proc
	 call   rwini         ;initialize disk i/o
;
;  Start of a record. BX holds the number of characters output
;  so far in the current record, which determines how many
;  blanks a horizontal tab expands to.
;
dtb1:    xor    bx,bx         ;nothing output yet in this record
;
;  Loop through the characters of one record
;
dtb2:    call   rbyte         ;next input byte to AL
	 cmp    al,ht         ;tabs are expanded below
	 je     dtb3
	 call   wbyte         ;anything else is copied as is
	 inc    bx            ;one more character output
	 cmp    al,lf         ;line feed starts a new record
	 je     dtb1
	 cmp    al,eof        ;carry on unless end of file
	 jne    dtb2
;
;  End of file character has been copied
;
	 call   rwtrm         ;terminate disk input/output
	 ret                  ;all done
;
;  Tab found. With tab stops every eight characters, the number
;  of blanks required is
;
;    blanks needed =  8 - (chars_output mod 8),
;
;  and since 8 = 2**3 the mod 8 is just an AND with 111b.
;
dtb3:    mov    cx,bx         ;CX = chars output mod 8
	 and    cx,111b
	 neg    cx            ;CX = 8 - (chars output mod 8)
	 add    cx,8
	 mov    al,' '        ;blank to write (wbyte leaves AL alone)
	 page
;
;  Loop to generate blanks in output file
;
dtb4:    call   wbyte         ;write one blank
	 inc    bx            ;count it as output
	 loop   dtb4          ;until CX blanks are written
;
;  Count the tab if the /S switch is set
;
	 cmp    sswt,0        ;back to the copy loop if no /S
	 je     dtb2
	 lea    si,smsgc      ;start at lowest order digit
;
;  Decimal increment of the count text. The OR with '0' leaves
;  an existing digit unchanged but turns a blank into an ASCII
;  zero, so higher order positions come into use on a carry.
;
dtb5:    mov    al,[si]       ;fetch digit (or blank)
	 or     al,'0'        ;make sure ASCII zone is set
	 inc    al            ;add one
	 cmp    al,'9'        ;past '9' means carry out
	 ja     dtb6
	 mov    [si],al       ;no carry, store digit
	 jmp    dtb2          ;and resume the copy loop
;
;  Carry out of this digit
;
dtb6:    mov    byte ptr [si],'0' ;this position becomes zero
	 dec    si            ;move to next higher order digit
	 jmp    dtb5          ;and increment that one
detab    endp
	 page
;
;  The following package of routines provides an interface
;  allowing single byte read/write at the logical level, with
;  buffering being hidden from the caller. The calls are:
;
;    RWINI     Initialize input/output
;    RBYTE     Read one byte
;    WBYTE     Write one byte
;    RWTRM     Terminate input/output
;
;
;  RWINI - Read/Write Initialize
;
;  Must be called before the first call to RBYTE or WBYTE.
;
;  The output pointer is set to the start of obuf and the
;  output counter to zero, so that WBYTE begins filling an
;  empty buffer. The input counter is set to zero, which makes
;  the first RBYTE call read a block, since RBYTE refills the
;  buffer whenever it finds ictr zero.
;
rwini    proc
	 mov    optr,offset obuf ;next output byte goes at start of obuf
	 mov    octr,0        ;nothing stored in output buffer
	 mov    ictr,0        ;nothing left in input buffer
	 ret
rwini    endp
;
;
;  RBYTE - Read Byte from Input File
;
;  Returns the next byte of the input file in AL. BX is saved
;  and restored here. Bytes are taken from the input buffer
;  ibuf; when the buffer is exhausted (ictr zero) RIBLK is
;  called to read the next block from the file.
;
rbyte    proc
	 push   bx            ;BX is used as the buffer pointer
	 cmp    ictr,0        ;anything left in the buffer?
	 jne    rbt1          ;jump if so
	 call   riblk         ;else refill it first
;
;  Here with at least one byte in the buffer
;
rbt1:    mov    bx,iptr       ;BX points to next byte
	 mov    al,[bx]       ;fetch it
	 inc    bx            ;advance the pointer
	 mov    iptr,bx
	 dec    ictr          ;one byte fewer remains
	 pop    bx            ;restore registers
	 ret                  ;return to caller
rbyte    endp
	 page
;
;  RIBLK - Read Input Block
;
;  This procedure fills the data input buffer from the input
;  file. A full 2048 (BSZ) bytes are requested; at the end of
;  file a shorter block may be returned. On return iptr points
;  to the start of ibuf and ictr holds the number of bytes
;  available (always at least one). AX, BX, CX and DX are
;  preserved.
;
;  If DOS reports a read error (carry set on return from the
;  read call) the program is ended with an error message,
;  since AX then holds an error code and not a byte count.
;
riblk    proc
	 push   ax            ;save registers
	 push   bx
	 push   cx
	 push   dx
	 mov    bx,fihnd      ;BX has input file handle
	 lea    dx,ibuf       ;DS:DX points to buffer
	 mov    cx,bsz        ;read full block
	 mov    ah,f$rdf      ;perform read function
	 int    i$dos
	 jnc    rib1          ;jump if the read succeeded
	 call   fatal         ;else terminate
	 db     'Error reading input file',0
;
;  The value in AX is the number of bytes actually read, which
;  will be 2048 except at the end of file. A returned value of
;  zero requires special notice. Normally files are always
;  terminated by a 1ah (EOF) character, and the output file of
;  this program will always have the terminating EOF character.
;  However, the rules in DOS permit this character to be left
;  out so if we get a count of zero, we supply a dummy EOF
;  character as the last data in the file.
;
rib1:    or     ax,ax         ;jump if some data read
	 jnz    rib2
	 mov    ibuf,eof      ;else set dummy EOF data
	 inc    ax            ;and adjust count to 1
;
;  New block is read and ax indicates data length
;
rib2:    mov    ictr,ax       ;set input buffer counter
	 lea    ax,ibuf       ;set input buffer pointer
	 mov    iptr,ax
	 pop    dx            ;restore registers
	 pop    cx
	 pop    bx
	 pop    ax
	 ret                  ;return to caller
riblk    endp
	 page
;
;  WBYTE - Write a Byte
;
;  Adds the byte in AL to the output file. AL is left as it
;  was and BX is saved and restored here. The byte is stored
;  in the output buffer obuf; when the buffer already holds
;  2048 (BSZ) bytes, WOBLK is called first to write it out.
;
wbyte    proc
	 push   bx            ;BX is used as the buffer pointer
	 cmp    octr,bsz      ;is the buffer full?
	 jb     wbt1          ;jump if there is still room
	 call   woblk         ;else write it out, which empties it
;
;  Here with room for at least one byte
;
wbt1:    mov    bx,optr       ;BX points to next free byte
	 mov    [bx],al       ;store the data byte
	 inc    bx            ;advance the pointer
	 mov    optr,bx
	 inc    octr          ;one more byte stored
	 pop    bx            ;restore registers
	 ret                  ;return
wbyte    endp
;
;
;  RWTRM - Read/Write Terminate
;
;  This routine is called after the last call to RBYTE or
;  WBYTE. The last WBYTE call will typically write a 1ah (end
;  of file character) before this call is made. Whatever is
;  held in the output buffer is written to the output file.
;
rwtrm    proc
	 jmp    woblk         ;write final block, woblk returns to our caller
rwtrm    endp
	 page
;
;  WOBLK - Write Output Block
;
;  This procedure writes one output block to the output file.
;  The value in octr indicates the length of the block to be
;  written. Except for the last call at the end of file, this
;  value will always be equal to 2048 (bsz). On return the
;  output buffer is empty (optr at start of obuf, octr zero).
;  AX, BX, CX and DX are preserved.
;
;  The program is ended with an error message if DOS reports
;  an error (carry set) or writes fewer bytes than requested.
;
woblk    proc
	 push   ax            ;save registers
	 push   bx
	 push   cx
	 push   dx
;
;  Perform the write
;
	 mov    bx,fohnd      ;BX has output file handle
	 lea    dx,obuf       ;DS:DX points to buffer
	 mov    cx,octr       ;CX has count
	 mov    ah,f$wrf      ;do disk write
	 int    i$dos
;
;  Here on completion of write operation. If carry is set the
;  call failed and AX is an error code, which must not be
;  compared with the count. Otherwise AX is the number of bytes
;  actually written. This should be the same as the count in
;  CX. If it is less than CX, it means that the disk is full -
;  a fatal error situation.
;
	 jc     wob0          ;jump if DOS reported an error
	 cmp    ax,cx         ;right number of bytes written?
	 je     wob1          ;jump if ok
wob0:    call   fatal         ;else terminate
	 db     'Error writing output file',0
;
;  Write successful
;
wob1:    lea    ax,obuf       ;reset output buffer pointer
	 mov    optr,ax
	 mov    octr,0        ;reset output buffer counter
	 pop    dx            ;restore registers
	 pop    cx
	 pop    bx
	 pop    ax
	 ret                  ;return to caller
woblk    endp
	 page
;
;  DSMSG - Display Message
;
;        (ds:dx)       points to text ended by binary zero
;        call  dsmsg   call to display message
;
;  The message is displayed on the standard error file by
;  writing to handle 2. AX, BX, CX, DI and ES are saved and
;  restored; DX is not modified by this routine.
;
;  The length of the text is found by scanning for the zero
;  byte with REPNE SCASB. The repeat count in CX is set to its
;  maximum first: the scan runs for at most CX bytes, so with
;  CX left at the caller's value a count of zero would skip
;  the scan altogether and the length computed below would
;  wrap around to 65535.
;
dsmsg    proc
	 push   ax            ;save registers
	 push   bx
	 push   cx
	 push   di
	 push   es
	 push   ds
	 pop    es            ;set ES = DS (SCASB scans ES:DI)
	 cld                  ;scan to terminating zero
	 mov    di,dx         ;starting at the message text
	 mov    cx,0ffffh     ;with no limit on the scan length
	 sub    al,al         ;AL = byte to look for
	 repne  scasb
	 mov    cx,di         ;calculate length of message
	 sub    cx,dx         ;(DI is one past the zero byte)
	 dec    cx            ;so exclude the zero from the count
	 mov    bx,0002       ;set handle for standard error
	 mov    ah,f$wrf      ;write error message
	 int    i$dos
	 pop    es            ;restore registers
	 pop    di
	 pop    cx
	 pop    bx
	 pop    ax
	 ret                  ;return to caller
dsmsg    endp
;
;
;  FATAL - Terminate after Fatal Error
;
;        call   fatal
;        db     'error text ended by ',0
;
;  The error text follows the call inline, so the return
;  address left on the stack by the call is the offset of the
;  text within the code segment. The text is displayed and the
;  program is ended; control never returns to the caller.
;
;  The program ends with return code 1, so that a failed run
;  can be told apart from a successful one (for example with
;  IF ERRORLEVEL 1 in a batch file).
;
fatal    proc
	 mov    ax,@code      ;the message lies in the code segment
	 mov    ds,ax
	 pop    dx            ;point DS:DX to error message
	 call   dsmsg         ;display error message
	 mov    ax,f$exi+1    ;exit to DOS with return code 1
	 int    i$dos
fatal    endp
;-------------------------------------------------------------------------------


;-------------------------------------------------------------------------------
	  .data
;
;  File names and handles for input and output files. The
;  fields will be filled in as part of the initialization
;  processing. The name fields allow a maximum of 64
;  characters for the path, 8 for the file name, 1 for
;  the period, 3 for the extension and 1 extra for the
;  ASCIIZ terminator. Note that the limit of 64 characters
;  on a path name is imposed by DOS.
;
finam    db     77 dup (?)    ;input file name (ASCIIZ, stored by sfnam)
fihnd    dw     ?             ;input file handle (set by openf)
;
fonam    db     77 dup (?)    ;output file name (ASCIIZ, stored by sfnam)
fohnd    dw     ?             ;output file handle (set by openf)
;
;  Locations for input/output control. The buffers are 2048
;  bytes long. It is most efficient to read and write files
;  using a multiple of 512 bytes (the sector size). We could
;  make the buffers larger. The effect would be to speed up
;  processing at the expense of making the program larger.
;  The choice is thus a trade off between speed and space.
;
bsz      equ    2048          ;size of buffer
ibuf     db     bsz dup (?)   ;input buffer
iptr     dw     ?             ;pointer to next byte in ibuf
ictr     dw     ?             ;count of bytes left in ibuf
;
obuf     db     bsz dup (?)   ;output buffer
optr     dw     ?             ;pointer to next byte in obuf
octr     dw     ?             ;count of bytes stored in obuf
;
;  Message for statistics (/S) output. The count of tabs is
;  kept as decimal text inside the message itself: detab
;  increments the digit at smsgc and, on a carry, works back
;  into the blanks which end smsg, turning them into digits.
;
sswt     db     0             ;set to 1 if /S switch present
smsg     db     'Number of tabs removed:      '
smsgc    db     '0',0         ;lower order digit of count
PSP      dw     ?             ;segment address of PSP
	 page
;-------------------------------------------------------------------------------


;-------------------------------------------------------------------------------
	  .stack 100h
;-------------------------------------------------------------------------------

;
;  End of DETAB program
;
	  end start
