I wrote a DOS version of the chastext program for simple search and replace. It does have some limitations because command line arguments are handled very differently in DOS than they are in Linux. I can’t simply put quotes around two words to have them count as one argument like I can in Linux.
Aside from that, it seems to work. I can replace individual words in a text file with a different word. I will have a demo video up soon but see the post about the Linux version in the Linux forum to get the basic idea of what it should do.
I am not trying to recreate sed or awk, but a simple find/replace is a worthwhile project for learning something new after I have mastered my chastehex and chastecmp programs. I can manipulate binary files flawlessly because they are predictable, so now I am testing my limits on text-based processing.
main.asm
org 100h ;this program is assembled to run at offset 100h
mov word [radix],16 ;integers printed by putint will be shown in hexadecimal
;the byte at address 80h holds the length of the command string
mov cl,[80h]
xor ch,ch ;cx = length of the command string as a 16-bit value
test cx,cx ;was anything typed after the program name?
jnz args_exist
;nothing was passed: print the usage text and leave
mov ax,help
call putstring
jmp ending
args_exist:
;bx walks the argument string which starts at 81h
;it always begins with a space, so leading spaces are stepped over first
;cx (the remaining length) is reduced by one for every space skipped
mov bx,81h
jmp skip_start_spaces
skip_one_start_space:
inc bx ;move past this space
dec cx ;one less character remains
skip_start_spaces:
cmp byte [bx],' '
je skip_one_start_space ;keep going while we are looking at spaces
skip_start_spaces_end:
mov [arg_string_index],bx ;remember where the first real character is
;end address = first non space address + remaining length
mov ax,cx
add ax,bx
mov [arg_string_end],ax
;now bx points to the first non space character in the arguments passed to the DOS program
;and we know that [arg_string_end] is where it ends
;the next step is to filter the arguments into separate zero terminated strings
;each space will be changed to a zero (normally)
;but we also need to account for spaces inside quotes that are considered part of the string
;Linux handles this normally but DOS needs me to write the code to mimic this behavior
;because the program needs to function identically for DOS or Linux
;Argument filter: splits the argument string in place into zero terminated strings.
;On entry bx points at the first non space character and [arg_string_end] is the end address.
;Register roles while the loop runs:
; bx = address of the character being examined
; cl = current terminator (a space outside quotes, the opening quote character inside quotes)
; ch = 0 while outside a quoted string, 1 while inside one
mov cl,' ' ;set the default filter character (argument terminator) to a space
mov ch,0 ;are we currently checking spaces 0 or quote characters 1 as terminators?
;this loop is the new and improved argument filter
;it keeps track of whether we are inside or outside a quote
;and also which type of quote started the quote
;the actual quote marks are not part of the string unless they
;are the opposite quote type than what started the string
;The important thing is that spaces can exist inside of quoted strings
;as one argument rather than each new word being a new argument
;could be important for filenames containing spaces, etc.
argument_filter:
cmp bx,[arg_string_end] ;are we at the end of the arg string?
jz argument_filter_end ;if yes, stop the filter and terminate with zero
cmp ch,1 ;are we inside a quoted string?
jz quote_check ;if yes, don't do anything to the spaces
cmp byte[bx],cl ;compare the byte at address bx to the string terminator
jnz ignore_char ;if it is not the same, we ignore it
mov byte[bx],0 ;but if it matches, change it to a zero
ignore_char:
;outside a quote, either quote character opens a quoted string
cmp byte [bx],0x22 ;is this a double quote -> "
jz start_quote
cmp byte [bx],0x27 ;is this a single quote -> '
jz start_quote
jmp quote_no ;it was not a quote
start_quote:
mov ch,1 ;set ch to 1 to set that we are inside a quote now
mov cl,[bx] ;save this quote type as the new terminator
mov byte[bx],0 ;but delete the first quote with zero
;execution falls through into quote_check here; the byte was just zeroed
;so it cannot equal the quote in cl and the code continues at quote_no
;check for single or double quotes
quote_check:
cmp [bx],cl ;is this character the same type of quote that started this sub string?
jnz quote_no ;if it is not, then skip to quote_no section
;but if it was matching, change this byte to zero
;and change cl back to a space
mov cl,' ' ;cl is now a space
mov ch,0 ;ch is 0 because now we have ended the quoted string
mov byte[bx],0 ;delete the end quote with zero
quote_no:
inc bx ;go to the next character
jmp argument_filter ;jump back to the beginning of argument filter
argument_filter_end:
;bx equals [arg_string_end] here, so this writes the final terminator at the end address
mov byte [bx],0 ;terminate the ending with a zero for safety
;special case!!!
;If the first argument passed began with a quoted string
;it would have been changed to a 0 instead. This requires us to add one to the
;starting argument string index
;(only a single leading zero is skipped by this check)
mov bx,[arg_string_index]
cmp byte[bx],0
jnz first_argument_was_not_quote
inc word[arg_string_index] ;add 1 so it points to the next byte before we process arguments
first_argument_was_not_quote:
;now that the argument string is prepared, we will try to use the first argument as a filename to open
;open the file named by the first argument for reading
mov dx,[arg_string_index] ;dx = address of the zero terminated filename
mov ax,3D00h ;ah=3Dh DOS open existing file, al=0 read access
int 21h
mov [file_handle],ax ;on success this is the handle, on failure it is the error number
jnc file_opened ;carry clear means the file is open
;carry was set: report the filename, the message and the error number, then quit
file_error: ;for example error number 2 = file not found
mov ax,dx
call putstr_and_line ;show which filename failed
mov ax,file_error_message
call putstring
mov ax,[file_handle] ;the error number was stored here above
call putint
jmp ending
file_opened:
mov ax,dx ;ax = filename address (handy when debugging with putstring)
jmp use_file ;go and look at the remaining arguments
;what happens to the file depends on how many arguments follow the filename
;no more arguments: the file is printed unchanged
;one more argument: it is the search string
;two more arguments: search string and replacement string
use_file:
call get_next_arg ;ax = address of the next argument
cmp [arg_string_end],ax ;did we run into the end of the argument string?
je textdump ;yes: no search string, go straight to the output loop
mov [string_search],ax ;no: this argument is the search string
call get_next_arg ;ax = address of the argument after that
cmp [arg_string_end],ax ;end of the argument string?
je textdump ;yes: search only, no replacement
mov [string_replace],ax ;no: this argument is the replacement string
;any further arguments are ignored
;main loop: copy the file to standard output one byte at a time
;when a search string exists, every byte equal to its first character starts a candidate:
;the rest of the candidate is read into byte_array and compared with the search string
; match + replacement string : the replacement is printed instead
; match, no replacement : the match is printed inside double quotes
; no match : only the first byte is printed and the file position is moved
; back so the other bytes are examined again (a match may begin in them)
;the loop ends at file_close when a read fails or returns no data
textdump:
;we start the loop with a call to read exactly 1 byte
mov ah,3Fh ;call number for read function
mov bx,[file_handle] ;store file handle to read from in bx
mov cx,1 ;we are reading one byte
mov dx,byte_array ;store the bytes here
int 21h
jc file_close ;carry set means the read failed and ax is an error code, not a count
cmp ax,1 ;check to see if exactly 1 byte was read
jnz file_close ;anything else means end of file: close the file and end the program
; this point is reached if 1 byte was read from the file successfully
file_success:
;without a search string every byte is simply printed
cmp word[string_search],0
jz print_single_byte
putchar_skip:
;compare the byte just read with the first character of the search string
mov bx,[string_search]
mov al,[bx]
cmp al,[byte_array]
jz search_start ;equal: this may be the start of a match
print_single_byte:
;print the byte that was read and go back for the next one
mov al,[byte_array]
call putchar
jmp textdump
search_start:
mov ax,[string_search]
call strlen ;get the length of the search string
;read length-1 more bytes because the first one is already in the byte array
dec ax
mov cx,ax ;we are reading this many bytes to have a string to compare
mov dx,byte_array+1 ;store the bytes after the first one
mov bx,[file_handle] ;store file handle to read from in bx
mov ah,3Fh ;call number for read function
push cx ;keep the requested count safe across the DOS call
int 21h
pop cx ;pop does not change the carry flag
jc print_single_byte ;read error: nothing usable was read, just print the first byte
mov [bytes_read],ax ;remember how many extra bytes were consumed
;terminate the candidate string with zero
mov bx,dx ;address of second byte in the string
add bx,ax ;plus the number of bytes read
mov byte [bx],0
cmp ax,cx ;fewer bytes than requested (end of file)?
jnz search_mismatch ;a shorter string can never be a match
mov si,[string_search]
mov di,byte_array
call strcmp ;compare these two strings
cmp ax,0 ;test if they are the same (if ax returned zero)
jnz search_mismatch
;they match: either replace them or quote them
cmp word[string_replace],0 ;check to see if a replacement string is available
jz print_quotes ;if not, just quote the string that matched
mov ax,[string_replace]
call putstring ;print the replacement instead of the original
jmp normal_print_skip
print_quotes:
;print quotes around matched string
mov al,'"'
call putchar
mov ax,byte_array
call putstring ;print the string
mov al,'"'
call putchar
jmp normal_print_skip
search_mismatch:
;the candidate did not match. Move the file position back over the extra bytes
;so that they go through the loop again, then print only the first byte.
mov dx,[bytes_read]
cmp dx,0
jz normal_print ;no extra bytes were read, the buffer holds just the first byte
neg dx ;cx:dx = -(extra bytes) as a signed 32-bit offset
mov cx,0FFFFh ;high word of the negative offset
mov bx,[file_handle]
mov ax,4201h ;ah=42h move file pointer, al=1 relative to the current position
int 21h
jc normal_print ;cannot seek on this handle: print everything that was read instead
mov al,[byte_array]
call putchar
jmp textdump
normal_print: ;print the whole buffer unmodified and unquoted
mov ax,byte_array
call putstring ;print the string
normal_print_skip:
jmp textdump
file_close:
;hand the file handle back to DOS
mov bx,[file_handle]
mov ah,3Eh ;call number for the DOS close function
int 21h
ending:
;terminate the program with return code 0
mov ah,4Ch
mov al,0
int 21h
;the strlen and strcmp are named after the equivalent C functions
;but are written from scratch by me based on their expected behavior
;a function to get the length of string in ax and return the integer in ax
;in: ax = address of a zero terminated string
;out: ax = number of bytes before the terminating zero (bx holds the same value)
strlen:
mov bx,ax ;bx scans forward while ax keeps the start address
jmp strlen_start
strlen_next:
inc bx ;not the terminator, step to the next byte
strlen_start:
cmp byte [bx],0
jne strlen_next ;keep scanning until the zero byte is found
strlen_end:
sub bx,ax ;distance from the start = length
mov ax,bx ;return the length in ax
ret
;compare the zero terminated string at si with the one at di
;out: ax = 0 when both strings are identical (same bytes AND same length)
; ax = 1 when they differ anywhere, including when one is only a prefix of the other
;changes bx, si and di
strcmp:
mov ax,0 ;this will stay zero unless the strings are different
strcmp_start:
mov bl,[di]
mov bh,[si]
cmp bl,bh
jnz strcmp_differ ;different bytes (this also catches one string ending early)
cmp bl,0
jz strcmp_end ;both strings ended at the same position: they are equal
inc di
inc si
jmp strcmp_start ;same byte and not the end, continue to next character
strcmp_differ:
inc ax ;report the difference
strcmp_end:
ret
;advance [arg_string_index] to the start of the next argument
;only valid after the argument filter has turned the separators into zeroes
;out: ax = bx = new value of [arg_string_index]
; this equals [arg_string_end] when there are no more arguments
get_next_arg:
mov bx,[arg_string_index] ;start from the current argument
jmp find_zero
skip_arg_char:
inc bx ;still inside the current argument
find_zero:
cmp byte [bx],0
jne skip_arg_char ;walk forward to the zero that ends the current argument
found_zero:
jmp find_non_zero
skip_zero_byte:
inc bx ;a separator zero, step over it
find_non_zero:
cmp bx,[arg_string_end]
je arg_finish ;reached the end: nothing left to find
cmp byte [bx],0
je skip_zero_byte ;still a zero, keep looking
arg_finish:
mov ax,bx ;return the address in ax
mov [arg_string_index],ax ;and remember it for the next call
ret
;usage text printed with putstring when the program is started without arguments
;one zero terminated string; 0Dh,0Ah ends each line and 9 is a tab before each example
help db 'chastext by Chastity White Rose',0Dh,0Ah
db '"cat" or "type" a file without changing it:',0Dh,0Ah,9,'chastext file',0Dh,0Ah
db 'search for a string and quote it:',0Dh,0Ah,9,'chastext file search',0Dh,0Ah
db 'replace string:',0Dh,0Ah,9,'chastext file search replace',0Dh,0Ah
db 'Find or replace any string!',0Dh,0Ah,0
; About the chastelib variant
;instead of including chastelib16.asm as a header file
;I copy pasted it except that I excluded functions that were not used.
;Notably, the strint function is excluded because strint_32 is used instead
;start of chastelib
; This file is where I keep my function definitions.
; These are usually my string and integer output routines.
;putstring: write a zero terminated string to standard output
;in: ax = address of the string
;ax, bx, cx and dx are restored before returning
;it uses call 40h of interrupt 21h (write to a file handle) so that it works
;in a similar way to the Linux version of this function
putstring:
push ax
push bx
push cx
push dx
mov dx,ax ;dx = start of the string, this is what DOS will write from
mov bx,ax ;bx scans for the terminating zero
jmp putstring_strlen_start
putstring_next_char:
inc bx ;not the end yet
putstring_strlen_start:
cmp byte [bx],0
jne putstring_next_char ;keep going until the zero byte
putstring_strlen_end:
mov cx,bx
sub cx,dx ;cx = end address - start address = number of bytes to write
mov bx,1 ;file handle 1=stdout
mov ah,40h ;select DOS function 40h write
int 21h ;call the DOS kernel
pop dx
pop cx
pop bx
pop ax
ret
;this is the location in memory where digits are written to by the intstr function
int_string db 16 dup '?' ;enough bytes to hold maximum size 16-bit binary integer
int_string_end db 0 ;zero byte terminator for the integer string
radix dw 2 ;radix or base for integer output. 2=binary, 8=octal, 10=decimal, 16=hexadecimal
int_width dw 8 ;minimum number of digits, shorter numbers are padded with '0' on the left
;intstr: convert the unsigned integer in ax to a zero terminated string
;in: ax = value, [radix] = number base (must be at least 2), [int_width] = minimum digits
;out: ax = address of the first character of the string (inside int_string)
;changes bx, cx and dx
;digits are produced from the lowest to the highest, so the buffer is filled backwards
intstr:
mov bx,int_string_end-1 ;address of the lowest digit (just before the zero terminator)
mov cx,1 ;cx counts the digits written
digits_start:
mov dx,0 ;dx:ax is the dividend, the high word must be zero
div word [radix] ;ax = quotient, dx = remainder = value of this digit
cmp dx,10
jae hexadecimal_digit ;unsigned compare: 10 and above become letters
decimal_digit: ;we go here if it is only a digit 0 to 9
add dx,'0'
jmp save_digit
hexadecimal_digit:
add dx,'A'-10 ;10 becomes 'A', 11 becomes 'B' and so on
save_digit:
mov [bx],dl
cmp ax,0
jz intstr_end ;nothing left to divide, all digits are written
dec bx
inc cx
jmp digits_start
intstr_end:
prefix_zeros:
cmp cx,[int_width]
jnb end_zeros ;already at least int_width digits
cmp bx,int_string
jbe end_zeros ;the buffer is full, never write in front of int_string
dec bx
mov [bx],byte '0'
inc cx
jmp prefix_zeros
end_zeros:
mov ax,bx ; store string in ax for display later
ret
;function to print string form of whatever integer is in ax
;The radix determines which number base the string form takes.
;Anything from 2 to 36 is a valid radix
;in practice though, only bases 2,8,10,and 16 will make sense to other programmers
;this function does not process anything by itself but calls the combination of my other
;functions in the order I intended them to be used.
;in: ax = integer to print
;ax, bx, cx and dx are saved on entry and restored before returning
putint:
push ax
push bx
push cx
push dx
call intstr ;convert ax to a string, ax now holds the address of that string
call putstring ;print the string at ax
pop dx
pop cx
pop bx
pop ax
ret
;the next utility functions simply print a space or a newline
;these help me save code when printing lots of things for debugging
;both keep the value of ax
space db ' ',0
line db 0Dh,0Ah,0
;print one space
putspace:
push ax
mov ax,space
jmp put_separator
;print a line break (0Dh,0Ah)
putline:
push ax
mov ax,line
;shared ending of putspace and putline: print the string at ax and restore ax
put_separator:
call putstring
pop ax
ret
;a function for printing a single character that is the value of al
;exactly one byte is written to standard output with DOS function 40h,
;so a zero byte is written too instead of being treated as an empty string
;ax, bx, cx and dx are restored before returning
char: db 0,0
putchar:
push ax
push bx
push cx
push dx
mov [char],al ;DOS writes from memory, so the character is stored first
mov dx,char ;address of the byte to write
mov cx,1 ;write exactly one byte
mov bx,1 ;file handle 1=stdout
mov ah,40h ;select DOS function 40h write
int 21h
pop dx
pop cx
pop bx
pop ax
ret
;four small functions for common combinations
;each one ends by jumping into putspace or putline, whose ret returns to our caller
;this saves a few bytes in the assembled code

;print the integer in ax followed by a space
putint_and_space:
call putint
jmp putspace

;print the integer in ax followed by a line break
putint_and_line:
call putint
jmp putline

;print the string at address ax followed by a space
putstr_and_space:
call putstring
jmp putspace

;print the string at address ax followed by a line break
putstr_and_line:
call putstring
jmp putline
;end of chastelib
;variables of the main program
arg_string_index dw 0 ;address of the argument currently being looked at
arg_string_end dw 0 ;address just past the last character of the argument string
file_error_message db 'Could not open the file! Error number: ',0
file_handle dw 0 ;handle returned by DOS open (holds the error number when the open failed)
end_of_file db 'EOF',0 ;only referenced by commented-out debugging code
;where we will store data from the file
bytes_read dw 0 ;word reserved for a count of bytes read
string_search dw 0 ; place to hold the search string pointer
string_replace dw 0 ; place to hold the replacement string pointer
byte_array db 0x80 dup 0 ;0x80 byte buffer for the bytes read from the file