apps/plugins/frotz/text.c at master · tsiry-sandratraina.com/rockbox-zig

tsiry-sandratraina.com / rockbox-zig
fork atom
A modern Music Player Daemon based on Rockbox open source high quality audio player
libadwaita audio rust zig deno mpris rockbox mpd
fork atom
rockbox-zig / apps / plugins / frotz / text.c
at master 1109 lines 22 kB view raw
wrap content
Torne Wuff New plugin: frotz, a Z-machine interpreter, for playing interactive fiction. 16y ago
7f28c94e
   1/* text.c - Text manipulation functions
   2 *	Copyright (c) 1995-1997 Stefan Jokisch
   3 *
   4 * This file is part of Frotz.
   5 *
   6 * Frotz is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * Frotz is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  19 */
  20
  21#include "frotz.h"
  22
  23enum string_type {
  24    LOW_STRING, ABBREVIATION, HIGH_STRING, EMBEDDED_STRING, VOCABULARY
  25};
  26
  27extern zword object_name (zword);
  28
  29static zchar decoded[10];
  30static zword encoded[3];
  31
  32/* 
  33 * According to Matteo De Luigi <matteo.de.luigi@libero.it>, 
  34 * 0xab and 0xbb were in each other's proper positions.
  35 *   Sat Apr 21, 2001
  36 */
  37static zchar zscii_to_latin1[] = {
  38    0xe4, 0xf6, 0xfc, 0xc4, 0xd6, 0xdc, 0xdf, 0xbb,
  39    0xab, 0xeb, 0xef, 0xff, 0xcb, 0xcf, 0xe1, 0xe9,
  40    0xed, 0xf3, 0xfa, 0xfd, 0xc1, 0xc9, 0xcd, 0xd3,
  41    0xda, 0xdd, 0xe0, 0xe8, 0xec, 0xf2, 0xf9, 0xc0,
  42    0xc8, 0xcc, 0xd2, 0xd9, 0xe2, 0xea, 0xee, 0xf4,
  43    0xfb, 0xc2, 0xca, 0xce, 0xd4, 0xdb, 0xe5, 0xc5,
  44    0xf8, 0xd8, 0xe3, 0xf1, 0xf5, 0xc3, 0xd1, 0xd5,
  45    0xe6, 0xc6, 0xe7, 0xc7, 0xfe, 0xf0, 0xde, 0xd0,
  46    0xa3, 0x00, 0x00, 0xa1, 0xbf
  47};
  48
  49/*
  50 * translate_from_zscii
  51 *
  52 * Map a ZSCII character onto the ISO Latin-1 alphabet.
  53 *
  54 */
  55
  56zchar translate_from_zscii (zbyte c)
  57{
  58
  59    if (c == 0xfc)
  60	return ZC_MENU_CLICK;
  61    if (c == 0xfd)
  62	return ZC_DOUBLE_CLICK;
  63    if (c == 0xfe)
  64	return ZC_SINGLE_CLICK;
  65
  66    if (c >= 0x9b && story_id != BEYOND_ZORK) {
  67
  68	if (hx_unicode_table != 0) {	/* game has its own Unicode table */
  69
  70	    zbyte N;
  71
  72	    LOW_BYTE (hx_unicode_table, N)
  73
  74	    if (c - 0x9b < N) {
  75
  76		zword addr = hx_unicode_table + 1 + 2 * (c - 0x9b);
  77		zword unicode;
  78
  79		LOW_WORD (addr, unicode)
  80
  81		return (unicode < 0x100) ? (zchar) unicode : '?';
  82
  83	    } else return '?';
  84
  85	} else				/* game uses standard set */
  86
  87	    if (c <= 0xdf) {
  88
  89		if (c == 0xdc || c == 0xdd)	/* Oe and oe ligatures */
  90		    return '?';			/* are not ISO-Latin 1 */
  91
  92		return zscii_to_latin1[c - 0x9b];
  93
  94	    } else return '?';
  95    }
  96
  97    return c;
  98
  99}/* translate_from_zscii */
 100
 101/*
 102 * translate_to_zscii
 103 *
 104 * Map an ISO Latin-1 character onto the ZSCII alphabet.
 105 *
 106 */
 107
 108zbyte translate_to_zscii (zchar c)
 109{
 110    int i;
 111
 112    if (c == ZC_SINGLE_CLICK)
 113	return 0xfe;
 114    if (c == ZC_DOUBLE_CLICK)
 115	return 0xfd;
 116    if (c == ZC_MENU_CLICK)
 117	return 0xfc;
 118
 119    if (c >= ZC_LATIN1_MIN) {
 120
 121	if (hx_unicode_table != 0) {	/* game has its own Unicode table */
 122
 123	    zbyte N;
 124	    int i;
 125
 126	    LOW_BYTE (hx_unicode_table, N)
 127
 128	    for (i = 0x9b; i < 0x9b + N; i++) {
 129
 130		zword addr = hx_unicode_table + 1 + 2 * (i - 0x9b);
 131		zword unicode;
 132
 133		LOW_WORD (addr, unicode)
 134
 135		if (c == unicode)
 136		    return (zbyte) i;
 137
 138	    }
 139
 140	    return '?';
 141
 142	} else {			/* game uses standard set */
 143
 144	    for (i = 0x9b; i <= 0xdf; i++)
 145		if (c == zscii_to_latin1[i - 0x9b])
 146		    return (zbyte) i;
 147
 148	    return '?';
 149
 150	}
 151    }
 152
 153    if (c == 0)		/* Safety thing from David Kinder */
 154	c = '?';	/* regarding his Unicode patches */
 155			/* Sept 15, 2002 */
 156
 157    return c;
 158
 159}/* translate_to_zscii */
 160
 161/*
 162 * alphabet
 163 *
 164 * Return a character from one of the three character sets.
 165 *
 166 */
 167
 168static zchar alphabet (int set, int index)
 169{
 170
 171    if (h_alphabet != 0) {	/* game uses its own alphabet */
 172
 173	zbyte c;
 174
 175	zword addr = h_alphabet + 26 * set + index;
 176	LOW_BYTE (addr, c)
 177
 178	return translate_from_zscii (c);
 179
 180    } else			/* game uses default alphabet */
 181
 182	if (set == 0)
 183	    return 'a' + index;
 184	else if (set == 1)
 185	    return 'A' + index;
 186	else if (h_version == V1)
 187	    return " 0123456789.,!?_#'\"/\\<-:()"[index];
 188	else
 189	    return " ^0123456789.,!?_#'\"/\\-:()"[index];
 190
 191}/* alphabet */
 192
 193/*
 194 * load_string
 195 *
 196 * Copy a ZSCII string from the memory to the global "decoded" string.
 197 *
 198 */
 199
 200static void load_string (zword addr, zword length)
 201{
 202    int resolution = (h_version <= V3) ? 2 : 3;
 203    int i = 0;
 204
 205    while (i < 3 * resolution)
 206
 207	if (i < length) {
 208
 209	    zbyte c;
 210
 211	    LOW_BYTE (addr, c)
 212	    addr++;
 213
 214	    decoded[i++] = translate_from_zscii (c);
 215
 216	} else decoded[i++] = 0;
 217
 218}/* load_string */
 219
 220/*
 221 * encode_text
 222 *
 223 * Encode the Unicode text in the global "decoded" string then write
 224 * the result to the global "encoded" array. (This is used to look up
 225 * words in the dictionary.) Up to V3 the vocabulary resolution is
 226 * two, since V4 it is three words. Because each word contains three
 227 * Z-characters, that makes six or nine Z-characters respectively.
 228 * Longer words are chopped to the proper size, shorter words are are
 229 * padded out with 5's. For word completion we pad with 0s and 31s,
 230 * the minimum and maximum Z-characters.
 231 *
 232 */
 233
 234static void encode_text (int padding)
 235{
 236    static zchar again[] = { 'a', 'g', 'a', 'i', 'n', 0 };
 237    static zchar examine[] = { 'e', 'x', 'a', 'm', 'i', 'n', 'e', 0 };
 238    static zchar wait[] = { 'w', 'a', 'i', 't', 0 };
 239
 240    zbyte zchars[12];
 241    const zchar *ptr = decoded;
 242    zchar c;
 243    int resolution = (h_version <= V3) ? 2 : 3;
 244    int i = 0;
 245
 246    /* Expand abbreviations that some old Infocom games lack */
 247
 248    if (f_setup.expand_abbreviations)
 249
 250	if (padding == 0x05 && decoded[1] == 0)
 251
 252	    switch (decoded[0]) {
 253		case 'g': ptr = again; break;
 254		case 'x': ptr = examine; break;
 255		case 'z': ptr = wait; break;
 256	    }
 257
 258    /* Translate string to a sequence of Z-characters */
 259
 260    while (i < 3 * resolution)
 261
 262	if ((c = *ptr++) != 0) {
 263
 264	    int index, set;
 265	    zbyte c2;
 266
 267	    /* Search character in the alphabet */
 268
 269	    for (set = 0; set < 3; set++)
 270		for (index = 0; index < 26; index++)
 271		    if (c == alphabet (set, index))
 272			goto letter_found;
 273
 274	    /* Character not found, store its ZSCII value */
 275
 276	    c2 = translate_to_zscii (c);
 277
 278	    zchars[i++] = 5;
 279	    zchars[i++] = 6;
 280	    zchars[i++] = c2 >> 5;
 281	    zchars[i++] = c2 & 0x1f;
 282
 283	    continue;
 284
 285	letter_found:
 286
 287	    /* Character found, store its index */
 288
 289	    if (set != 0)
 290		zchars[i++] = ((h_version <= V2) ? 1 : 3) + set;
 291
 292	    zchars[i++] = index + 6;
 293
 294	} else zchars[i++] = padding;
 295
 296    /* Three Z-characters make a 16bit word */
 297
 298    for (i = 0; i < resolution; i++)
 299
 300	encoded[i] =
 301	    (zchars[3 * i + 0] << 10) |
 302	    (zchars[3 * i + 1] << 5) |
 303	    (zchars[3 * i + 2]);
 304
 305    encoded[resolution - 1] |= 0x8000;
 306
 307}/* encode_text */
 308
 309/*
 310 * z_check_unicode, test if a unicode character can be read and printed.
 311 *
 312 * 	zargs[0] = Unicode
 313 *
 314 */
 315
 316void z_check_unicode (void)
 317{
 318    zword c = zargs[0];
 319
 320    if (c >= 0x20 && c <= 0x7e)
 321	store (3);
 322    else if (c == 0xa0)
 323	store (1);
 324    else if (c >= 0xa1 && c <= 0xff)
 325	store (3);
 326    else
 327	store (0);
 328
 329}/* z_check_unicode */
 330
 331/*
 332 * z_encode_text, encode a ZSCII string for use in a dictionary.
 333 *
 334 *	zargs[0] = address of text buffer
 335 *	zargs[1] = length of ASCII string
 336 *	zargs[2] = offset of ASCII string within the text buffer
 337 *	zargs[3] = address to store encoded text in
 338 *
 339 * This is a V5+ opcode and therefore the dictionary resolution must be
 340 * three 16bit words.
 341 *
 342 */
 343
 344void z_encode_text (void)
 345{
 346    int i;
 347
 348    load_string ((zword) (zargs[0] + zargs[2]), zargs[1]);
 349
 350    encode_text (0x05);
 351
 352    for (i = 0; i < 3; i++)
 353	storew ((zword) (zargs[3] + 2 * i), encoded[i]);
 354
 355}/* z_encode_text */
 356
 357/*
 358 * decode_text
 359 *
 360 * Convert encoded text to Unicode. The encoded text consists of 16bit
 361 * words. Every word holds 3 Z-characters (5 bits each) plus a spare
 362 * bit to mark the last word. The Z-characters translate to ZSCII by
 363 * looking at the current current character set. Some select another
 364 * character set, others refer to abbreviations.
 365 *
 366 * There are several different string types:
 367 *
 368 *    LOW_STRING - from the lower 64KB (byte address)
 369 *    ABBREVIATION - from the abbreviations table (word address)
 370 *    HIGH_STRING - from the end of the memory map (packed address)
 371 *    EMBEDDED_STRING - from the instruction stream (at PC)
 372 *    VOCABULARY - from the dictionary (byte address)
 373 *
 374 * The last type is only used for word completion.
 375 *
 376 */
 377
 378#define outchar(c)	if (st==VOCABULARY) *ptr++=c; else print_char(c)
 379
 380static void decode_text (enum string_type st, zword addr)
 381{
 382    zchar *ptr;
 383    long byte_addr;
 384    zchar c2;
 385    zword code;
 386    zbyte c, prev_c = 0;
 387    int shift_state = 0;
 388    int shift_lock = 0;
 389    int status = 0;
 390
 391    ptr = NULL;		/* makes compilers shut up */
 392    byte_addr = 0;
 393
 394    /* Calculate the byte address if necessary */
 395
 396    if (st == ABBREVIATION)
 397
 398	byte_addr = (long) addr << 1;
 399
 400    else if (st == HIGH_STRING) {
 401
 402	if (h_version <= V3)
 403	    byte_addr = (long) addr << 1;
 404	else if (h_version <= V5)
 405	    byte_addr = (long) addr << 2;
 406	else if (h_version <= V7)
 407	    byte_addr = ((long) addr << 2) + ((long) h_strings_offset << 3);
 408	else /* h_version == V8 */
 409	    byte_addr = (long) addr << 3;
 410
 411	if (byte_addr >= story_size)
 412	    runtime_error (ERR_ILL_PRINT_ADDR);
 413
 414    }
 415
 416    /* Loop until a 16bit word has the highest bit set */
 417
 418    if (st == VOCABULARY)
 419	ptr = decoded;
 420
 421    do {
 422
 423	int i;
 424
 425	/* Fetch the next 16bit word */
 426
 427	if (st == LOW_STRING || st == VOCABULARY) {
 428	    LOW_WORD (addr, code)
 429	    addr += 2;
 430	} else if (st == HIGH_STRING || st == ABBREVIATION) {
 431	    HIGH_WORD (byte_addr, code)
 432	    byte_addr += 2;
 433	} else
 434	    CODE_WORD (code)
 435
 436	/* Read its three Z-characters */
 437
 438	for (i = 10; i >= 0; i -= 5) {
 439
 440	    zword abbr_addr;
 441	    zword ptr_addr;
 442
 443	    c = (code >> i) & 0x1f;
 444
 445	    switch (status) {
 446
 447	    case 0:	/* normal operation */
 448
 449		if (shift_state == 2 && c == 6)
 450		    status = 2;
 451
 452		else if (h_version == V1 && c == 1)
 453		    new_line ();
 454
 455		else if (h_version >= V2 && shift_state == 2 && c == 7)
 456		    new_line ();
 457
 458		else if (c >= 6)
 459		    outchar (alphabet (shift_state, c - 6));
 460
 461		else if (c == 0)
 462		    outchar (' ');
 463
 464		else if (h_version >= V2 && c == 1)
 465		    status = 1;
 466
 467		else if (h_version >= V3 && c <= 3)
 468		    status = 1;
 469
 470		else {
 471
 472		    shift_state = (shift_lock + (c & 1) + 1) % 3;
 473
 474		    if (h_version <= V2 && c >= 4)
 475			shift_lock = shift_state;
 476
 477		    break;
 478
 479		}
 480
 481		shift_state = shift_lock;
 482
 483		break;
 484
 485	    case 1:	/* abbreviation */
 486
 487		ptr_addr = h_abbreviations + 64 * (prev_c - 1) + 2 * c;
 488
 489		LOW_WORD (ptr_addr, abbr_addr)
 490		decode_text (ABBREVIATION, abbr_addr);
 491
 492		status = 0;
 493		break;
 494
 495	    case 2:	/* ZSCII character - first part */
 496
 497		status = 3;
 498		break;
 499
 500	    case 3:	/* ZSCII character - second part */
 501
 502		c2 = translate_from_zscii ((prev_c << 5) | c);
 503		outchar (c2);
 504
 505		status = 0;
 506		break;
 507
 508	    }
 509
 510	    prev_c = c;
 511
 512	}
 513
 514    } while (!(code & 0x8000));
 515
 516    if (st == VOCABULARY)
 517	*ptr = 0;
 518
 519}/* decode_text */
 520
 521#undef outchar
 522
 523/*
 524 * z_new_line, print a new line.
 525 *
 526 * 	no zargs used
 527 *
 528 */
 529
 530void z_new_line (void)
 531{
 532
 533    new_line ();
 534
 535}/* z_new_line */
 536
 537/*
 538 * z_print, print a string embedded in the instruction stream.
 539 *
 540 *	no zargs used
 541 *
 542 */
 543
 544void z_print (void)
 545{
 546
 547    decode_text (EMBEDDED_STRING, 0);
 548
 549}/* z_print */
 550
 551/*
 552 * z_print_addr, print a string from the lower 64KB.
 553 *
 554 *	zargs[0] = address of string to print
 555 *
 556 */
 557
 558void z_print_addr (void)
 559{
 560
 561    decode_text (LOW_STRING, zargs[0]);
 562
 563}/* z_print_addr */
 564
 565/*
 566 * z_print_char print a single ZSCII character.
 567 *
 568 *	zargs[0] = ZSCII character to be printed
 569 *
 570 */
 571
 572void z_print_char (void)
 573{
 574
 575    print_char (translate_from_zscii (zargs[0]));
 576
 577}/* z_print_char */
 578
 579/*
 580 * z_print_form, print a formatted table.
 581 *
 582 *	zargs[0] = address of formatted table to be printed
 583 *
 584 */
 585
 586void z_print_form (void)
 587{
 588    zword count;
 589    zword addr = zargs[0];
 590
 591    bool first = TRUE;
 592
 593    for (;;) {
 594
 595	LOW_WORD (addr, count)
 596	addr += 2;
 597
 598	if (count == 0)
 599	    break;
 600
 601	if (!first)
 602	    new_line ();
 603
 604	while (count--) {
 605
 606	    zbyte c;
 607
 608	    LOW_BYTE (addr, c)
 609	    addr++;
 610
 611	    print_char (translate_from_zscii (c));
 612
 613	}
 614
 615	first = FALSE;
 616
 617    }
 618
 619}/* z_print_form */
 620
 621/*
 622 * print_num
 623 *
 624 * Print a signed 16bit number.
 625 *
 626 */
 627
 628void print_num (zword value)
 629{
 630    int i;
 631
 632    /* Print sign */
 633
 634    if ((short) value < 0) {
 635	print_char ('-');
 636	value = - (short) value;
 637    }
 638
 639    /* Print absolute value */
 640
 641    for (i = 10000; i != 0; i /= 10)
 642	if (value >= i || i == 1)
 643	    print_char ('0' + (value / i) % 10);
 644
 645}/* print_num */
 646
 647/*
 648 * z_print_num, print a signed number.
 649 *
 650 * 	zargs[0] = number to print
 651 *
 652 */
 653
 654void z_print_num (void)
 655{
 656
 657    print_num (zargs[0]);
 658
 659}/* z_print_num */
 660
 661/*
 662 * print_object
 663 *
 664 * Print an object description.
 665 *
 666 */
 667
 668void print_object (zword object)
 669{
 670    zword addr = object_name (object);
 671    zword code = 0x94a5;
 672    zbyte length;
 673
 674    LOW_BYTE (addr, length)
 675    addr++;
 676
 677    if (length != 0)
 678	LOW_WORD (addr, code)
 679
 680    if (code == 0x94a5) { 	/* encoded text 0x94a5 == empty string */
 681
 682	print_string ("object#");	/* supply a generic name */
 683	print_num (object);		/* for anonymous objects */
 684
 685    } else decode_text (LOW_STRING, addr);
 686
 687}/* print_object */
 688
 689/*
 690 * z_print_obj, print an object description.
 691 *
 692 * 	zargs[0] = number of object to be printed
 693 *
 694 */
 695
 696void z_print_obj (void)
 697{
 698
 699    print_object (zargs[0]);
 700
 701}/* z_print_obj */
 702
 703/*
 704 * z_print_paddr, print the string at the given packed address.
 705 *
 706 * 	zargs[0] = packed address of string to be printed
 707 *
 708 */
 709
 710void z_print_paddr (void)
 711{
 712
 713    decode_text (HIGH_STRING, zargs[0]);
 714
 715}/* z_print_paddr */
 716
 717/*
 718 * z_print_ret, print the string at PC, print newline then return true.
 719 *
 720 * 	no zargs used
 721 *
 722 */
 723
 724void z_print_ret (void)
 725{
 726
 727    decode_text (EMBEDDED_STRING, 0);
 728    new_line ();
 729    ret (1);
 730
 731}/* z_print_ret */
 732
 733/*
 734 * print_string
 735 *
 736 * Print a string of ASCII characters.
 737 *
 738 */
 739
 740void print_string (const char *s)
 741{
 742    char c;
 743
 744    while ((c = *s++) != 0)
 745
 746	if (c == '\n')
 747	    new_line ();
 748	else
 749	    print_char (c);
 750
 751}/* print_string */
 752
 753/*
 754 * z_print_unicode
 755 *
 756 * 	zargs[0] = Unicode
 757 *
 758 */
 759
 760void z_print_unicode (void)
 761{
 762
 763    print_char ((zargs[0] <= 0xff) ? zargs[0] : '?');
 764
 765}/* z_print_unicode */
 766
 767/*
 768 * lookup_text
 769 *
 770 * Scan a dictionary searching for the given word. The first argument
 771 * can be
 772 *
 773 * 0x00 - find the first word which is >= the given one
 774 * 0x05 - find the word which exactly matches the given one
 775 * 0x1f - find the last word which is <= the given one
 776 *
 777 * The return value is 0 if the search fails.
 778 *
 779 */
 780
 781static zword lookup_text (int padding, zword dct)
 782{
 783    zword entry_addr;
 784    zword entry_count;
 785    zword entry;
 786    zword addr;
 787    zbyte entry_len;
 788    zbyte sep_count;
 789    int resolution = (h_version <= V3) ? 2 : 3;
 790    int entry_number;
 791    int lower, upper;
 792    int i;
 793    bool sorted;
 794
 795    encode_text (padding);
 796
 797    LOW_BYTE (dct, sep_count)		/* skip word separators */
 798    dct += 1 + sep_count;
 799    LOW_BYTE (dct, entry_len)		/* get length of entries */
 800    dct += 1;
 801    LOW_WORD (dct, entry_count)		/* get number of entries */
 802    dct += 2;
 803
 804    if ((short) entry_count < 0) {	/* bad luck, entries aren't sorted */
 805
 806	entry_count = - (short) entry_count;
 807	sorted = FALSE;
 808
 809    } else sorted = TRUE;		/* entries are sorted */
 810
 811    lower = 0;
 812    upper = entry_count - 1;
 813
 814    while (lower <= upper) {
 815
 816	if (sorted)                             /* binary search */
 817	    entry_number = (lower + upper) / 2;
 818	else                                    /* linear search */
 819	    entry_number = lower;
 820
 821	entry_addr = dct + entry_number * entry_len;
 822
 823	/* Compare word to dictionary entry */
 824
 825	addr = entry_addr;
 826
 827	for (i = 0; i < resolution; i++) {
 828	    LOW_WORD (addr, entry)
 829	    if (encoded[i] != entry)
 830		goto continuing;
 831	    addr += 2;
 832	}
 833
 834	return entry_addr;		/* exact match found, return now */
 835
 836    continuing:
 837
 838	if (sorted)				/* binary search */
 839
 840	    if (encoded[i] > entry)
 841		lower = entry_number + 1;
 842	    else
 843		upper = entry_number - 1;
 844
 845	else lower++;                           /* linear search */
 846
 847    }
 848
 849    /* No exact match has been found */
 850
 851    if (padding == 0x05)
 852	return 0;
 853
 854    entry_number = (padding == 0x00) ? lower : upper;
 855
 856    if (entry_number == -1 || entry_number == entry_count)
 857	return 0;
 858
 859    return dct + entry_number * entry_len;
 860
 861}/* lookup_text */
 862
 863/*
 864 * tokenise_text
 865 *
 866 * Translate a single word to a token and append it to the token
 867 * buffer. Every token consists of the address of the dictionary
 868 * entry, the length of the word and the offset of the word from
 869 * the start of the text buffer. Unknown words cause empty slots
 870 * if the flag is set (such that the text can be scanned several
 871 * times with different dictionaries); otherwise they are zero.
 872 *
 873 */
 874
 875static void tokenise_text (zword text, zword length, zword from, zword parse, zword dct, bool flag)
 876{
 877    zword addr;
 878    zbyte token_max, token_count;
 879
 880    LOW_BYTE (parse, token_max)
 881    parse++;
 882    LOW_BYTE (parse, token_count)
 883
 884    if (token_count < token_max) {	/* sufficient space left for token? */
 885
 886	storeb (parse++, token_count + 1);
 887
 888	load_string ((zword) (text + from), length);
 889
 890	addr = lookup_text (0x05, dct);
 891
 892	if (addr != 0 || !flag) {
 893
 894	    parse += 4 * token_count;
 895
 896	    storew ((zword) (parse + 0), addr);
 897	    storeb ((zword) (parse + 2), length);
 898	    storeb ((zword) (parse + 3), from);
 899
 900	}
 901
 902    }
 903
 904}/* tokenise_text */
 905
 906/*
 907 * tokenise_line
 908 *
 909 * Split an input line into words and translate the words to tokens.
 910 *
 911 */
 912
 913void tokenise_line (zword text, zword token, zword dct, bool flag)
 914{
 915    zword addr1;
 916    zword addr2;
 917    zbyte length;
 918    zbyte c;
 919
 920    length = 0;		/* makes compilers shut up */
 921
 922    /* Use standard dictionary if the given dictionary is zero */
 923
 924    if (dct == 0)
 925	dct = h_dictionary;
 926
 927    /* Remove all tokens before inserting new ones */
 928
 929    storeb ((zword) (token + 1), 0);
 930
 931    /* Move the first pointer across the text buffer searching for the
 932       beginning of a word. If this succeeds, store the position in a
 933       second pointer. Move the first pointer searching for the end of
 934       the word. When it is found, "tokenise" the word. Continue until
 935       the end of the buffer is reached. */
 936
 937    addr1 = text;
 938    addr2 = 0;
 939
 940    if (h_version >= V5) {
 941	addr1++;
 942	LOW_BYTE (addr1, length)
 943    }
 944
 945    do {
 946
 947	zword sep_addr;
 948	zbyte sep_count;
 949	zbyte separator;
 950
 951	/* Fetch next ZSCII character */
 952
 953	addr1++;
 954
 955	if (h_version >= V5 && addr1 == text + 2 + length)
 956	    c = 0;
 957	else
 958	    LOW_BYTE (addr1, c)
 959
 960	/* Check for separator */
 961
 962	sep_addr = dct;
 963
 964	LOW_BYTE (sep_addr, sep_count)
 965	sep_addr++;
 966
 967	do {
 968
 969	    LOW_BYTE (sep_addr, separator)
 970	    sep_addr++;
 971
 972	} while (c != separator && --sep_count != 0);
 973
 974	/* This could be the start or the end of a word */
 975
 976	if (sep_count == 0 && c != ' ' && c != 0) {
 977
 978	    if (addr2 == 0)
 979		addr2 = addr1;
 980
 981	} else if (addr2 != 0) {
 982
 983	    tokenise_text (
 984		text,
 985		(zword) (addr1 - addr2),
 986		(zword) (addr2 - text),
 987		token, dct, flag );
 988
 989	    addr2 = 0;
 990
 991	}
 992
 993	/* Translate separator (which is a word in its own right) */
 994
 995	if (sep_count != 0)
 996
 997	    tokenise_text (
 998		text,
 999		(zword) (1),
1000		(zword) (addr1 - text),
1001		token, dct, flag );
1002
1003    } while (c != 0);
1004
1005}/* tokenise_line */
1006
1007/*
1008 * z_tokenise, make a lexical analysis of a ZSCII string.
1009 *
1010 *	zargs[0] = address of string to analyze
1011 *	zargs[1] = address of token buffer
1012 *	zargs[2] = address of dictionary (optional)
1013 *	zargs[3] = set when unknown words cause empty slots (optional)
1014 *
1015 */
1016
1017void z_tokenise (void)
1018{
1019
1020    /* Supply default arguments */
1021
1022    if (zargc < 3)
1023	zargs[2] = 0;
1024    if (zargc < 4)
1025	zargs[3] = 0;
1026
1027    /* Call tokenise_line to do the real work */
1028
1029    tokenise_line (zargs[0], zargs[1], zargs[2], zargs[3] != 0);
1030
1031}/* z_tokenise */
1032
1033/*
1034 * completion
1035 *
1036 * Scan the vocabulary to complete the last word on the input line
1037 * (similar to "tcsh" under Unix). The return value is
1038 *
1039 *    2 ==> completion is impossible
1040 *    1 ==> completion is ambiguous
1041 *    0 ==> completion is successful
1042 *
1043 * The function also returns a string in its second argument. In case
1044 * of 2, the string is empty; in case of 1, the string is the longest
1045 * extension of the last word on the input line that is common to all
1046 * possible completions (for instance, if the last word on the input
1047 * is "fo" and its only possible completions are "follow" and "folly"
1048 * then the string is "ll"); in case of 0, the string is an extension
1049 * to the last word that results in the only possible completion.
1050 *
1051 */
1052
1053int completion (const zchar *buffer, zchar *result)
1054{
1055    zword minaddr;
1056    zword maxaddr;
1057    zchar *ptr;
1058    zchar c;
1059    int len;
1060    int i;
1061
1062    *result = 0;
1063
1064    /* Copy last word to "decoded" string */
1065
1066    len = 0;
1067
1068    while ((c = *buffer++) != 0)
1069
1070	if (c != ' ') {
1071
1072	    if (len < 9)
1073		decoded[len++] = c;
1074
1075	} else len = 0;
1076
1077    decoded[len] = 0;
1078
1079    /* Search the dictionary for first and last possible extensions */
1080
1081    minaddr = lookup_text (0x00, h_dictionary);
1082    maxaddr = lookup_text (0x1f, h_dictionary);
1083
1084    if (minaddr == 0 || maxaddr == 0 || minaddr > maxaddr)
1085	return 2;
1086
1087    /* Copy first extension to "result" string */
1088
1089    decode_text (VOCABULARY, minaddr);
1090
1091    ptr = result;
1092
1093    for (i = len; (c = decoded[i]) != 0; i++)
1094	*ptr++ = c;
1095    *ptr = 0;
1096
1097    /* Merge second extension with "result" string */
1098
1099    decode_text (VOCABULARY, maxaddr);
1100
1101    for (i = len, ptr = result; (c = decoded[i]) != 0; i++, ptr++)
1102	if (*ptr != c) break;
1103    *ptr = 0;
1104
1105    /* Search was ambiguous or successful */
1106
1107    return (minaddr == maxaddr) ? 0 : 1;
1108
1109}/* completion */