jhton.c

// Copyright (C) 2000 - 2002 Hewlett-Packard Company
//
// This program is free software; you can redistribute it and/or modify it
// under the term of the GNU Lesser General Public License as published by the
// Free Software Foundation; either version 2 of the License, or (at your
// option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
// _________________

// @(#) $Revision: 4.22 $ $Source: /cvsroot/judy/tool/jhton.c,v $
//
// JUDY HTML MANUAL ENTRY TO NROFF TRANSLATOR.
//
// USAGE:  <program> filename.htm[l]
// Writes nroff -man output to stdout.
// Suggestion:  Pipe output through rmnl to delete extraneous newlines that
// this program cannot easily avoid.
//
// Compile with -DDEBUG for assertions and other checks.
// If so, run with DUMP set in the env to dump the docnodes tree.
//
// CONCEPT:  This program was written out of necessity for the 11.11 OEUR
// release of Judy.  Ideally our manual entries would be written in an abstract
// XML format with an XSLT-based means to translate them to any other format,
// such as HTML or nroff.  In lieu of that, this program knows how to translate
// a limited subset of HTML, as used in our manual entries, to equivalent
// nroff, as described below preceding EmitNroffHeader().
//
// The translation is still complex enough to merit writing a parser that first
// builds a tree representation of the structured (HTML) document.  I would use
// yacc and lex if I knew them...
//
// This program is written for simplicity, cleanliness, and robustness, and not
// necessarily for speed.
//
// FLEXIBILITY:  It should be possible to teach this program new HTML tags; see
// data structures below.  The program might also be useful for other HTML
// manual entries, so long as they follow the simple conventions used in the
// Judy entries; see the comments before EmitNroffHeader().  You can also
// discover the format by trial and error -- this program issues verbose error
// messages.
//
// CONVENTIONS:
//
// - Global variables start with "g_" except option_*.
// - Global constants start with "gc_".
//
// - Pointer variable names start with one "P" per level of indirection.
//
// - Exception:  (char *) and (char[]) types, that is, strings, do not
//   necessarily start with "P".  A generic char pointer is named "Pch".
//   Variables of type (char **) start with a single P.
//
// - Exception:  the well known name "char ** argv".
//
// - Pointers to first elements of serial linked lists of structures have names
//   ending in "Phead".
//
// - Line lengths are less than 80 columns.  Message parameters to Error()
//   begin on the same line for easy finding.
//
// - Error messages might exceed one line when emitted, but no effort is made
//   to wrap them nicely.

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>		// for varargs.
#include <string.h>		// for str*().
#include <errno.h>
#include <ctype.h>		// for isspace(), etc.
#include <time.h>
#include <assert.h>

#define	ISSPACE(Char) isspace((int) (Char))	// for convenience with lint.
#define	ISDIGIT(Char) isdigit((int) (Char))
#define	ISUPPER(Char) isupper((int) (Char))

#define	PUTS(String) (void) fputs(String, stdout)	// for convenience.
#define	PUTC(Char)   (void) putc((int) (Char), stdout)

#ifndef DEBUG
#define NDEBUG			// turn off assertions by default.
#endif

// Shorthand notation to avoid #ifdefs for single-line conditional statement:
//
// Warning:  This cannot be used around compiler directives, such as
// "#include", nor in the case where Code contains a comma other than nested
// within parentheses or quotes.

#ifndef DEBUG
#define	DBGCODE(Code) // null.
#else
#define	DBGCODE(Code) Code
#endif


// ****************************************************************************
// MISCELLANEOUS GLOBAL VALUES:

#define	FUNCTION			// null; easy to find functions.
#define	FALSE	0
#define	TRUE	1
#define	CHNULL	('\0')
#define	PCNULL	((char *) NULL)
typedef int bool_t;			// for clarity with Boolean values.

char * gc_usage[] = {
    "usage: %s filename.htm[l]",
    "",
    "Reads restricted (Judy-specific) HTML from filename.htm[l] and emits",
    "equivalent nroff -man to stdout.",
    PCNULL,
};

char * gc_myname;			// how program was invoked.

#define	OKEXIT	0
#define	NOEXIT	0			// values for Error().
#define	ERREXIT	1
#define	USAGE	2
#define	NOERRNO	0

// Prefix for printf formats:

#define	FILELINE "File \"%s\", line %d: "

// Common error string:

char * FmtErrLineEnds = FILELINE "Input line ends within an HTML tag; for this "
			"translator, all tags must be on a single input line";

// Macros for skipping whitespace or non-whitespace; in the latter case,
// stopping at end of line or end of tag:

#define	SKIPSPACE(Pch)	   { while (ISSPACE(*(Pch))) ++(Pch); }

#define	SKIPNONSPACE(Pch)  { while ((! ISSPACE(*(Pch))) \
				 && (*(Pch) != CHNULL)  \
				 && (*(Pch) != '>')) ++(Pch); }

// Highest line number + 1, and last input line number that caused output:

int g_linenumlim;
int g_prevlinenum = 0;

// <PRE> block equivalents in nroff need some special handling for bold font
// and for continuing a tagged paragraph; these are bit flags:

#define	INPRE_BLOCK  0x1	// came from <PRE>.
#define	INPRE_BOLD   0x2	// came from <B><PRE>.
#define	INPRE_INDENT 0x4	// under <DL> below top level.


// ****************************************************************************
// DOCUMENT NODE TYPES:
//
// If an HTML tag is not in this list, it's unrecognized and causes a fatal
// error.  Otherwise the tag type (dn_type) is one of DN_TYPE_*, which are
// defined so the code can use them, but they MUST match the order of
// initialization of g_dntype[].
//
// Note:  The default node type is DN_TYPE_TEXT, that is, text outside of any
// tag.

enum {
	DN_TYPE_TEXT = 0,
	DN_TYPE_HTML,
	DN_TYPE_HEAD,
	DN_TYPE_TITLE,
	DN_TYPE_BODY,
	DN_TYPE_COMM,
	DN_TYPE_TABLE,
	DN_TYPE_TR,
	DN_TYPE_TD,
	DN_TYPE_DL,
	DN_TYPE_DT,
	DN_TYPE_DD,
	DN_TYPE_A,
	DN_TYPE_B,
	DN_TYPE_I,
	DN_TYPE_PRE,
	DN_TYPE_P,
	DN_TYPE_BR
};

// Regarding dnt_nest:  If an HTML tag type is marked as nesting, that means
// it is required not to be a singleton in this context; it must have a closing
// tag, and when the tree is built, the intervening text is nested as a child.
// Otherwise, intervening text is a sibling; a closing tag is allowed (whether
// or not this makes sense), but is not required; however, if present, it must
// match.

struct docnode_type {
	char *	dnt_tag;		// HTML tag.
	bool_t	dnt_savetag;		// flag: save HTML tag.
	bool_t	dnt_nest;		// flag: see comments above.
	int	dnt_type;		// corresponding number.
} g_dntype[] = {

// Note:  HTML is case-insensitive, but for expediency this program is
// case-sensitive.  Tags must be as shown below.

    { "",	FALSE, FALSE, DN_TYPE_TEXT,  },	// special, see above.

    { "HTML",	FALSE, TRUE,  DN_TYPE_HTML,  },
    { "HEAD",	FALSE, TRUE,  DN_TYPE_HEAD,  },
    { "TITLE",	FALSE, TRUE,  DN_TYPE_TITLE, },
    { "BODY",	FALSE, TRUE,  DN_TYPE_BODY,  },

    { "!--",	TRUE,  FALSE, DN_TYPE_COMM,  },	// comments are singleton tags.

    { "TABLE",	FALSE, TRUE,  DN_TYPE_TABLE, },	// limited understanding!
    { "TR",	FALSE, TRUE,  DN_TYPE_TR,    },
    { "TD",	TRUE,  TRUE,  DN_TYPE_TD,    },

    { "DL",	FALSE, TRUE,  DN_TYPE_DL,    },
    { "DT",	FALSE, TRUE,  DN_TYPE_DT,    },
    { "DD",	FALSE, FALSE, DN_TYPE_DD,    },	// </DD> not req in our manuals.

    { "A",	TRUE,  TRUE,  DN_TYPE_A,     },	// either "name" or "href" type.
    { "B",	FALSE, TRUE,  DN_TYPE_B,     },
    { "I",	FALSE, TRUE,  DN_TYPE_I,     },

    { "PRE",	FALSE, TRUE,  DN_TYPE_PRE,   },

    { "P",	FALSE, FALSE, DN_TYPE_P,     },	// </P>  not req in our manuals.
    { "BR",	FALSE, FALSE, DN_TYPE_BR,    },	// </BR> not req in our manuals.

    { PCNULL,	FALSE, FALSE, 0,	     },	// end of list.
};

// Convenience macros:

#define	TAG(DN_Type)	 (g_dntype[DN_Type].dnt_tag)
#define	SAVETAG(DN_Type) (g_dntype[DN_Type].dnt_savetag)
#define	NEST(DN_Type)	 (g_dntype[DN_Type].dnt_nest)


// ****************************************************************************
// DOCUMENT NODE DATA STRUCTURES:
//
// Document nodes are saved in a doubly-linked tree of docnodes.  Each docnode
// points sideways to a doubly-linked list of sibling docnodes for
// previous/successive unnested document objects, plus points to its parent and
// to the first of a sideways doubly-linked child list of nested objects.  All
// data lives in malloc'd memory.
//
// The dn_text field is null for a tag node unless the tag text is worth
// saving.  The field is non-null for non-tag (document) text.

typedef	struct docnode * Pdn_t;

struct docnode {
	int	dn_type;	// node type, index in g_dntype[].
	int	dn_linenum;	// where introduced, for reconstructing.
	bool_t	dn_closed;	// flag: closing tag was seen.
	bool_t	dn_noemit;	// flag: skip on output, for marking ahead.
	bool_t	dn_bold;	// flag: for <PRE>, whole section is bold.

	char *	dn_text;	// node text; see above.

	Pdn_t	dn_Pprev;	// previous node in sibling list.
	Pdn_t	dn_Pnext;	// next     node in sibling list.
	Pdn_t	dn_Pparent;	// up-link to parent node, if any.
	Pdn_t	dn_Pchild;	// down-link to first node in child subtree.
};

#define	PDNNULL	((Pdn_t) NULL)

Pdn_t g_Pdnhead = PDNNULL;	// head of docnode tree.


// ****************************************************************************
// FUNCTION SIGNATURES (forward declarations):

int	main(int argc, char ** argv);

void	ReadInputFile( char * Filename, FILE * PFile);
void	CheckNesting(Pdn_t Pdn);
void	EmitNroffHeader(char * Filename, char ** PPageName);
void	EmitNroffBody(Pdn_t Pdn, int DLLevel, int InPRE, char * PageName);

void	ExtractHeader(  Pdn_t Pdn, char ** PFileRev,
		        char ** PPageName, char ** PPageSection,
			char *  PLcLetter, char ** PRevision);
char *	ExtractText(    Pdn_t Pdn);
void	ExtractPageInfo(Pdn_t Pdn, char * Pch,
			char ** PPageName, char ** PPageSection,
			char *  PLcLetter);

int	TagType(char * Tag, bool_t * isclosing, char * Filename, int Linenum);
Pdn_t	AppDocNode( Pdn_t Pdn,	     int linenum);
Pdn_t	NewDocNode( Pdn_t dn_Pparent, int linenum);
char *	SaveDocNode(Pdn_t Pdn, int DN_Type, char * Pch,
		    char * Filename, int Linenum);
bool_t	ParentPre(Pdn_t Pdn, bool_t BoldOnly);

void	MarkNoEmit(   Pdn_t Pdn, bool_t Font);
void	EmitText(     char * Pch, int InPRE, int Linenum);
void	EmitTextPRE(  char * Pch, int InPRE);
void	EmitTextBS(   char * Pch);
bool_t	NoWhiteSpace( char * Pch);
int	CountNewlines(char * Pch);

char *	StrSave(  char * String);
char *	StrSaveN( char * String, ...);
void *	Malloc(	  size_t Size);

void	Usage(void);
void    Error(int Exitvalue, int MyErrno, char * Message, ...);

DBGCODE(void DumpTree(Pdn_t Pdn, int Depth, bool_t Separator);)


// ****************************************************************************
// M A I N

FUNCTION int main(
	int	argc,
	char **	argv)
{
	char *	filename;	// input file.
	FILE *	Pfile;		// open input file.
	char *	pagename;	// such as "Judy1".

	gc_myname = argv[0];
	if (argc != 2) Usage();
	filename = argv[1];

#ifdef DEBUG
// Assert that each dnt_type matches its index in the table, since the code
// depends on this:

	{
	    int dn_type;

	    for (dn_type = 0; TAG(dn_type) != PCNULL; ++dn_type)
		assert(g_dntype[dn_type].dnt_type == dn_type);
	}
#endif


// READ FROM LIST OF FILES OR STDIN; BUILD TREE:

	if ((Pfile = fopen(filename, "r")) == (FILE *) NULL)
	{
	    Error(ERREXIT, errno, "Cannot open file \"%s\" to read it",
		  filename);
	}

	ReadInputFile(filename, Pfile);

	if (ferror(Pfile))
	    Error(ERREXIT, errno, "Cannot read from file \"%s\"", filename);

	DBGCODE(DumpTree(g_Pdnhead, /* Depth = */ 0, /* Separator = */ TRUE);)

	if (g_Pdnhead == PDNNULL)
	{
	    Error(ERREXIT, NOERRNO, "No HTML tags found in file \"%s\"",
		  filename);
	}

	CheckNesting(g_Pdnhead);


// EMIT NROFF VERSION OF TEXT:

	EmitNroffHeader(filename, &pagename);
	EmitNroffBody(g_Pdnhead, /* DLLevel = */ 0, /* InPRE = */ 0, pagename);
	PUTC('\n');			// ensure last line is terminated.

	return(0);

} // main()


// ****************************************************************************
// R E A D   I N P U T   F I L E
//
// Given a filename and stream pointer for reading, read and parse
// Judy-specific HTML and build a structure representing the document, under
// g_Pdnhead.  Set g_linenumlim.
//
// Note:  Ideally this would be a shorter function with helper subroutines, but
// I wrote this fast.  :-)
//
// Note:  This used be called just ReadFile(), but on win_ipf, at least the
// cross-compile environment, this resulted in a duplicate symbol error, as if
// ReadFile() is in a library somewhere.

FUNCTION void ReadInputFile(
	char *	Filename,
	FILE *	PFile)
{
	int	linenum = 0;		// input line number.
	char	line[BUFSIZ];		// read from file.
	char *	Pch;			// place in line.
	char *	Pch2;			// place in line.
	char *	Pchp;			// prior to skipped whitespace.
	char *	tagname;		// for error reporting.
	char	chold;			// old char.
	int	dn_type;		// docnode type.
	bool_t	isclosing;		// is a closing tag.
	Pdn_t	Pdn = PDNNULL;		// current docnode.
	Pdn_t	Pdnprev;		// previous in sibling list.


// READ LINE, TRIM AT ANY NEWLINE, AND SKIP LEADING WHITESPACE:

	while (fgets(line, BUFSIZ, PFile) != PCNULL)
	{
	    line[strcspn(line, "\n")] = CHNULL;
	    ++linenum;

	    Pch = Pchp = line;
	    SKIPSPACE(Pch);		// skip any leading whitespace.

	    if (! ParentPre(Pdn, /* BoldOnly = */ FALSE))
		Pchp = Pch;		// skip for storing, too.


// HANDLE EMPTY LINE:
//
// If within an "already open" DN_TYPE_TEXT, append a newline to the existing
// text in case it turns out to be significant later (mainly in a <PRE>
// section).

	    if (*Pch == CHNULL)
	    {
		if ((Pdn->dn_type) == DN_TYPE_TEXT)
		{
		    assert((Pdn->dn_text) != PCNULL);
		    (Pdn->dn_text) = StrSaveN(Pdn->dn_text, "\n", PCNULL);
		}
		continue;
	    }


// HANDLE DATA LINE:
//
// Look for a "<" that starts an HTML tag.

	    while (*Pch != CHNULL)		// more on line.
	    {
		Pch2 = strchr(Pch, '<');	// look for next tag start.


// SAVE DOCUMENT TEXT:
//
// Save any text preceding a tag (or through end of line) in a document text
// (DN_TYPE_TEXT) docnode; either the existing node if there is one, else a new
// node as a sibling or child of the previous, as appropriate.
//
// This builds one docnode for any inter-tag text, whether it's a portion of
// one line, or many lines long.  In some ways that's simplest and most
// efficient, and in other ways it's weird.
//
// Note:  Use Pchp here, not Pch, so as not to ignore leading whitespace on
// <PRE> lines.

		if (! ((Pch2 == PCNULL) ? strlen(Pchp) : Pch2 - Pchp))
		{
		    assert((Pch == Pch2) || (Pch2 == PCNULL));
		}
		else
		{
		    if (g_Pdnhead == PDNNULL)	// no current tree.
		    {
			Error(ERREXIT, NOERRNO, FILELINE "For this "
			      "translator, the HTML file must start with an "
			      "HTML tag in \"<>\", and no other text",
			      Filename, linenum);
		    }

// Current docnode is not DN_TYPE_TEXT, so append a new docnode child or
// sibling, as appropriate, to the tree:

		    assert(Pdn != PDNNULL);	// should already be assigned.

		    if ((Pdn->dn_type) != DN_TYPE_TEXT)
		    {
			Pdn = AppDocNode(Pdn, linenum);
			(Pdn->dn_type) = DN_TYPE_TEXT;
		    }

// Save current text in current docnode, appending to any existing text:
//
// Note:  To avoid messing up alignments in <PRE> text, use Pchp, not Pch,
// which can skip whitespace.

		    if (Pch2 != PCNULL) *Pch2 = CHNULL;	 // terminate briefly.

		    if ((Pdn->dn_text) == PCNULL)	// no existing text.
			(Pdn->dn_text) = StrSave(Pchp);
		    else
			(Pdn->dn_text) = StrSaveN(Pdn->dn_text, Pchp, PCNULL);

		    assert((Pdn->dn_text) != PCNULL);

// If there's still a tag on the line, unterminate Pch2; otherwise append a
// newline to the saved text in case more document text follows:

		    if (Pch2 != PCNULL)
			*Pch2 = '<';			// unterminate.
		    else
			(Pdn->dn_text) = StrSaveN(Pdn->dn_text, "\n", PCNULL);

		} // if text preceding tag.


// ANALYZE HTML TAG (if any):

		if (Pch2 == PCNULL) break;	// no tag on line; line is done.

		Pch = Pch2 + 1;		// skip "<".
		SKIPSPACE(Pch);		// skip any whitespace.

		if (*Pch == CHNULL)
		    Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, linenum);

		Pch2 = Pch;
		SKIPNONSPACE(Pch2);  // find whitespace or end of line or tag.

		if (*Pch2 == CHNULL)
		    Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, linenum);

		chold = *Pch2;
		*Pch2 = CHNULL;		// temporarily terminate.
		dn_type = TagType(Pch, &isclosing, Filename, linenum);
		*Pch2 = chold;


// HANDLE CLOSING TAG:
//
// First ensure the tag checks out OK.

		if (isclosing)
		{
		    if (*Pch2 != '>')
		    {
			Error(ERREXIT, NOERRNO, FILELINE "Closing HTML tag "
			      "\"%s\" must be followed immediately by \">\"; "
			      "this translator does not even allow whitespace",
			      Filename, linenum, Pch);
		    }

		    if (g_Pdnhead == PDNNULL)
		    {
			Error(ERREXIT, NOERRNO, FILELINE "Closing HTML tag "
			      "\"%s\" found before any opening tag in the file",
			      Filename, linenum, Pch);
		    }

		    tagname = Pch;		// for error reporting.
		    Pch = Pchp = Pch2 + 1;	// skip ">" in line.

// Check if the closing tag is an optional closing for the last unclosed,
// non-DN_TYPE_TEXT docnode in the current sibling list, if any:

		    for (Pdnprev  = Pdn;
			 Pdnprev != PDNNULL;
			 Pdnprev  = Pdnprev->dn_Pprev)
		    {
			if (((Pdnprev->dn_type) == DN_TYPE_TEXT)
			 || (Pdnprev->dn_closed))
			{
			    continue;		// skip text or closed tag.
			}

			if ((Pdnprev->dn_type) == dn_type)
			{
			    (Pdnprev->dn_closed) = TRUE;  // optional closing.
			    break;
			}
		    }

		    if (Pdnprev != PDNNULL) continue;	// matched closing.

// Otherwise check that the closing tag is the (required) closing tag for the
// (required) parent node (which must not have been closed yet):

		    if ((Pdn->dn_Pparent) == PDNNULL)
		    {
			Error(ERREXIT, NOERRNO, FILELINE "Closing HTML tag "
			      "\"%s\" does not match an opening tag",
			      Filename, linenum, tagname);
		    }

		    assert(! (Pdn->dn_Pparent->dn_closed));

		    if ((Pdn->dn_Pparent->dn_type) != dn_type)
		    {
			Error(ERREXIT, NOERRNO, FILELINE "Parent HTML tag "
			      "\"%s\" found on line %d requires a closing tag, "
			      "but \"%s\" does not match it; check for out-of-"
			      "order HTML tags", Filename, linenum,
			      TAG(Pdn->dn_Pparent->dn_type),
			      Pdn->dn_Pparent->dn_linenum, tagname);
		    }

// Go uplevel in the tree to the parent node:

		    Pdn = Pdn->dn_Pparent;
		    (Pdn->dn_closed) = TRUE;
		    continue;

		} // closing tag.


// NEW HTML TAG:  ADD SIBLING OR CHILD NODE TO TREE:
//
// Save appropriate information about the tag and move beyond its closing point
// in the input line.

		Pdn = AppDocNode(Pdn, linenum);
		Pch = Pch2;
		assert( Pch != PCNULL);
		assert(*Pch != CHNULL);
		Pch = Pchp = SaveDocNode(Pdn, dn_type, Pch, Filename, linenum);

	    } // while more on input line.
	} // while more in input file.

	g_linenumlim = linenum + 1;

} // ReadInputFile()


// ****************************************************************************
// C H E C K   N E S T I N G
//
// Given a docnode, recursively check that all nested HTML tags were closed.
// If not, error out.

FUNCTION void CheckNesting(
	Pdn_t Pdn)	// current docnode.
{
	if (NEST(Pdn->dn_type) && (! (Pdn->dn_closed)))
	{
	    Error(ERREXIT, NOERRNO, "No closing tag found for HTML tag \"%s\" "
		  "from input line %d", TAG(Pdn->dn_type), Pdn->dn_linenum);
	}

	if ((Pdn->dn_Pchild) != PDNNULL) CheckNesting(Pdn->dn_Pchild);
	if ((Pdn->dn_Pnext)  != PDNNULL) CheckNesting(Pdn->dn_Pnext);

} // CheckNesting()


// ****************************************************************************
// E M I T   N R O F F   H E A D E R
//
// Given the input filename, a pointer to a page name string to return, and the
// docnode tree under g_Pdnhead, extract header info and emit nroff header
// lines.

FUNCTION void EmitNroffHeader(
	char *	Filename,		// input file.
	char **	PPageName)		// such as "Judy1", to return.
{
	char *	filerev = PCNULL;	// from first comment in input.
	char *	pagesection;		// such as "3X".
	char	lcletter;		// manual tab section, such as "j".
	char *	revision;		// from centered table datum.
	time_t	currtime;		// for ctime().

// Extract "weird" header values:
//
// These must be found in the docnodes tree and prepared for emitting nroff.

	ExtractHeader(g_Pdnhead, &filerev,
		      PPageName, &pagesection, &lcletter, &revision);

	if (filerev == PCNULL)
	{
	    Error(ERREXIT, NOERRNO, "HTML file lacks comment lines; it must "
		  "contain at least one comment line, and the first one must "
		  "contain revision information");
	}

// Emit file header; note, ctime() output already contains a newline:

	(void) time(&currtime);
	(void) printf(".\\\" Auto-translated to nroff -man from %s by %s at %s",
		      Filename, gc_myname, ctime(&currtime));

	(void) printf(".\\\" %s\n",  filerev);
	(void) printf(".TA %c\n",    lcletter);
	(void) printf(".TH %s %s\n", *PPageName, pagesection);
	(void) puts(  ".ds )H Hewlett-Packard Company");
	(void) printf(".ds ]W %s\n", revision);

} // EmitNroffHeader()


// ****************************************************************************
// E M I T   N R O F F   B O D Y
//
// Given a current node in the docnodes tree, the current <DL> level, a flag
// whether below a <PRE> node, the manual entry page name, and in
// g_prevlinenum, the previous input line number that resulted in output,
// recursively emit nroff body text.  Translate the HTML docnodes as described
// in the comments prior to EmitNroffHeader(), and also translate certain HTML
// escaped chars back to literal form.  Hope the results are legal nroff
// without spurious unintended nroff commands embedded.
//
// Note:  This function recurses two ways; first, to the child subtree, and
// second, to the next sibling at the current level.

FUNCTION void EmitNroffBody(
	Pdn_t	Pdn,		// current top of subtree.
	int	DLLevel,	// <DL> level, top = 0.
	int	InPRE,		// bit flags for <PRE> handling.
	char *	PageName)	// such as "Judy1".
{
	int	DLcount = 0;	   // set to 1 if hit <DL> here.
	char *	suffix  = PCNULL;  // to print after children, before siblings.

// When about to emit text, if the previous output came from a lower input line
// number, start with a newline; otherwise do not, and let the text
// concatenate:
//
// Use CHECKPREV except when the text to be emitted is forced to a new line.

#ifdef CPPRINT		// for special debugging:
#define	CHECKPREVPRINT printf("\ncp %d %d\n", g_prevlinenum, Pdn->dn_linenum)
#else
#define	CHECKPREVPRINT // null
#endif

#define	CHECKPREV	\
	CHECKPREVPRINT;	\
	{ if (g_prevlinenum && (g_prevlinenum < (Pdn->dn_linenum))) PUTC('\n');}

// To support CHECKPREV, call SETPREV() after emitting text that might need a
// line break to a new line, or SETPREVNONL to ensure NO newline, that is, the
// next text concatenates on the same line:
//
// Note:  For a correct line number, SETPREV() must account for any newlines in
// the text just emitted.

#define	SETPREV(Text)	g_prevlinenum = (Pdn->dn_linenum) + CountNewlines(Text)
#define	SETPREVNONL	g_prevlinenum = g_linenumlim	// no newline.

// Check if under a lower-level <DL>, for continuing an indented paragraph:

#define	UNDER_DL ((DLLevel > 1)				\
	       && ((Pdn->dn_Pparent) != PDNNULL)	\
	       && ((Pdn->dn_Pparent->dn_type) == DN_TYPE_DL))


// SWITCH ON DOCNODE TYPE:

	if (Pdn->dn_noemit)	// upstream node said to skip this one.
	    goto NextNode;

	switch (Pdn->dn_type)
	{


// DOCUMENT TEXT:
//
// Just emit it with HTML escaped chars modified, with backslashes doubled,
// with no trailing newline, and if not within <PRE> text, with any leading
// whitespace deleted, so that, for example, something like "\fI text\fP" does
// not result.

	case DN_TYPE_TEXT:

	    assert((Pdn->dn_text) != PCNULL);
	    CHECKPREV;
	    EmitText(Pdn->dn_text, InPRE, Pdn->dn_linenum);
	    SETPREV(Pdn->dn_text);
	    break;


// IGNORE THESE TYPES:
//
// See EmitNroffHeader() for nroff equivalents already emitted in some cases.
// In some cases, mark all child nodes no-emit to ignore them.

	case DN_TYPE_HTML:
	case DN_TYPE_HEAD:
	case DN_TYPE_BODY:
	case DN_TYPE_COMM:	break;

	case DN_TYPE_TITLE:
	case DN_TYPE_TABLE:
	case DN_TYPE_TR:
	case DN_TYPE_TD:

	    MarkNoEmit(Pdn->dn_Pchild, /* Font = */ FALSE);
	    break;


// DESCRIPTIVE LIST:
//
// At the top level these represent manual entry sections, and any bold markers
// around the text are ignored.  Below the top level these translate to tagged
// paragraphs.  Here, just note the increment and continue the walk.

	case DN_TYPE_DL:

	    DLcount = 1;
	    break;


// DESCRIPTIVE LIST TAG:

	case DN_TYPE_DT:

	    assert(NEST(DN_TYPE_DT));		// tag text must be child.

	    if ((Pdn->dn_Pchild) == PDNNULL)	// no child exists.
	    {
		Error(ERREXIT, NOERRNO, "HTML tag \"%s\" found at input line "
		      "%d lacks text, which is required by this translator",
		      TAG(DN_TYPE_DT), Pdn->dn_linenum);
	    }

// Further handling depends on DLLevel as explained above:

	    if (DLLevel <= 1)		// major manual section.
	    {
		PUTS("\n.SH ");

		if ((Pdn->dn_Pchild->dn_type) == DN_TYPE_B)
		    (Pdn->dn_Pchild->dn_noemit) = TRUE;	 // skip <B>...</B>.
	    }

// If a <DT> immediately follows a previous <DT>, use .PD 0 for the successive
// .TP to join lines:

	    else
	    {
		if (((Pdn->dn_Pprev) != PDNNULL)
		 && ((Pdn->dn_Pprev->dn_type) == DN_TYPE_DT))
		{
		    PUTS("\n.PD 0\n");
		    suffix = "\n.PD\n";
		}

		PUTS("\n.TP 15\n.C ");
	    }

	    SETPREVNONL;
	    break;


// DESCRIPTIVE LIST DATUM:
//
// Just proceed to dump the embedded text.

	case DN_TYPE_DD: break;


// ANCHOR:
//
// Ignore inbound ("name") anchors and process outbound ("href") anchor labels
// into appropriately highlighted text.

	case DN_TYPE_A:
	{
	    size_t len;		// of substring.
	    Pdn_t  Pdn2;	// child node.
	    char * Pch;		// place in text.

	    assert((Pdn->dn_text) != PCNULL);

	    if (strstr(Pdn->dn_text, "name=") != PCNULL) break;

	    if (strstr(Pdn->dn_text, "href=") == PCNULL)
	    {
		Error(NOEXIT, NOERRNO, "Unrecognized HTML anchor type \"%s\" "
		      "at input line %d ignored; only \"name=\" and \"href=\" "
		      "are allowed by this translator",
		      Pdn->dn_text, Pdn->dn_linenum);
		break;
	    }

// Check for nested text (anchor label):
//
// TBD:  The error message lies a little.  If the text is something like,
// "foo<B>bar</B>", it passes this test; and later, all font tags in the anchor
// label are marked no-emit; and any other embedded tags, who knows what
// happens?

	    if (((Pdn2 = Pdn->dn_Pchild)->dn_type) != DN_TYPE_TEXT)
	    {
		Error(ERREXIT, NOERRNO, "HTML \"href\" anchor at input line "
		      "%d lacks a directly nested anchor label, with no "
		      "further nested tags; this translator cannot support "
		      "nested tags in anchor labels", Pdn->dn_linenum);
	    }
	    assert((Pdn2->dn_text) != PCNULL);

// If the anchor is within a <B><PRE>, do nothing special with fonts, as
// explained earlier:

	    if (ParentPre(Pdn, /* BoldOnly = */ TRUE)) break;

// Since anchor label text font will be forced in a moment, ignore any nested
// font directives so they don't mess up nroff:

	    MarkNoEmit(Pdn->dn_Pchild, /* Font = */ TRUE);

// See if anchor label appears to be a reference to the current page, to some
// other page, or else just make it italicized text:
//
// TBD:  This is pretty shaky, hope it's close enough.

	    len = strlen(PageName);

	    if (strncmp(Pdn2->dn_text, PageName, len) == 0)  // self-reference.
	    {
		CHECKPREV;
		PUTS("\\fB");			// bold font.
		SETPREVNONL;
		suffix = "\\fP";		// revert to previous font.
		break;
	    }

// Contains '(' and no whitespace => appears to reference some other page:
//
// Emit revised, tagged anchor label text immediately.

	    if (((Pch = strchr(Pdn2->dn_text, '(')) != PCNULL)
	     && NoWhiteSpace(Pdn2->dn_text))
	    {
		CHECKPREV;
		PUTS("\\fI");			// italic font.
		*Pch = CHNULL;			// terminate briefly.
		PUTS(Pdn2->dn_text);
		*Pch = '(';
		PUTS("\\fP");			// revert to previous font.
		PUTS(Pch);
		SETPREV(Pdn2->dn_text);

		(Pdn2->dn_noemit) = TRUE;	// skip later.
		break;
	    }

// Just make the anchor label italicized text:

	    CHECKPREV;
	    PUTS("\\fI");			// italic font.
	    SETPREVNONL;
	    suffix = "\\fP";			// revert to previous font.
	    break;

	} // case DN_TYPE_A


// BOLD TEXT:
//
// If the first child is <PRE>, use a "hard" font change; otherwise an in-line
// change.
//
// Note:  For <DT><B>, this node is already marked dn_noemit and not seen here.
//
// Note:  For <B><PRE>, nroff seems to reset font upon .PP, so mark the bold
// for later emission.

	case DN_TYPE_B:

	    if (((Pdn->dn_Pchild) != PDNNULL)
	     && ((Pdn->dn_Pchild->dn_type) == DN_TYPE_PRE))
	    {
		(Pdn->dn_Pchild->dn_bold) = TRUE;	// see above.
		break;
	    }

	    CHECKPREV;
	    PUTS("\\fB");			// bold font.
	    SETPREVNONL;
	    suffix = "\\fP";			// revert to previous font.
	    break;


// ITALIC TEXT:

	case DN_TYPE_I:

	    CHECKPREV;
	    PUTS("\\fI");			// italic font.
	    SETPREVNONL;
	    suffix = "\\fP";			// revert to previous font.
	    break;


// PREFORMATTED TEXT:
//
// Emit prefix/suffix directives based on example in strchr(3C).

	case DN_TYPE_PRE:

	    PUTS(UNDER_DL ? "\n.IP\n.nf\n.ps +1\n" : "\n.PP\n.nf\n.ps +1\n");
	    if (Pdn->dn_bold) PUTS(".ft B\n");	// deferred bold.
	    SETPREVNONL;
	    suffix = ((Pdn->dn_bold) ? "\n.ft P\n.ps\n.fi\n" : "\n.ps\n.fi\n");

	    // set for all children:
	    InPRE = INPRE_BLOCK
		  | ((Pdn->dn_bold) ? INPRE_BOLD   : 0)
		  | (UNDER_DL ?	      INPRE_INDENT : 0);
	    break;


// PARAGRAPH BREAK:
//
// If the parent is a <DL> below the top level, use .IP to continue a .TP
// (tagged paragraph); otherwise emit a standard .PP.

	case DN_TYPE_P:

	    PUTS(UNDER_DL ? "\n.IP\n" : "\n.PP\n");
	    SETPREVNONL;
	    break;


// LINE BREAK:

	case DN_TYPE_BR: PUTS("\n.br\n"); SETPREVNONL; break;


// UNRECOGNIZED DOCNODE TYPE:

	default:
	    Error(ERREXIT, NOERRNO, "Internal error: Unexpected docnode type "
		  "%d in docnodes tree", Pdn->dn_type);

	} // end switch on dn_type


// VISIT CHILD AND SIBLING DOCNODES:
//
// If this was a <DL> here, pass an incremented value to child nodes, but not
// to sibling nodes.

NextNode:
	if ((Pdn->dn_Pchild) != PDNNULL)
	    EmitNroffBody(Pdn->dn_Pchild, DLLevel + DLcount, InPRE, PageName);

	if (suffix != PCNULL) PUTS(suffix);

	if ((Pdn->dn_Pnext) != PDNNULL)
	    EmitNroffBody(Pdn->dn_Pnext, DLLevel, InPRE, PageName);

} // EmitNroffBody()


// ****************************************************************************
// E X T R A C T   H E A D E R
//
// Given a current docnode and pointers to values to return, walk the entire
// docnode tree once, recursively, in-order (parent then child then sibling) to
// extract nroff header information.  Find the first comment line, insist it
// contain "@\(#)", and put this in *PFileRev.  Also find exactly one
// DN_TYPE_TABLE, containing exactly one DN_TYPE_TR, containing one DN_TYPE_TD
// containing "align=\"left\"" and one DN_TYPE_TD containing
// "align=\"center\"", and extract from these the nroff .TH pagename,
// pagesection, and lcletter, and nroff ]W variable revision string,
// respectively.  Error out if anything goes wrong.
//
// Note:  Some of the returned strings are in separate malloc'd memory and
// others are not; treat them as read-only.

FUNCTION void ExtractHeader(
	Pdn_t	Pdn,		// current docnode.
	char **	PFileRev,	// from first comment in input.
	char **	PPageName,	// such as "Judy1".
	char **	PPageSection,	// such as "3X".
	char *	PLcLetter,	// manual tab section, such as "j".
	char **	PRevision)	// from centered table datum.
{
static	bool_t	found_filerev  = FALSE;
static	bool_t	found_table    = FALSE;
static	bool_t	found_tr       = FALSE;
static	bool_t	found_tdleft   = FALSE;
static	bool_t	found_tdcenter = FALSE;

	char *	text;		// from text node.

// Note:  The following are used for both 0 and >= 2 instances, so they don't
// include a line number because there is none for the 0 case:

#define	ERR_TABLE	"This translator expects exactly one HTML table " \
			"(\"TABLE\" tag) in the input file"
#define	ERR_TR		"This translator expects exactly one HTML table row " \
			"(\"TR\" tag) in the input file"
#define	ERR_TDLEFT	"This translator expects exactly one HTML table row " \
			"datum (\"TD\" tag) in the input containing " \
			"'align=\"left\"'"
#define	ERR_TDCENTER	"This translator expects exactly one HTML table row " \
			"datum (\"TD\" tag) in the input containing " \
			"'align=\"center\"'"


// CHECK CURRENT DOCNODE TYPE:

	switch (Pdn->dn_type)
	{
	case DN_TYPE_COMM:

	    if (found_filerev) break;	// already done.
	    found_filerev = TRUE;

// Hide the whatstring markers here from what(1) itself:

	    if (strstr(Pdn->dn_text, "@" "(#)") == PCNULL)
	    {
		Error(ERREXIT, NOERRNO, "First HTML comment line in input, "
		      "found at line %d, must contain a whatstring, marked by "
		      "\"@" "(#)\"", Pdn->dn_linenum);
	    }

	    *PFileRev = Pdn->dn_text;
	    break;

	case DN_TYPE_TABLE:

	    if (found_table) Error(ERREXIT, NOERRNO, ERR_TABLE);
	    found_table = TRUE;

	    if (((Pdn->dn_Pchild) == PDNNULL)
	     || ((Pdn->dn_Pchild->dn_type) != DN_TYPE_TR))
	    {
		Error(ERREXIT, NOERRNO, "The HTML \"%s\" tag at line %d must "
		      "be followed by a \"%s\" tag, but it is not",
		      TAG(DN_TYPE_TABLE), Pdn->dn_linenum, TAG(DN_TYPE_TR));
	    }
	    break;

	case DN_TYPE_TR:

	    if (found_tr) Error(ERREXIT, NOERRNO, ERR_TR);
	    found_tr = TRUE;

	    if (((Pdn->dn_Pchild) == PDNNULL)
	     || ((Pdn->dn_Pchild->dn_type) != DN_TYPE_TD))
	    {
		Error(ERREXIT, NOERRNO, "The HTML \"%s\" tag at line %d must "
		      "be followed by a \"%s\" tag, but it is not",
		      TAG(DN_TYPE_TR), Pdn->dn_linenum, TAG(DN_TYPE_TD));
	    }
	    break;

	case DN_TYPE_TD:

	    if (strstr(Pdn->dn_text, "align=\"left\"") != PCNULL)
	    {
		if (found_tdleft) Error(ERREXIT, NOERRNO, ERR_TDLEFT);
		found_tdleft = TRUE;

		text = StrSave(ExtractText(Pdn));
		ExtractPageInfo(Pdn, text, PPageName, PPageSection, PLcLetter);
	    }
	    else if (strstr(Pdn->dn_text, "align=\"center\"") != PCNULL)
	    {
		if (found_tdcenter) Error(ERREXIT, NOERRNO, ERR_TDCENTER);
		found_tdcenter = TRUE;

		*PRevision = ExtractText(Pdn);
	    }
	    // else ignore line.

	} // switch on dn_type


// VISIT CHILD AND SIBLING DOCNODES:
//
// Note:  Do this even though it seems redundant, to ensure no duplicates.

	if ((Pdn->dn_Pchild) != PDNNULL)
	{
	    ExtractHeader(Pdn->dn_Pchild, PFileRev,
			  PPageName, PPageSection, PLcLetter, PRevision);
	}

	if ((Pdn->dn_Pnext) != PDNNULL)
	{
	    ExtractHeader(Pdn->dn_Pnext, PFileRev,
			  PPageName, PPageSection, PLcLetter, PRevision);
	}


// AT TOP OF TREE, CHECK FOR SUCCESS:
//
// Note:  If you read the fine print, it's clear the ERR_TR here is impossible
// to hit.

	if (Pdn != g_Pdnhead) return;

	if (! found_table)	Error(ERREXIT, NOERRNO, ERR_TABLE);
	if (! found_tr)		Error(ERREXIT, NOERRNO, ERR_TR);
	if (! found_tdleft)	Error(ERREXIT, NOERRNO, ERR_TDLEFT);
	if (! found_tdcenter)	Error(ERREXIT, NOERRNO, ERR_TDCENTER);

} // ExtractHeader()


// ****************************************************************************
// E X T R A C T   T E X T
//
// Given a non-null docnode, return the non-null dn_text field from its first
// child (directly, not a copy).  Error out if anything goes wrong.

FUNCTION char * ExtractText(
	Pdn_t Pdn)	// parent node.
{
	assert(Pdn != PDNNULL);

#define	ERR_NULLTEXT "Node for HTML tag \"%s\", found at line %d, lacks a " \
		     "child \"text\" node containing a non-null text string " \
		     "as required"

// TBD:  This does not report a case of a text string containing only
// whitespace, but some callers do that themselves:

	if (((Pdn->dn_Pchild) == PDNNULL)
	 || ((Pdn->dn_Pchild->dn_type) != DN_TYPE_TEXT)
	 || ((Pdn->dn_Pchild->dn_text) == PCNULL)
	 || ((Pdn->dn_Pchild->dn_text[0]) == CHNULL))
	{
	    Error(ERREXIT, NOERRNO, ERR_NULLTEXT,
		  TAG(Pdn->dn_type), Pdn->dn_linenum);
	}

	return(Pdn->dn_Pchild->dn_text);

} // ExtractText()


// ****************************************************************************
// E X T R A C T   P A G E   I N F O
//
// Given a docnode, a non-null, modifiable string alleged to contain an nroff
// -man header, such as "Judy(3X)", and pointers to return values, break out
// and return the pieces.  Error out if anything goes wrong.
//
// Note:  Returned strings are in separate malloc'd memory.

FUNCTION void ExtractPageInfo(
	Pdn_t	Pdn,		// for error reporting.
	char *	Pch,		// string to decipher.
	char **	PPageName,	// such as "Judy1".
	char **	PPageSection,	// such as "3X".
	char *	PLcLetter)	// manual tab section, such as "j".
{
	char *	Pch2;		// second place in string.
	char *	Pch3 = PCNULL;	// third  place in string; init for gcc -Wall.

// Find start of string:

	assert(Pch != PCNULL);
	SKIPSPACE(Pch);

	if (*Pch == CHNULL)		// nothing but whitepace.
	{
	    Error(ERREXIT, NOERRNO, ERR_NULLTEXT,
		  TAG(Pdn->dn_type), Pdn->dn_linenum);
	}

// Find "(":

	if ((Pch2 = strchr(Pch, '(')) == PCNULL)
	{
	    Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
		  "line %d, has a child \"text\" node whose text lacks a '('",
		  TAG(Pdn->dn_type), Pdn->dn_linenum);
	}

	if (Pch == Pch2)
	{
	    Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
		  "line %d, has a child \"text\" node whose text starts with "
		  "'(' and lacks a leading pagename",
		  TAG(Pdn->dn_type), Pdn->dn_linenum);
	}

// Validate the "()" suffix, such as "(1)" or "(3X)":

	if ((! ISDIGIT(Pch2[1]))			// not "(<digit>".
	 || ((Pch3 = strchr(Pch2, ')')) == PCNULL)	// no ")".
	 || (Pch2 + 3 < Pch3)				// too far away.
	 || ((Pch2 + 3 == Pch3)				// <digit><suffix>.
	  && (! ISUPPER(Pch2[2]))))			// not <A-Z>.
	{
	    Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
		  "line %d, has a child \"text\" node whose text lacks a "
		  "standard UNIX manual entry suffix in the form "
		  "\"(<digit>[<A-Z>])\", such as \"(1)\" or \"(3X)\"",
		  TAG(Pdn->dn_type), Pdn->dn_linenum);
	}

// Break out parts:

	*Pch2 = *Pch3 = CHNULL;			// terminate at '(' and ')'.
	*PPageName    = StrSave(Pch);
	*PPageSection = StrSave(Pch2 + 1);

// Look for *PLcLetter:

	if (! ISUPPER(**PPageName))
	{
	    Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
		  "line %d, has a child \"text\" node whose text does not "
		  "start with an uppercase letter",
		  TAG(Pdn->dn_type), Pdn->dn_linenum);
	}

	*PLcLetter = tolower((int) (**PPageName));

} // ExtractPageInfo()


// ****************************************************************************
// T A G   T Y P E
//
// Given a non-null string that should be an HTML tag type, a pointer to a
// bool_t to return whether this is a closing tag, and a filename and line
// number for error reporting, look up the tag type in g_dntype[] and return
// its index.  Error out if not found.
//
// As a special case, if presented with "!---" with any number of dashes, look
// for "!--".

FUNCTION int TagType(
	char *	 Tag,		// to look up.
	bool_t * Pisclosing,	// return flag: is a closing tag.
	char *	 Filename,	// for error reporting.
	int	 Linenum)	// for error reporting.
{
	int	 dn_type;	// to return.
	char *	 mytag;		// local variation.

	assert( Tag != PCNULL);
	assert(*Tag != CHNULL);

// Check for closing tag (yes, even for types that don't really allow it):

	if ((*Pisclosing = (*Tag == '/')))	// (()) for gcc.
	{
	    ++Tag;
	    SKIPSPACE(Tag);

	    if (*Tag == CHNULL)
		Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, Linenum);
	}

// Translate comment tag to known type:

	mytag = (strncmp(Tag, "!--", 3) ? Tag : "!--");	 // see above.

// Look up tag:
//
// Note:  Main code already asserted dnt_type == dn_type for each entry.

	for (dn_type = 0; TAG(dn_type) != PCNULL; ++dn_type)
	    if (strcmp(mytag, TAG(dn_type)) == 0) return(dn_type);

	Error(ERREXIT, NOERRNO, FILELINE "Unrecognized HTML tag \"%s\"; "
	      "see program source file for recognized types; this is a "
	      "limited, special-purpose translator", Filename, Linenum, Tag);

	/*NOTREACHED*/
	return(0);		// make some compilers happy.

} // TagType()


// ****************************************************************************
// A P P   D O C   N O D E
//
// Given a current docnode tree node, the input file line number, and
// g_Pdnhead, create a new docnode, append it to the tree in the right place,
// and return a pointer to it, with g_Pdnhead updated if required:
//
// * If empty tree, insert new as head of tree.
//
// * Otherwise if current node nests and is not closed, insert as its child.
//
// * Otherwise insert as a sibling of the current node.
//
// Note:  Most HTML tags are non-singletons and hence nest, but if the nesting
// doesn't make sense, too bad, it's not detected, at least not here.

FUNCTION Pdn_t AppDocNode(
	Pdn_t Pdn,		// current docnode tree node.
	int   Linenum)		// in input file.
{
// No current tree, insert first node:

	if (g_Pdnhead == PDNNULL)
	    return(g_Pdnhead = NewDocNode(PDNNULL, Linenum));

// Insert new node as child, with parent set to current node:

	if (NEST(Pdn->dn_type) && (! (Pdn->dn_closed)))
	    return((Pdn->dn_Pchild) = NewDocNode(Pdn, Linenum));

// Insert new node as sibling with same parent:

	(Pdn->dn_Pnext) = NewDocNode(Pdn->dn_Pparent, Linenum);
	(Pdn->dn_Pnext->dn_Pprev) = Pdn;
	return(Pdn->dn_Pnext);

} // AppDocNode()


// ****************************************************************************
// N E W   D O C   N O D E
//
// Malloc() a new docnode and initialize its fields except dn_type, with error
// checking.  Set its parent to the given value.

FUNCTION Pdn_t NewDocNode(
	Pdn_t dn_Pparent,	// parent to record.
	int   Linenum)		// in input file.
{
	Pdn_t Pdn = (Pdn_t) Malloc(sizeof(struct docnode));

	(Pdn -> dn_linenum) = Linenum;
	(Pdn -> dn_closed)  = FALSE;
	(Pdn -> dn_noemit)  = FALSE;
	(Pdn -> dn_bold)    = FALSE;
	(Pdn -> dn_text)    = PCNULL;
	(Pdn -> dn_Pprev)   = PDNNULL;
	(Pdn -> dn_Pnext)   = PDNNULL;
	(Pdn -> dn_Pparent) = dn_Pparent;
	(Pdn -> dn_Pchild)  = PDNNULL;

	return(Pdn);

} // NewDocNode()


// ****************************************************************************
// S A V E   D O C   N O D E
//
// Given a pointer to a docnode, the docnode type, a string for the current
// location (past tag name at whitespace or ">"), and a filename and line
// number for error reporting, save the docnode type in the node, and also save
// the tag text if appropriate; then find the end of the tag (">") and return
// past that location (possibly before more whitespace).  Error out in case of
// syntax error.

FUNCTION char * SaveDocNode(
	Pdn_t	Pdn,		// docnode to modify.
	int	DN_Type,	// new type to save.
	char *	Pch,		// current location past tagname.
	char *	Filename,	// for error reporting.
	int	Linenum)	// for error reporting.
{
	char *	Pch2 = PCNULL;	// second location; init for gcc -Wall.

	assert( Pch != PCNULL);
	assert(*Pch != CHNULL);

// Save type:

	(Pdn->dn_type) = DN_Type;

// Pass whitespace and then find the end of the tag:

	SKIPSPACE(Pch);

	if ((*Pch == CHNULL) || ((Pch2 = strchr(Pch, '>')) == PCNULL))
	    Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, Linenum);

// Optionally save tag text:

	if (SAVETAG(DN_Type))
	{
	    *Pch2 = CHNULL;			// temporarily terminate.
	    (Pdn->dn_text) = StrSave(Pch);
	    *Pch2 = '>';
	}

	return(Pch2 + 1);

} // SaveDocNode()


// ****************************************************************************
// P A R E N T   P R E
//
// Given a docnode (can be null) and a flag whether only bold <PRE> is of
// interest, return TRUE if any of its parents is a <PRE> (marked for bold
// text), that is, DN_TYPE_PRE (with dn_bold set); otherwise return FALSE.

FUNCTION bool_t ParentPre(
	Pdn_t  Pdn,		// starting node.
	bool_t BoldOnly)	// flag: only care about bold <PRE>.
{
	if (Pdn == PDNNULL) return (FALSE);	// no parent.

	for (Pdn = Pdn->dn_Pparent; Pdn != PDNNULL; Pdn = Pdn->dn_Pparent)
	{
	    if (((Pdn->dn_type) == DN_TYPE_PRE)
	     && ((! BoldOnly) || (Pdn->dn_bold)))
	    {
		return(TRUE);
	    }
	}

	return(FALSE);

} // ParentPre()


// ****************************************************************************
// M A R K   N O   E M I T
//
// Given a docnode (can be null), and a flag, recursively mark the node and all
// children and siblings as do-not-emit, unless the flag is set, only mark font
// docnodes.

FUNCTION void MarkNoEmit(
	Pdn_t  Pdn,	// top node to mark.
	bool_t Font)	// flag: only mark font docnodes.
{
	if (Pdn == PDNNULL) return;

	if ((! Font)
	 || ((Pdn->dn_type) == DN_TYPE_B)
	 || ((Pdn->dn_type) == DN_TYPE_I))
	{
	    (Pdn->dn_noemit) = TRUE;
	}

	if ((Pdn->dn_Pchild) != PDNNULL) MarkNoEmit(Pdn->dn_Pchild, Font);
	if ((Pdn->dn_Pnext)  != PDNNULL) MarkNoEmit(Pdn->dn_Pnext,  Font);

} // MarkNoEmit()


// ****************************************************************************
// E M I T   T E X T
//
// Given a text string, a bitflag for <PRE> status, and an input line number
// for error reporting, copy the text string to stdout with no added newlines,
// but translating selected HTML escape codes to simple characters, doubling
// any backslashes, and if InPRE, inserting .IP (if INPRE_INDENT) or .PP at
// blank lines (between successive newlines), and if INPRE_BOLD, putting back
// bold font since .IP/.PP seems to reset the font.  Warn about unrecognized
// escape codes.

struct et_list {
	char * et_escape;	// expected text.
	size_t et_len;		// of expected text.
	char   et_emit;		// equivalent char.
} et_list[] = {
	{ "amp;", 4, '&', },
	{ "gt;",  3, '>', },
	{ "lt;",  3, '<', },
	{ PCNULL, 0, ' ', },	// end of list.
};

FUNCTION void EmitText(
	char * Pch,		// text to emit.
	int    InPRE,		// bitflag for <PRE> status.
	int    Linenum)		// for error reporting.
{
	char * Pch2;		// place in text.
	struct et_list * Pet;	// place in et_list[].

	while ((Pch2 = strchr(Pch, '&')) != PCNULL)	// another escape code.
	{
	    *Pch2 = CHNULL;		// briefly terminate.
	    EmitTextPRE(Pch, InPRE);	// emit preceding part.
	    *Pch2 = '&';
	    Pch = Pch2 + 1;			// past '&'.

	    for (Pet = et_list; Pet->et_escape != PCNULL; ++Pet)
	    {
		if (strncmp(Pch, Pet->et_escape, Pet->et_len) == 0)
		{
		    PUTC(Pet->et_emit);		// translate.
		    Pch += Pet->et_len;		// skip escapecode.
		    break;
		}
	    }

	    if (Pet->et_escape == PCNULL)	// no match found.
	    {
		Error(NOEXIT, NOERRNO, "Unrecognized HTML escape code in "
		      "line %d (or text beginning on that line): \"%.4s...\", "
		      "passed through unaltered", Linenum, Pch2);

		PUTC('&');		// emit start of escape code.
		// continue with Pch is just after the '&'.
	    }
	}

	EmitTextPRE(Pch, InPRE);	// emit remaining part.

} // EmitText()


// ****************************************************************************
// E M I T   T E X T   P R E
//
// Given a text string with no HTML escape codes in it and a bitflag for <PRE>
// status (see EmitText()), emit the string with <PRE> handling, and with any
// backslashes doubled.

FUNCTION void EmitTextPRE(
	char * Pch,		// string to emit.
	int    InPRE)		// bitflag for <PRE> status.
{
	char * Pch2;		// place in string.

	if (! InPRE) { EmitTextBS(Pch); return; }

	while ((Pch2 = strchr(Pch, '\n')) != PCNULL)	// another newline.
	{
	    *Pch2 = CHNULL;		// briefly terminate.
	    EmitTextBS(Pch);		// emit preceding part.
	    *Pch2 = '\n';
	    PUTC('\n');			// emit current newline.

	    if (*(Pch = Pch2 + 1) == '\n')	// successive newline.
	    {
		// emit before next newline:
		PUTS((InPRE & INPRE_INDENT) ? ".IP" : ".PP");

		// also reset font:
		if (InPRE & INPRE_BOLD) PUTS("\n.ft B");
	    }
	}

	EmitTextBS(Pch);		// emit trailing part.

} // EmitTextPRE()


// ****************************************************************************
// E M I T   T E X T   B S
//
// Given a text string with no HTML escape codes in it, emit the string with
// any backslashes doubled.

FUNCTION void EmitTextBS(
	char * Pch)		// string to emit.
{
	while (*Pch != CHNULL)
	{
	    PUTC(*Pch); if (*Pch == '\\') PUTC('\\');
	    ++Pch;
	}

} // EmitTextBS()


// ****************************************************************************
// N O   W H I T E   S P A C E
//
// Given a string, return TRUE if it contains no whitespace, otherwise FALSE.

FUNCTION bool_t NoWhiteSpace(
	char * Pch)	// string to check.
{
	assert(Pch != PCNULL);

	while (*Pch != CHNULL) { if (ISSPACE(*Pch)) return(FALSE); ++Pch; }
	return(TRUE);

} // NoWhiteSpace()


// ****************************************************************************
// C O U N T   N E W L I N E S
//
// Return the number of newline chars in a string.

FUNCTION int CountNewlines(
	char * Pch)	// in which to count newlines.
{
	int    count = 0;

	assert(Pch != PCNULL);

	while (*Pch != CHNULL) count += ((*Pch++) == '\n');
	return(count);

} // CountNewlines()


// ****************************************************************************
// S T R   S A V E
//
// Given a string, copy the string into malloc'd space and return a pointer to
// the new copy.  Error out if malloc() fails.

FUNCTION char * StrSave(
	char * string)
{
	return(strcpy((char *) Malloc((size_t) (strlen(string) + 1)), string));

} // StrSave()


// ****************************************************************************
// S T R   S A V E   N
//
// Given one or more strings, terminated by a null pointer, allocate space for
// the concatenation of the strings, concatenate them, and return a pointer to
// the result.  Also free() all but the last string.

FUNCTION char * StrSaveN(
	char *	String, ...)
{
	va_list Parg;			// place in arg list.
	char *	stringN;		// string from arg list.
	char *	stringN1 = PCNULL;	// previous string.
	size_t	size = 0;		// total bytes needed.
	char *	result;			// string to return.


// DETERMINE SPACE NEEDED:

	va_start(Parg, String);

	for (stringN  = String;
	     stringN != PCNULL;
	     stringN  = va_arg(Parg, char *))
	{
	    size += strlen(stringN);
	}

	va_end(Parg);


// ALLOCATE SPACE, CONCATENATE STRINGS:

	*(result = (char *) Malloc((size_t) (size + 1))) = CHNULL;

	va_start(Parg, String);

	for (stringN  = String;
	     stringN != PCNULL;
	     stringN  = va_arg(Parg, char *))
	{
	    if (stringN1 != PCNULL) free((void *) stringN1);
	    stringN1 = stringN;			// all but last string.
	    (void) strcat(result, stringN);
	}

	va_end(Parg);
	return(result);

} // StrSaveN()


// ****************************************************************************
// M A L L O C
//
// Do a malloc() with error checking.

FUNCTION void * Malloc(
	size_t	Size)	// bytes to get.
{
	void *	Pm;	// pointer to memory.

	if ((Pm = malloc(Size)) == (void *) NULL)
	    Error(ERREXIT, errno, "Cannot malloc %d bytes", Size);

	return(Pm);

} // Malloc()


// ****************************************************************************
// U S A G E
//
// Print usage messages (char *gc_usage[]) to stderr and exit with ERREXIT.
// Follow each message line by a newline.

FUNCTION void Usage(void)
{
	int which = 0;		// current line.

	while (gc_usage[which] != PCNULL)
	{
	    (void) fprintf(stderr, gc_usage[which++], gc_myname);
	    (void) putc('\n', stderr);
	}

	exit(ERREXIT);

} // Usage()


// ****************************************************************************
// E R R O R
//
// Given an exit value (NOEXIT, ERREXIT, or USAGE), an errno value (NOERRNO if
// none), a message (printf) string, and zero or more argument strings, print
// an error message to stderr and, if exitvalue is NOEXIT, return; if USAGE,
// print a pointer to the program's usage message; otherwise exit with the
// value.
//
// Message is preceded by "<myname>: " using global gc_myname, and by
// "Warning: " for NOEXIT, and followed by a period and newline.  If myerrno
// (system error number) is not NOERRNO, a relevant message is appended before
// the period.

FUNCTION void Error(
	int	Exitvalue,	// or NOEXIT for warning.
	int	MyErrno,	// system errno if relevant.
	char *	Message, ...)
{
	va_list Parg;		// place in arg list.

	(void) fprintf(stderr, "%s: ", gc_myname);
	if (Exitvalue == NOEXIT) (void) fputs("Warning: ", stderr);

	va_start(Parg, Message);
	(void) vfprintf(stderr, Message, Parg);
	va_end(Parg);

	if (MyErrno != NOERRNO)
	{
	    (void) fprintf(stderr, ": %s (errno = %d)", strerror(MyErrno),
			   MyErrno);
	}

	(void) putc('.',  stderr);
	(void) putc('\n', stderr);

	if (Exitvalue == USAGE)
	{
	    (void) fprintf(stderr, "For a usage summary, run %s -?\n",
			   gc_myname);
	}

	DBGCODE(DumpTree(g_Pdnhead, /* Depth = */ 0, /* Separator = */ FALSE);)

	if (Exitvalue != NOEXIT)
	    exit(Exitvalue);

} // Error()


#ifdef DEBUG

// ****************************************************************************
// D U M P   T R E E
//
// Dump to stdout a representation of the docnode tree under g_Pdnhead.
// Recursively traverse the tree in-order (parent then child then sibling).

FUNCTION void DumpTree(
	Pdn_t  Pdn,		// first node of current sibling list.
	int    Depth,		// current depth.
	bool_t Separator)	// print a separator line after a long dump.
{
	int   indent;		// for counting to Depth.

// Check if enabled:

	if (getenv("DUMP") == PCNULL)
	{
	    PUTS(".\\\" $DUMP not set; DumpTree() disabled.\n");
	    return;
	}

// Check for empty tree:

	if ((Depth == 0) && (Pdn == PDNNULL))
	{
	    PUTS("Head pointer is null.\n");
	    return;
	}

// Print siblings and each of their children, indented to Depth after the node
// address:

	while (Pdn != PDNNULL)
	{
	    (void) printf("%lx ", (unsigned long) Pdn);

	    for (indent = 0; indent <= Depth; ++indent) PUTC('.');

	    (void) printf(" %-5s %3d %c %lx %lx \"%s\"\n",
			  ((Pdn -> dn_type) == DN_TYPE_TEXT) ?
			      "text" : TAG(Pdn -> dn_type),
			  Pdn -> dn_linenum,
			  (Pdn->dn_closed) ? 'c' : 'o',
			  Pdn -> dn_Pparent,
			  Pdn -> dn_Pprev,
			  Pdn -> dn_text);

	    if ((Pdn -> dn_Pchild) != PDNNULL)
		DumpTree(Pdn -> dn_Pchild, Depth + 1, Separator);

	    Pdn = Pdn -> dn_Pnext;
	}

// Print separator line:

	if ((Depth == 0) && Separator)
	    PUTS("=======================================================\n");

} // DumpTree()

#endif // DEBUG