HTML 是描述富文本的一种方式。作为 WWW 的文件格式,它已经非常成熟。依我看,几乎所有现代的富文本处理工具都支持它。(就我个人所知,WinWord 多年来一直支持 HTML。)
编写 HTML 文件相当容易,因为 HTML 文件实际上只不过是可以用纯 ASCII 编写的源代码。
简短的演示 print-HTML.c:
#include <stdio.h>
struct Entry {
const char *author;
const char *title;
};
/*
 * Writes one table row (<tr> ... </tr>) for a single entry to f.
 *
 * f       output stream
 * pEntry  entry whose author and title are printed
 * i       number shown in the first column
 *
 * The author and title strings are written verbatim (plain %s), i.e.
 * without any HTML escaping.
 */
void printEntry(FILE *f, struct Entry *pEntry, int i)
{
    const char *author = pEntry->author;
    const char *title = pEntry->title;

    fputs("<tr><!-- start of table row -->\n", f);
    fprintf(f, "<td>%d</td><!-- number -->\n", i);
    fprintf(f, "<td>%s</td><!-- Author -->\n", author);
    fprintf(f, "<td>%s</td><!-- Title -->\n", title);
    fputs("</tr><!-- end of table row -->\n", f);
}
/*
 * Writes a complete HTML <table> to f: a header row followed by one
 * row per entry, numbered starting at 1.
 *
 * f         output stream
 * nEntries  number of elements in table
 * table     entries to print
 */
void printTable(FILE *f, size_t nEntries, struct Entry table[])
{
    size_t row = 0;

    fputs(
        "<table><!-- start of table -->\n"
        "<tr><!-- start of table head row -->\n"
        "<th>No.</th><th>Author</th><th>Title</th>\n"
        "</tr><!-- end of table head row -->\n",
        f);

    while (row < nEntries) {
        /* Row numbers shown to the reader are 1-based. */
        printEntry(f, &table[row], (int)row + 1);
        ++row;
    }

    fputs("</table><!-- end of table -->\n", f);
}
/*
 * Writes a whole HTML document to f: the title is used both for the
 * <title> element and for the <h1> heading, followed by the table.
 *
 * f         output stream
 * title     document title (written verbatim, without HTML escaping)
 * nEntries  number of elements in table
 * table     entries to print
 */
void printDoc(
    FILE *f, const char *title, size_t nEntries, struct Entry table[])
{
    /* document head */
    fputs(
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head>\n",
        f);
    fprintf(f, "<title>%s</title>\n", title);
    fputs(
        "</head>\n"
        "<body>\n",
        f);

    /* document body: heading and table */
    fprintf(f, "<h1>%s</h1>\n", title);
    printTable(f, nEntries, table);

    /* close the document */
    fputs(
        "</body>\n"
        "</html>\n",
        f);
}
/*
 * Builds a small sample table of books and prints it as an HTML
 * document on standard output.
 */
int main()
{
    /* sample data */
    struct Entry table[] = {
        { .author = "Kernighan and Ritchie",
          .title = "The C Programming Language" },
        { .author = "Kernighan and Ritchie",
          .title = "Programming in C" },
        { .author = "Tim Berners-Lee",
          .title = "Weaving the Web" },
        { .author = "Tim Berners-Lee",
          .title = "Hypertext Markup Language: the HTML explained from the Inventor of the WWW" }
    };
    /* element count, derived from the array itself */
    const size_t count = sizeof table / sizeof *table;

    /* emit the HTML document */
    printDoc(stdout, "My Favorite Books", count, table);

    return 0;
}
会话示例:
$ gcc -std=c11 -o print-HTML print-HTML.c
$ ./print-HTML
<!DOCTYPE html>
<html>
<head>
<title>My Favorite Books</title>
</head>
<body>
<h1>My Favorite Books</h1>
<table><!-- start of table -->
<tr><!-- start of table head row -->
<th>No.</th><th>Author</th><th>Title</th>
</tr><!-- end of table head row -->
<tr><!-- start of table row -->
<td>1</td><!-- number -->
<td>Kernighan and Ritchie</td><!-- Author -->
<td>The C Programming Language</td><!-- Title -->
</tr><!-- end of table row -->
<tr><!-- start of table row -->
<td>2</td><!-- number -->
<td>Kernighan and Ritchie</td><!-- Author -->
<td>Programming in C</td><!-- Title -->
</tr><!-- end of table row -->
<tr><!-- start of table row -->
<td>3</td><!-- number -->
<td>Tim Berners-Lee</td><!-- Author -->
<td>Weaving the Web</td><!-- Title -->
</tr><!-- end of table row -->
<tr><!-- start of table row -->
<td>4</td><!-- number -->
<td>Tim Berners-Lee</td><!-- Author -->
<td>Hypertext Markup Language: the HTML explained from the Inventor of the WWW</td><!-- Title -->
</tr><!-- end of table row -->
</table><!-- end of table -->
</body>
</html>
$ ./print-HTML >test.html
$
下面是我在以下应用程序中打开 test.html 时的一些截图:
Firefox:
Windows 版 MS Word:
微软Excel:
更新:
在上面的示例代码中,我小心地避免了在文本片段中使用元字符(<、>、& 和 ")。如果原始文本中出现这些字符,就不能按原样输出(因为这些字符在 HTML 语法中可能具有特殊含义)。相反,它们必须被替换为对应的实体:
- < ⇒ &lt;(标签开始)
- > ⇒ &gt;(标签结束)
- & ⇒ &amp;(实体开始)
- " ⇒ &quot;(带引号的属性值的开始/结束)
- ' ⇒ &apos;(带引号的属性值的另一种开始/结束)
在 HTML 中,有更多的预定义实体。 (在 XML 中,这些是唯一的预定义实体。)
更新后的示例代码:
#include <stdio.h>
/*
 * Writes text to f, replacing the HTML metacharacters by their
 * predefined entities so that the text is rendered literally:
 *
 *   <  ->  &lt;      >  ->  &gt;      &  ->  &amp;
 *   "  ->  &quot;    '  ->  &apos;
 *
 * All other characters are written unchanged.
 *
 * f     output stream
 * text  NUL-terminated string to write (must not be NULL)
 */
void printHTMLText(FILE *f, const char *text)
{
    for (; *text; ++text) {
        switch (*text) {
        /* fputs() rather than fprintf(): these are fixed strings, not formats. */
        case '<': fputs("&lt;", f); break;
        case '>': fputs("&gt;", f); break;
        case '&': fputs("&amp;", f); break;
        case '"': fputs("&quot;", f); break;
        case '\'': fputs("&apos;", f); break;
        default: putc(*text, f); break;
        }
    }
}
struct Entry {
const char *author;
const char *title;
};
/*
 * Writes one table row (<tr> ... </tr>) for a single entry to f.
 *
 * f       output stream
 * pEntry  entry whose author and title are printed
 * i       number shown in the first column
 *
 * The author and title are passed through printHTMLText() instead of
 * being written directly.
 */
void printEntry(FILE *f, struct Entry *pEntry, int i)
{
    /* row start and number cell */
    fputs("<tr><!-- start of table row -->\n", f);
    fprintf(f, "<td>%d</td><!-- number -->\n", i);

    /* author cell */
    fputs("<td>", f);
    printHTMLText(f, pEntry->author);
    fputs("</td><!-- Author -->\n", f);

    /* title cell */
    fputs("<td>", f);
    printHTMLText(f, pEntry->title);
    fputs("</td><!-- Title -->\n", f);

    /* row end */
    fputs("</tr><!-- end of table row -->\n", f);
}
/*
 * Writes a complete HTML <table> to f: a header row followed by one
 * row per entry, numbered starting at 1.
 *
 * f         output stream
 * nEntries  number of elements in table
 * table     entries to print
 */
void printTable(FILE *f, size_t nEntries, struct Entry table[])
{
    size_t row = 0;

    fputs(
        "<table><!-- start of table -->\n"
        "<tr><!-- start of table head row -->\n"
        "<th>No.</th><th>Author</th><th>Title</th>\n"
        "</tr><!-- end of table head row -->\n",
        f);

    while (row < nEntries) {
        /* Row numbers shown to the reader are 1-based. */
        printEntry(f, &table[row], (int)row + 1);
        ++row;
    }

    fputs("</table><!-- end of table -->\n", f);
}
/*
 * Writes a whole HTML document to f: the title is used both for the
 * <title> element and for the <h1> heading, followed by the table.
 *
 * f         output stream
 * title     document title, written via printHTMLText()
 * nEntries  number of elements in table
 * table     entries to print
 */
void printDoc(
    FILE *f, const char *title, size_t nEntries, struct Entry table[])
{
    /* document head with <title> */
    fputs(
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head>\n"
        "<title>",
        f);
    printHTMLText(f, title);
    fputs(
        "</title>\n"
        "</head>\n"
        "<body>\n",
        f);

    /* heading repeats the title */
    fputs("<h1>", f);
    printHTMLText(f, title);
    fputs("</h1>\n", f);

    printTable(f, nEntries, table);

    /* close the document */
    fputs(
        "</body>\n"
        "</html>\n",
        f);
}
/*
 * Builds a small sample table of books (two author strings contain an
 * ampersand) and prints it as an HTML document on standard output.
 */
int main()
{
    /* sample data */
    struct Entry table[] = {
        { .author = "Kernighan & Ritchie",
          .title = "The C Programming Language" },
        { .author = "Kernighan & Ritchie",
          .title = "Programming in C" },
        { .author = "Tim Berners-Lee",
          .title = "Weaving the Web" },
        { .author = "Tim Berners-Lee",
          .title = "Hypertext Markup Language: the HTML explained from the Inventor of the WWW" }
    };
    /* element count, derived from the array itself */
    const size_t count = sizeof table / sizeof *table;

    printDoc(stdout, "My Favorite Books", count, table);

    return 0;
}
例如,条目
{ "Kernighan & Ritchie", "The C Programming Language" }
将被输出为:
<td>Kernighan &amp; Ritchie</td><!-- Author -->
<td>The C Programming Language</td><!-- Title -->
注意:
实际上," 只需在双引号括起的属性值中进行替换(' 同理,只需在单引号括起的属性值中替换)。反过来,< 和 > 在属性值中不需要替换。为了使代码保持简单紧凑,函数 printHTMLText() 会替换所有这些字符。