/*
 * C
 * 80
 * extract title
 * Guest on 27th April 2022 01:48:00 AM
 */
#include <stdio.h>
#include <string.h>
/* Hand-rolled boolean: BOOL is an int that holds either TRUE (1) or FALSE (0). */
typedef int BOOL;
#define TRUE 1
#define FALSE 0
/*
 * Extract the TITLE of an HTML document and print it on a single line.
 *
 * Usage: <program> directory filename
 *
 * argv[1] is the directory (may be empty) and argv[2] the file name; the
 * file opened is "directory/filename" (or just "filename" when the
 * directory is empty).  Only the first MAX_LINES lines are scanned.  Text
 * between <TITLE> and </TITLE> (all upper or all lower case) is copied to
 * stdout with nested tags removed and line breaks replaced by spaces.
 * If no title is found in the scanned lines, argv[2] is printed instead.
 *
 * Returns 0 on success (title or fallback printed), 1 on a usage error or
 * an over-long path, 2 when the file cannot be opened.
 */
int main(int argc, char **argv)
{
    enum { MAX_LINES = 20 };   /* Scan only the first 20 lines */

    FILE *fp;
    char filename[256];
    char buffer[1024];
    char *p;
    const char *prog;
    size_t len;
    BOOL found = FALSE;        /* TRUE once "<TITLE>" has been seen */
    BOOL tag = FALSE;          /* TRUE while inside a tag still to be skipped */
    BOOL eol;                  /* TRUE if the current chunk ended in a newline */
    int lines = 0;

    /* argv[0] may legally be NULL; never hand that to a %s conversion. */
    prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "extract-title";

    if (argc != 3) {
        fprintf(stderr,
                "This program takes an HTML document and extracts to its stdout\n"
                "the TITLE of the document, all in one line.\n\n"
                "Usage:\n"
                "\t%s directory filename\n\n", prog);
        return 1;
    }

    /* Build "directory/filename", refusing anything that would overflow. */
    if (strlen(argv[1]) + 1 + strlen(argv[2]) >= sizeof filename) {
        fprintf(stderr, "%s: Path too long: \"%s/%s\"\n",
                prog, argv[1], argv[2]);
        return 1;
    }
    strcpy(filename, argv[1]);
    if (*filename)
        strcat(filename, "/");
    strcat(filename, argv[2]);

    if (!(fp = fopen(filename, "r"))) {
        fprintf(stderr, "%s: Unable to open file \"%s\"\n", prog, filename);
        return 2;
    }

    while (lines++ < MAX_LINES && fgets(buffer, sizeof buffer, fp) != NULL) {
        /*
         * Strip the line terminator only if one is really there: fgets
         * returns a chunk without '\n' for over-long lines and for a last
         * line that lacks one, and those must not lose a character.
         */
        eol = FALSE;
        len = strlen(buffer);
        if (len > 0 && buffer[len - 1] == '\n') {
            buffer[--len] = '\0';
            eol = TRUE;
            if (len > 0 && buffer[len - 1] == '\r')
                buffer[--len] = '\0';      /* tolerate CRLF files */
        }

        p = buffer;
        while (*p) {
            if (tag) {
                /* Skip to the end of the tag we are inside. */
                p = strchr(p, '>');
                if (p == NULL)
                    break;                 /* tag continues on the next line */
                p++;
                tag = FALSE;
            }

            /* Advance to the next '<', echoing text if inside the title. */
            if (found) {
                while (*p && *p != '<')
                    fputc(*p++, stdout);
            } else {
                while (*p && *p != '<')
                    p++;
            }

            if (!*p) {
                /* End of line inside the title: newline becomes a space. */
                if (found && eol)
                    fputc(' ', stdout);
                break;
            }

            if (!found
                && (!strncmp(p, "<TITLE>", 7) || !strncmp(p, "<title>", 7))) {
                p += 7;
                found = TRUE;
            } else if (found
                && (!strncmp(p, "</TITLE>", 8) || !strncmp(p, "</title>", 8))) {
                /* Title complete: terminate the output line and stop. */
                fputc('\n', stdout);
                fclose(fp);
                return 0;
            } else {
                tag = TRUE;                /* some other tag: skip it */
            }
        } /* while stuff in buffer */
    } /* while not EOF and not very many lines read */

    fclose(fp);

    if (found) {
        /* Title started but was never closed: just end the partial line. */
        fputc('\n', stdout);
    } else {
        /* The title was not found among the first few lines, */
        /* so fall back to the file name. */
        printf("%s\n", argv[2]);
    }
    return 0;
}